From 388042f0d89aed671bb0c36ffdb87f6caa9be8a9 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 9 Sep 2024 20:39:26 -0700 Subject: [PATCH 01/13] [WIP] Reorganize Cython to separate C++ bindings and make Cython classes public --- python/rmm/CMakeLists.txt | 4 +- python/rmm/rmm/cpp/CMakeLists.txt | 35 + python/rmm/rmm/cpp/__init__.pxd | 13 + python/rmm/rmm/cpp/_torch_allocator.cpp | 64 + python/rmm/rmm/cpp/cpp_cuda_stream.pxd | 28 + python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd | 23 + python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd | 32 + python/rmm/rmm/cpp/cpp_device_buffer.pxd | 58 + python/rmm/rmm/cpp/cpp_device_uvector.pxd | 39 + python/rmm/rmm/cpp/cpp_logger.pyx | 66 + python/rmm/rmm/cpp/cpp_memory_resource.pxd | 34 + python/rmm/rmm/cpp/cpp_memory_resource.pyx | 215 +++ .../rmm/rmm/cpp/cpp_per_device_resource.pxd | 36 + python/rmm/rmm/python/CMakeLists.txt | 28 + python/rmm/rmm/python/__init__.pxd | 13 + python/rmm/rmm/python/__init__.py | 15 + python/rmm/rmm/python/cuda_stream.pyx | 36 + python/rmm/rmm/python/device_buffer.pxd | 71 + python/rmm/rmm/python/device_buffer.pyx | 559 ++++++++ python/rmm/rmm/python/helper.pxd | 16 + python/rmm/rmm/python/helper.pyx | 78 ++ python/rmm/rmm/python/lib.pxd | 17 + python/rmm/rmm/python/lib.pyx | 13 + python/rmm/rmm/python/logger.pyx | 208 +++ python/rmm/rmm/python/memory_resource.pxd | 83 ++ python/rmm/rmm/python/memory_resource.pyx | 1170 +++++++++++++++++ 26 files changed, 2953 insertions(+), 1 deletion(-) create mode 100644 python/rmm/rmm/cpp/CMakeLists.txt create mode 100644 python/rmm/rmm/cpp/__init__.pxd create mode 100644 python/rmm/rmm/cpp/_torch_allocator.cpp create mode 100644 python/rmm/rmm/cpp/cpp_cuda_stream.pxd create mode 100644 python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd create mode 100644 python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd create mode 100644 python/rmm/rmm/cpp/cpp_device_buffer.pxd create mode 100644 python/rmm/rmm/cpp/cpp_device_uvector.pxd create mode 100644 python/rmm/rmm/cpp/cpp_logger.pyx create mode 100644 python/rmm/rmm/cpp/cpp_memory_resource.pxd create mode 100644 python/rmm/rmm/cpp/cpp_memory_resource.pyx create mode 100644 python/rmm/rmm/cpp/cpp_per_device_resource.pxd create mode 100644 python/rmm/rmm/python/CMakeLists.txt create mode 100644 python/rmm/rmm/python/__init__.pxd create mode 100644 python/rmm/rmm/python/__init__.py create mode 100644 python/rmm/rmm/python/cuda_stream.pyx create mode 100644 python/rmm/rmm/python/device_buffer.pxd create mode 100644 python/rmm/rmm/python/device_buffer.pyx create mode 100644 python/rmm/rmm/python/helper.pxd create mode 100644 python/rmm/rmm/python/helper.pyx create mode 100644 python/rmm/rmm/python/lib.pxd create mode 100644 python/rmm/rmm/python/lib.pyx create mode 100644 python/rmm/rmm/python/logger.pyx create mode 100644 python/rmm/rmm/python/memory_resource.pxd create mode 100644 python/rmm/rmm/python/memory_resource.pyx diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index 6c2515102..4c4e603ef 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -30,4 +30,6 @@ rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") add_subdirectory(rmm/_cuda) -add_subdirectory(rmm/_lib) +# add_subdirectory(rmm/_lib) +add_subdirectory(rmm/cpp) +add_subdirectory(rmm/python) diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/cpp/CMakeLists.txt new file mode 100644 index 000000000..8ac924df4 --- /dev/null +++ b/python/rmm/rmm/cpp/CMakeLists.txt @@ -0,0 +1,35 @@ +# 
============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources cpp_logger.pyx cpp_memory_resource.pyx) +set(linked_libraries rmm::rmm) + +# Build all of the Cython targets +rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + CXX) + +# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols +# loaded from other DSOs +foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden) +endforeach() + +add_library(_torch_allocator SHARED _torch_allocator.cpp) +# Want the output to be called _torch_allocator.so +set_target_properties(_torch_allocator PROPERTIES PREFIX "" SUFFIX ".so") +target_link_libraries(_torch_allocator PRIVATE rmm::rmm) +cmake_path(RELATIVE_PATH CMAKE_CURRENT_SOURCE_DIR BASE_DIRECTORY "${PROJECT_SOURCE_DIR}" + OUTPUT_VARIABLE _torch_allocator_location) +install(TARGETS _torch_allocator DESTINATION "${_torch_allocator_location}") diff --git a/python/rmm/rmm/cpp/__init__.pxd b/python/rmm/rmm/cpp/__init__.pxd new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/cpp/__init__.pxd @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/cpp/_torch_allocator.cpp b/python/rmm/rmm/cpp/_torch_allocator.cpp new file mode 100644 index 000000000..bfe94c2d0 --- /dev/null +++ b/python/rmm/rmm/cpp/_torch_allocator.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/per_device_resource.hpp>
+
+#include <cstddef>
+
+// These signatures must match those required by CUDAPluggableAllocator in
+// github.com/pytorch/pytorch/blob/main/torch/csrc/cuda/CUDAPluggableAllocator.h
+// Since the loading is done at runtime via dlopen, no error checking
+// can be performed for mismatching signatures.
+
+/**
+ * @brief Allocate memory of at least \p size bytes.
+ *
+ * @throws rmm::bad_alloc When the requested allocation cannot be satisfied.
+ *
+ * @param size The number of bytes to allocate
+ * @param device The device whose memory resource one should use
+ * @param stream CUDA stream to perform allocation on
+ * @return Pointer to the newly allocated memory
+ */
+extern "C" void* allocate(std::size_t size, int device, void* stream)
+{
+  rmm::cuda_device_id const device_id{device};
+  rmm::cuda_set_device_raii with_device{device_id};
+  auto mr = rmm::mr::get_per_device_resource_ref(device_id);
+  return mr.allocate_async(
+    size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)});
+}
+
+/**
+ * @brief Deallocate memory pointed to by \p ptr.
+ *
+ * @param ptr Pointer to be deallocated
+ * @param size The number of bytes in the allocation
+ * @param device The device whose memory resource one should use
+ * @param stream CUDA stream to perform deallocation on
+ */
+extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream)
+{
+  rmm::cuda_device_id const device_id{device};
+  rmm::cuda_set_device_raii with_device{device_id};
+  auto mr = rmm::mr::get_per_device_resource_ref(device_id);
+  mr.deallocate_async(ptr,
+                      size,
+                      rmm::CUDA_ALLOCATION_ALIGNMENT,
+                      rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)});
+}
diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream.pxd
new file mode 100644
index 000000000..16b66ee2b
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_cuda_stream.pxd
@@ -0,0 +1,28 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from cuda.ccudart cimport cudaStream_t
+from libcpp cimport bool
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+
+
+cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil:
+    cdef cppclass cuda_stream:
+        cuda_stream() except +
+        bool is_valid() except +
+        cudaStream_t value() except +
+        cuda_stream_view view() except +
+        void synchronize() except +
+        void synchronize_no_throw()
diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd
new file mode 100644
index 000000000..553b38514
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd
@@ -0,0 +1,23 @@
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view + + +cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream_pool: + cuda_stream_pool(size_t pool_size) + cuda_stream_view get_stream() + cuda_stream_view get_stream(size_t stream_id) except + + size_t get_pool_size() diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd new file mode 100644 index 000000000..bf0d33c24 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd @@ -0,0 +1,32 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + + +cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream_view: + cuda_stream_view() + cuda_stream_view(cudaStream_t) + cudaStream_t value() + bool is_default() + bool is_per_thread_default() + void synchronize() except + + + cdef bool operator==(cuda_stream_view const, cuda_stream_view const) + + const cuda_stream_view cuda_stream_default + const cuda_stream_view cuda_stream_legacy + const cuda_stream_view cuda_stream_per_thread diff --git a/python/rmm/rmm/cpp/cpp_device_buffer.pxd b/python/rmm/rmm/cpp/cpp_device_buffer.pxd new file mode 100644 index 000000000..1aa7634cf --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_device_buffer.pxd @@ -0,0 +1,58 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource
+
+
+cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil:
+    cdef cppclass cuda_device_id:
+        ctypedef int value_type
+        cuda_device_id()
+        cuda_device_id(value_type id)
+        value_type value()
+
+    cdef cuda_device_id get_current_cuda_device()
+
+cdef extern from "rmm/prefetch.hpp" namespace "rmm" nogil:
+    cdef void prefetch(const void* ptr,
+                       size_t bytes,
+                       cuda_device_id device,
+                       cuda_stream_view stream) except +
+
+cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
+    cdef cppclass device_buffer:
+        device_buffer()
+        device_buffer(
+            size_t size,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        device_buffer(
+            const void* source_data,
+            size_t size,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        device_buffer(
+            const device_buffer buf,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        void reserve(size_t new_capacity, cuda_stream_view stream) except +
+        void resize(size_t new_size, cuda_stream_view stream) except +
+        void shrink_to_fit(cuda_stream_view stream) except +
+        void* data()
+        size_t size()
+        size_t capacity()
diff --git a/python/rmm/rmm/cpp/cpp_device_uvector.pxd b/python/rmm/rmm/cpp/cpp_device_uvector.pxd
new file mode 100644
index 000000000..2cb647e3c
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_device_uvector.pxd
@@ -0,0 +1,39 @@
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+from rmm.cpp.cpp_device_buffer cimport device_buffer
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource
+
+
+cdef extern from "rmm/device_uvector.hpp" namespace "rmm" nogil:
+    cdef cppclass device_uvector[T]:
+        device_uvector(size_t size, cuda_stream_view stream) except +
+        T* element_ptr(size_t index)
+        void set_element(size_t element_index, const T& v, cuda_stream_view s)
+        void set_element_async(
+            size_t element_index,
+            const T& v,
+            cuda_stream_view s
+        ) except +
+        T front_element(cuda_stream_view s) except +
+        T back_element(cuda_stream_view s) except +
+        void reserve(size_t new_capacity, cuda_stream_view stream) except +
+        void resize(size_t new_size, cuda_stream_view stream) except +
+        void shrink_to_fit(cuda_stream_view stream) except +
+        device_buffer release()
+        size_t capacity()
+        T* data()
+        size_t size()
+        device_memory_resource* memory_resource()
diff --git a/python/rmm/rmm/cpp/cpp_logger.pyx b/python/rmm/rmm/cpp/cpp_logger.pyx
new file mode 100644
index 000000000..241a748c3
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_logger.pyx
@@ -0,0 +1,66 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcpp cimport bool + + +cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: + cpdef enum logging_level "spdlog::level::level_enum": + """ + The debug logging level for RMM. + + Debug logging prints messages to a log file. See + `Debug Logging `_ + for more information. + + Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, + INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. + + Examples + -------- + >>> import rmm + >>> rmm.logging_level.DEBUG + + >>> rmm.logging_level.DEBUG.value + 1 + >>> rmm.logging_level.DEBUG.name + 'DEBUG' + + See Also + -------- + set_logging_level : Set the debug logging level + get_logging_level : Get the current debug logging level + """ + TRACE "spdlog::level::trace" + DEBUG "spdlog::level::debug" + INFO "spdlog::level::info" + WARN "spdlog::level::warn" + ERR "spdlog::level::err" + CRITICAL "spdlog::level::critical" + OFF "spdlog::level::off" + + +cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: + cdef cppclass spdlog_logger "spdlog::logger": + spdlog_logger() except + + void set_level(logging_level level) + logging_level level() + void flush() except + + void flush_on(logging_level level) + logging_level flush_level() + bool should_log(logging_level msg_level) + + +cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: + cdef spdlog_logger& logger() except + diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pxd b/python/rmm/rmm/cpp/cpp_memory_resource.pxd new file mode 100644 index 000000000..cef5d4737 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_memory_resource.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcpp.pair cimport pair + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view + + +cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass device_memory_resource: + void* allocate(size_t bytes) except + + void* allocate(size_t bytes, cuda_stream_view stream) except + + void deallocate(void* ptr, size_t bytes) except + + void deallocate( + void* ptr, + size_t bytes, + cuda_stream_view stream + ) except + + +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: + size_t percent_of_free_device_memory(int percent) except + + pair[size_t, size_t] available_device_memory() except + diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pyx b/python/rmm/rmm/cpp/cpp_memory_resource.pyx new file mode 100644 index 000000000..50e201517 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_memory_resource.pyx @@ -0,0 +1,215 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This import is needed for Cython typing in translate_python_except_to_cpp +# See https://github.com/cython/cython/issues/5589 +from builtins import BaseException + +from libc.stddef cimport size_t +from libc.stdint cimport int8_t, int64_t +from libcpp cimport bool +from libcpp.optional cimport optional +from libcpp.pair cimport pair +from libcpp.string cimport string + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cpp_memory_resource cimport device_memory_resource + +# Transparent handle of a C++ exception +ctypedef pair[int, string] CppExcept + +cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: + """Translate a Python exception into a C++ exception handle + + The returned exception handle can then be thrown by `throw_cpp_except()`, + which MUST be done without holding the GIL. + + This is useful when C++ calls a Python function and needs to catch or + propagate exceptions. + """ + if isinstance(err, MemoryError): + return CppExcept(0, str.encode(str(err))) + return CppExcept(-1, str.encode(str(err))) + +# Implementation of `throw_cpp_except()`, which throws a given `CppExcept`. +# This function MUST be called without the GIL otherwise the thrown C++ +# exception are translated back into a Python exception. 
+cdef extern from *: + """ + #include + #include + + void throw_cpp_except(std::pair res) { + switch(res.first) { + case 0: + throw rmm::out_of_memory(res.second); + default: + throw std::runtime_error(res.second); + } + } + """ + void throw_cpp_except(CppExcept) nogil + + +# NOTE: Keep extern declarations in .pyx file as much as possible to avoid +# leaking dependencies when importing RMM Cython .pxd files +cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass cuda_memory_resource(device_memory_resource): + cuda_memory_resource() except + + +cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass managed_memory_resource(device_memory_resource): + managed_memory_resource() except + + +cdef extern from "rmm/mr/device/system_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass system_memory_resource(device_memory_resource): + system_memory_resource() except + + +cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass sam_headroom_memory_resource(device_memory_resource): + sam_headroom_memory_resource(size_t headroom) except + + +cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + + cdef cppclass cuda_async_memory_resource(device_memory_resource): + cuda_async_memory_resource( + optional[size_t] initial_pool_size, + optional[size_t] release_threshold, + optional[allocation_handle_type] export_handle_type) except + + +# TODO: when we adopt Cython 3.0 use enum class +cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ + namespace \ + "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \ + nogil: + enum allocation_handle_type \ + "rmm::mr::cuda_async_memory_resource::allocation_handle_type": + none + posix_file_descriptor + win32 + win32_kmt + + +cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass pool_memory_resource[Upstream](device_memory_resource): + pool_memory_resource( + Upstream* upstream_mr, + size_t initial_pool_size, + optional[size_t] maximum_pool_size) except + + size_t pool_size() + +cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource): + fixed_size_memory_resource( + Upstream* upstream_mr, + size_t block_size, + size_t block_to_preallocate) except + + +cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*) + ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*) + + cdef cppclass callback_memory_resource(device_memory_resource): + callback_memory_resource( + allocate_callback_t allocate_callback, + deallocate_callback_t deallocate_callback, + void* allocate_callback_arg, + void* deallocate_callback_arg + ) except + + +cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass binning_memory_resource[Upstream](device_memory_resource): + binning_memory_resource(Upstream* upstream_mr) except + + binning_memory_resource( + Upstream* upstream_mr, + int8_t min_size_exponent, + int8_t max_size_exponent) except + + + void add_bin(size_t allocation_size) except + + void add_bin( + size_t allocation_size, + device_memory_resource* bin_resource) except + + +cdef extern from 
"rmm/mr/device/limiting_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource): + limiting_resource_adaptor( + Upstream* upstream_mr, + size_t allocation_limit) except + + + size_t get_allocated_bytes() except + + size_t get_allocation_limit() except + + +cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): + logging_resource_adaptor( + Upstream* upstream_mr, + string filename) except + + + void flush() except + + +cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource): + struct counter: + counter() + + int64_t value + int64_t peak + int64_t total + + statistics_resource_adaptor(Upstream* upstream_mr) except + + + counter get_bytes_counter() except + + counter get_allocations_counter() except + + pair[counter, counter] pop_counters() except + + pair[counter, counter] push_counters() except + + +cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): + tracking_resource_adaptor( + Upstream* upstream_mr, + bool capture_stacks) except + + + size_t get_allocated_bytes() except + + string get_outstanding_allocations_str() except + + void log_outstanding_allocations() except + + +cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + ctypedef bool (*failure_callback_t)(size_t, void*) + cdef cppclass failure_callback_resource_adaptor[Upstream]( + device_memory_resource + ): + failure_callback_resource_adaptor( + Upstream* upstream_mr, + failure_callback_t callback, + void* callback_arg + ) except + + +cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource): + prefetch_resource_adaptor(Upstream* upstream_mr) except + diff --git a/python/rmm/rmm/cpp/cpp_per_device_resource.pxd b/python/rmm/rmm/cpp/cpp_per_device_resource.pxd new file mode 100644 index 000000000..59f651687 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_per_device_resource.pxd @@ -0,0 +1,36 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource + + +cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: + cdef cppclass cuda_device_id: + ctypedef int value_type + + cuda_device_id(value_type id) + + value_type value() + +cdef extern from "rmm/mr/device/per_device_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef device_memory_resource* set_current_device_resource( + device_memory_resource* new_mr + ) + cdef device_memory_resource* get_current_device_resource() + cdef device_memory_resource* set_per_device_resource( + cuda_device_id id, device_memory_resource* new_mr + ) + cdef device_memory_resource* get_per_device_resource ( + cuda_device_id id + ) diff --git a/python/rmm/rmm/python/CMakeLists.txt b/python/rmm/rmm/python/CMakeLists.txt new file mode 100644 index 000000000..558c0ce9a --- /dev/null +++ b/python/rmm/rmm/python/CMakeLists.txt @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx + helper.pyx) +set(linked_libraries rmm::rmm) + +# Build all of the Cython targets +rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + CXX) + +# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols +# loaded from other DSOs +foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden) +endforeach() diff --git a/python/rmm/rmm/python/__init__.pxd b/python/rmm/rmm/python/__init__.pxd new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/python/__init__.pxd @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/python/__init__.py b/python/rmm/rmm/python/__init__.py new file mode 100644 index 000000000..0b8672ef6 --- /dev/null +++ b/python/rmm/rmm/python/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .device_buffer import DeviceBuffer diff --git a/python/rmm/rmm/python/cuda_stream.pyx b/python/rmm/rmm/python/cuda_stream.pyx new file mode 100644 index 000000000..f1529111d --- /dev/null +++ b/python/rmm/rmm/python/cuda_stream.pyx @@ -0,0 +1,36 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cimport cython +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + +from rmm.cpp.cpp_cuda_stream cimport cuda_stream + + +@cython.final +cdef class CudaStream: + """ + Wrapper around a CUDA stream with RAII semantics. + When a CudaStream instance is GC'd, the underlying + CUDA stream is destroyed. + """ + def __cinit__(self): + self.c_obj.reset(new cuda_stream()) + + cdef cudaStream_t value(self) except * nogil: + return self.c_obj.get()[0].value() + + cdef bool is_valid(self) except * nogil: + return self.c_obj.get()[0].is_valid() diff --git a/python/rmm/rmm/python/device_buffer.pxd b/python/rmm/rmm/python/device_buffer.pxd new file mode 100644 index 000000000..d8b56d2fb --- /dev/null +++ b/python/rmm/rmm/python/device_buffer.pxd @@ -0,0 +1,71 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libc.stdint cimport uintptr_t +from libcpp.memory cimport unique_ptr + +from rmm._cuda.stream cimport Stream +from rmm.cpp.cpp_device_buffer cimport device_buffer +from rmm.python.memory_resource cimport DeviceMemoryResource + + +cdef class DeviceBuffer: + cdef unique_ptr[device_buffer] c_obj + + # Holds a reference to the DeviceMemoryResource used for allocation. + # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is + # needed for deallocation + cdef DeviceMemoryResource mr + + # Holds a reference to the stream used by the underlying `device_buffer`. 
+ # Ensures the stream does not get destroyed before this DeviceBuffer + cdef Stream stream + + @staticmethod + cdef DeviceBuffer c_from_unique_ptr( + unique_ptr[device_buffer] ptr, + Stream stream=*, + DeviceMemoryResource mr=*, + ) + + @staticmethod + cdef DeviceBuffer c_to_device(const unsigned char[::1] b, + Stream stream=*) except * + cpdef copy_to_host(self, ary=*, Stream stream=*) + cpdef copy_from_host(self, ary, Stream stream=*) + cpdef copy_from_device(self, cuda_ary, Stream stream=*) + cpdef bytes tobytes(self, Stream stream=*) + + cdef size_t c_size(self) except * + cpdef void reserve(self, size_t new_capacity, Stream stream=*) except * + cpdef void resize(self, size_t new_size, Stream stream=*) except * + cpdef size_t capacity(self) except * + cdef void* c_data(self) except * + + cdef device_buffer c_release(self) except * + +cpdef DeviceBuffer to_device(const unsigned char[::1] b, + Stream stream=*) +cpdef void copy_ptr_to_host(uintptr_t db, + unsigned char[::1] hb, + Stream stream=*) except * + +cpdef void copy_host_to_ptr(const unsigned char[::1] hb, + uintptr_t db, + Stream stream=*) except * + +cpdef void copy_device_to_ptr(uintptr_t d_src, + uintptr_t d_dst, + size_t count, + Stream stream=*) except * diff --git a/python/rmm/rmm/python/device_buffer.pyx b/python/rmm/rmm/python/device_buffer.pyx new file mode 100644 index 000000000..1d564ff33 --- /dev/null +++ b/python/rmm/rmm/python/device_buffer.pyx @@ -0,0 +1,559 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + +cimport cython +from cpython.bytes cimport PyBytes_FromStringAndSize +from libc.stdint cimport uintptr_t +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from rmm._cuda.stream cimport Stream + +from rmm._cuda.stream import DEFAULT_STREAM + +cimport cuda.ccudart as ccudart +from cuda.ccudart cimport ( + cudaError, + cudaError_t, + cudaMemcpyAsync, + cudaMemcpyKind, + cudaStream_t, +) + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cpp_device_buffer cimport ( + cuda_device_id, + device_buffer, + get_current_cuda_device, + prefetch, +) +from rmm.python.memory_resource cimport ( + DeviceMemoryResource, + device_memory_resource, + get_current_device_resource, +) + + +# The DeviceMemoryResource attribute could be released prematurely +# by the gc if the DeviceBuffer is in a reference cycle. Removing +# the tp_clear function with the no_gc_clear decoration prevents that. +# See https://github.com/rapidsai/rmm/pull/931 for details. 
+@cython.no_gc_clear +cdef class DeviceBuffer: + + def __cinit__(self, *, + uintptr_t ptr=0, + size_t size=0, + Stream stream=DEFAULT_STREAM, + DeviceMemoryResource mr=None): + """Construct a ``DeviceBuffer`` with optional size and data pointer + + Parameters + ---------- + ptr : int + pointer to some data on host or device to copy over + size : int + size of the buffer to allocate + (and possibly size of data to copy) + stream : optional + CUDA stream to use for construction and/or copying, + defaults to the CUDA default stream. A reference to the + stream is stored internally to ensure it doesn't go out of + scope while the DeviceBuffer is in use. Destroying the + underlying stream while the DeviceBuffer is in use will + result in undefined behavior. + mr : optional + DeviceMemoryResource for the allocation, if not provided + defaults to the current device resource. + + Note + ---- + If the pointer passed is non-null and ``stream`` is the default stream, + it is synchronized after the copy. However if a non-default ``stream`` + is provided, this function is fully asynchronous. + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=5) + """ + cdef const void* c_ptr + cdef device_memory_resource * mr_ptr + # Save a reference to the MR and stream used for allocation + self.mr = get_current_device_resource() if mr is None else mr + self.stream = stream + + mr_ptr = self.mr.get_mr() + with nogil: + c_ptr = ptr + + if c_ptr == NULL or size == 0: + self.c_obj.reset(new device_buffer(size, stream.view(), mr_ptr)) + else: + self.c_obj.reset(new device_buffer(c_ptr, size, stream.view(), mr_ptr)) + + if stream.c_is_default(): + stream.c_synchronize() + + def __len__(self): + return self.size + + def __sizeof__(self): + return self.size + + def __bytes__(self): + return self.tobytes() + + @property + def nbytes(self): + """Gets the size of the buffer in bytes.""" + return self.size + + @property + def ptr(self): + """Gets a pointer to the underlying data.""" + return int(self.c_data()) + + @property + def size(self): + """Gets the size of the buffer in bytes.""" + return int(self.c_size()) + + def __reduce__(self): + return to_device, (self.copy_to_host(),) + + @property + def __cuda_array_interface__(self): + cdef dict intf = { + "data": (self.ptr, False), + "shape": (self.size,), + "strides": None, + "typestr": "|u1", + "version": 0 + } + return intf + + def prefetch(self, device=None, stream=None): + """Prefetch buffer data to the specified device on the specified stream. + + Assumes the storage for this DeviceBuffer is CUDA managed memory + (unified memory). If it is not, this function is a no-op. + + Parameters + ---------- + device : optional + The CUDA device to which to prefetch the memory for this buffer. + Defaults to the current CUDA device. To prefetch to the CPU, pass + :py:attr:`~cuda.cudart.cudaCpuDeviceId` as the device. + stream : optional + CUDA stream to use for prefetching. Defaults to self.stream + """ + cdef cuda_device_id dev = (get_current_cuda_device() + if device is None + else cuda_device_id(device)) + cdef Stream strm = self.stream if stream is None else stream + with nogil: + prefetch(self.c_obj.get()[0].data(), + self.c_obj.get()[0].size(), + dev, + strm.view()) + + def copy(self): + """Returns a copy of DeviceBuffer. 
+ + Returns + ------- + A deep copy of existing ``DeviceBuffer`` + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer.to_device(b"abc") + >>> db_copy = db.copy() + >>> db.copy_to_host() + array([97, 98, 99], dtype=uint8) + >>> db_copy.copy_to_host() + array([97, 98, 99], dtype=uint8) + >>> assert db is not db_copy + >>> assert db.ptr != db_copy.ptr + """ + ret = DeviceBuffer(ptr=self.ptr, size=self.size, stream=self.stream) + ret.mr = self.mr + return ret + + def __copy__(self): + return self.copy() + + @staticmethod + cdef DeviceBuffer c_from_unique_ptr( + unique_ptr[device_buffer] ptr, + Stream stream=DEFAULT_STREAM, + DeviceMemoryResource mr=None, + ): + cdef DeviceBuffer buf = DeviceBuffer.__new__(DeviceBuffer) + if stream.c_is_default(): + stream.c_synchronize() + buf.c_obj = move(ptr) + buf.mr = get_current_device_resource() if mr is None else mr + buf.stream = stream + return buf + + @staticmethod + cdef DeviceBuffer c_to_device(const unsigned char[::1] b, + Stream stream=DEFAULT_STREAM) except *: + """Calls ``to_device`` function on arguments provided""" + return to_device(b, stream) + + @staticmethod + def to_device(const unsigned char[::1] b, + Stream stream=DEFAULT_STREAM): + """Calls ``to_device`` function on arguments provided.""" + return to_device(b, stream) + + cpdef copy_to_host(self, ary=None, Stream stream=DEFAULT_STREAM): + """Copy from a ``DeviceBuffer`` to a buffer on host. + + Parameters + ---------- + ary : ``bytes``-like buffer to write into + stream : CUDA stream to use for copying, default the default stream + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer.to_device(b"abc") + >>> hb = bytearray(db.nbytes) + >>> db.copy_to_host(hb) + >>> print(hb) + bytearray(b'abc') + >>> hb = db.copy_to_host() + >>> print(hb) + bytearray(b'abc') + """ + cdef const device_buffer* dbp = self.c_obj.get() + cdef size_t s = dbp.size() + + cdef unsigned char[::1] hb = ary + if hb is None: + # NumPy leverages huge pages under-the-hood, + # which speeds up the copy from device to host. + hb = ary = np.empty((s,), dtype="u1") + elif len(hb) < s: + raise ValueError( + "Argument `ary` is too small. Need space for %i bytes." % s + ) + + copy_ptr_to_host(dbp.data(), hb[:s], stream) + + return ary + + cpdef copy_from_host(self, ary, Stream stream=DEFAULT_STREAM): + """Copy from a buffer on host to ``self`` + + Parameters + ---------- + ary : ``bytes``-like buffer to copy from + stream : CUDA stream to use for copying, default the default stream + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=10) + >>> hb = b"abcdef" + >>> db.copy_from_host(hb) + >>> hb = db.copy_to_host() + >>> print(hb) + array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) + """ + cdef device_buffer* dbp = self.c_obj.get() + + cdef const unsigned char[::1] hb = ary + cdef size_t s = len(hb) + if s > self.size: + raise ValueError( + "Argument `ary` is too large. Need space for %i bytes." 
% s + ) + + copy_host_to_ptr(hb[:s], dbp.data(), stream) + + cpdef copy_from_device(self, cuda_ary, + Stream stream=DEFAULT_STREAM): + """Copy from a buffer on host to ``self`` + + Parameters + ---------- + cuda_ary : object to copy from that has ``__cuda_array_interface__`` + stream : CUDA stream to use for copying, default the default stream + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=5) + >>> db2 = rmm.DeviceBuffer.to_device(b"abc") + >>> db.copy_from_device(db2) + >>> hb = db.copy_to_host() + >>> print(hb) + array([97, 98, 99, 0, 0], dtype=uint8) + """ + if not hasattr(cuda_ary, "__cuda_array_interface__"): + raise ValueError( + "Expected object to support `__cuda_array_interface__` " + "protocol" + ) + + cuda_ary_interface = cuda_ary.__cuda_array_interface__ + shape = cuda_ary_interface["shape"] + strides = cuda_ary_interface.get("strides") + dtype = np.dtype(cuda_ary_interface["typestr"]) + + if len(shape) > 1: + raise ValueError( + "Only 1-D contiguous arrays are supported, got {}-D " + "array".format(str(len(shape))) + ) + + if strides is not None: + if strides[0] != dtype.itemsize: + raise ValueError( + "Only 1-D contiguous arrays are supported, got a " + "non-contiguous array" + ) + + cdef uintptr_t src_ptr = cuda_ary_interface["data"][0] + cdef size_t s = shape[0] * dtype.itemsize + if s > self.size: + raise ValueError( + "Argument `hb` is too large. Need space for %i bytes." % s + ) + + cdef device_buffer* dbp = self.c_obj.get() + + copy_device_to_ptr( + src_ptr, + dbp.data(), + s, + stream + ) + + cpdef bytes tobytes(self, Stream stream=DEFAULT_STREAM): + cdef const device_buffer* dbp = self.c_obj.get() + cdef size_t s = dbp.size() + + cdef bytes b = PyBytes_FromStringAndSize(NULL, s) + cdef unsigned char* p = b + cdef unsigned char[::1] mv = (p)[:s] + self.copy_to_host(mv, stream) + + return b + + cdef size_t c_size(self) except *: + return self.c_obj.get()[0].size() + + cpdef void reserve(self, + size_t new_capacity, + Stream stream=DEFAULT_STREAM) except *: + self.c_obj.get()[0].reserve(new_capacity, stream.view()) + + cpdef void resize(self, + size_t new_size, + Stream stream=DEFAULT_STREAM) except *: + self.c_obj.get()[0].resize(new_size, stream.view()) + + cpdef size_t capacity(self) except *: + return self.c_obj.get()[0].capacity() + + cdef void* c_data(self) except *: + return self.c_obj.get()[0].data() + + cdef device_buffer c_release(self) except *: + """ + Releases ownership of the data held by this DeviceBuffer. + """ + return move(cython.operator.dereference(self.c_obj)) + + +@cython.boundscheck(False) +cpdef DeviceBuffer to_device(const unsigned char[::1] b, + Stream stream=DEFAULT_STREAM): + """Return a new ``DeviceBuffer`` with a copy of the data. 
+ + Parameters + ---------- + b : ``bytes``-like data on host to copy to device + stream : CUDA stream to use for copying, default the default stream + + Returns + ------- + ``DeviceBuffer`` with copy of data from host + + Examples + -------- + >>> import rmm + >>> db = rmm._lib.device_buffer.to_device(b"abc") + >>> print(bytes(db)) + b'abc' + """ + + if b is None: + raise TypeError( + "Argument 'b' has incorrect type" + " (expected bytes-like, got NoneType)" + ) + + cdef uintptr_t p = &b[0] + cdef size_t s = len(b) + return DeviceBuffer(ptr=p, size=s, stream=stream) + + +@cython.boundscheck(False) +cdef void _copy_async(const void* src, + void* dst, + size_t count, + ccudart.cudaMemcpyKind kind, + cuda_stream_view stream) except * nogil: + """ + Asynchronously copy data between host and/or device pointers. + + This is a convenience wrapper around cudaMemcpyAsync that + checks for errors. Only used for internal implementation. + + Parameters + ---------- + src : pointer to ``bytes``-like host buffer or device data to copy from + dst : pointer to ``bytes``-like host buffer or device data to copy into + count : the size in bytes to copy + kind : the kind of copy to perform + stream : CUDA stream to use for copying, default the default stream + """ + cdef cudaError_t err = cudaMemcpyAsync(dst, src, count, kind, + stream) + + if err != cudaError.cudaSuccess: + raise RuntimeError(f"Memcpy failed with error: {err}") + + +@cython.boundscheck(False) +cpdef void copy_ptr_to_host(uintptr_t db, + unsigned char[::1] hb, + Stream stream=DEFAULT_STREAM) except *: + """Copy from a device pointer to a buffer on host + + Parameters + ---------- + db : pointer to data on device to copy + hb : ``bytes``-like buffer to write into + stream : CUDA stream to use for copying, default the default stream + + Note + ---- + If ``stream`` is the default stream, it is synchronized after the copy. + However if a non-default ``stream`` is provided, this function is fully + asynchronous. + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer.to_device(b"abc") + >>> hb = bytearray(db.nbytes) + >>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb) + >>> print(hb) + bytearray(b'abc') + """ + + if hb is None: + raise TypeError( + "Argument `hb` has incorrect type" + " (expected bytes-like, got NoneType)" + ) + + with nogil: + _copy_async(db, &hb[0], len(hb), + cudaMemcpyKind.cudaMemcpyDeviceToHost, stream.view()) + + if stream.c_is_default(): + stream.c_synchronize() + + +@cython.boundscheck(False) +cpdef void copy_host_to_ptr(const unsigned char[::1] hb, + uintptr_t db, + Stream stream=DEFAULT_STREAM) except *: + """Copy from a host pointer to a device pointer + + Parameters + ---------- + hb : ``bytes``-like host buffer to copy + db : pointer to data on device to write into + stream : CUDA stream to use for copying, default the default stream + + Note + ---- + If ``stream`` is the default stream, it is synchronized after the copy. + However if a non-default ``stream`` is provided, this function is fully + asynchronous. 
+ + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=10) + >>> hb = b"abc" + >>> rmm._lib.device_buffer.copy_host_to_ptr(hb, db.ptr) + >>> hb = db.copy_to_host() + >>> print(hb) + array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) + """ + + if hb is None: + raise TypeError( + "Argument `hb` has incorrect type" + " (expected bytes-like, got NoneType)" + ) + + with nogil: + _copy_async(&hb[0], db, len(hb), + cudaMemcpyKind.cudaMemcpyHostToDevice, stream.view()) + + if stream.c_is_default(): + stream.c_synchronize() + + +@cython.boundscheck(False) +cpdef void copy_device_to_ptr(uintptr_t d_src, + uintptr_t d_dst, + size_t count, + Stream stream=DEFAULT_STREAM) except *: + """Copy from a device pointer to a device pointer + + Parameters + ---------- + d_src : pointer to data on device to copy from + d_dst : pointer to data on device to write into + count : the size in bytes to copy + stream : CUDA stream to use for copying, default the default stream + + Examples + -------- + >>> import rmm + >>> db = rmm.DeviceBuffer(size=5) + >>> db2 = rmm.DeviceBuffer.to_device(b"abc") + >>> rmm._lib.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) + >>> hb = db.copy_to_host() + >>> hb + array([97, 98, 99, 0, 0], dtype=uint8) + """ + + with nogil: + _copy_async(d_src, d_dst, count, + cudaMemcpyKind.cudaMemcpyDeviceToDevice, stream.view()) diff --git a/python/rmm/rmm/python/helper.pxd b/python/rmm/rmm/python/helper.pxd new file mode 100644 index 000000000..8ca151c00 --- /dev/null +++ b/python/rmm/rmm/python/helper.pxd @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +cdef object parse_bytes(object s) except * diff --git a/python/rmm/rmm/python/helper.pyx b/python/rmm/rmm/python/helper.pyx new file mode 100644 index 000000000..d442ee341 --- /dev/null +++ b/python/rmm/rmm/python/helper.pyx @@ -0,0 +1,78 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helper functions for rmm""" + +import re + + +cdef dict BYTE_SIZES = { + 'b': 1, + '': 1, + 'kb': 1000, + 'mb': 1000**2, + 'gb': 1000**3, + 'tb': 1000**4, + 'pb': 1000**5, + 'kib': 1024, + 'mib': 1024**2, + 'gib': 1024**3, + 'tib': 1024**4, + 'pib': 1024**5, +} + + +pattern = re.compile(r"^([0-9]+(?:\.[0-9]*)?)[\t ]*((?i:(?:[kmgtp]i?)?b))?$") + +cdef object parse_bytes(object s): + """Parse a string or integer into a number of bytes. + + Parameters + ---------- + s : int | str + Size in bytes. 
If an integer is provided, it is returned as-is. + A string is parsed as a floating point number with an (optional, + case-insensitive) byte-specifier, both SI prefixes (kb, mb, ..., pb) + and binary prefixes (kib, mib, ..., pib) are supported. + + Returns + ------- + Requested size in bytes as an integer. + + Raises + ------ + ValueError + If it is not possible to parse the input as a byte specification. + """ + cdef str suffix + cdef double n + cdef int multiplier + + if isinstance(s, int): + return s + + match = pattern.match(s) + + if match is None: + raise ValueError(f"Could not parse {s} as a byte specification") + + n = float(match.group(1)) + + suffix = match.group(2) + if suffix is None: + suffix = "" + + multiplier = BYTE_SIZES[suffix.lower()] + + return int(n*multiplier) diff --git a/python/rmm/rmm/python/lib.pxd b/python/rmm/rmm/python/lib.pxd new file mode 100644 index 000000000..b61e0d569 --- /dev/null +++ b/python/rmm/rmm/python/lib.pxd @@ -0,0 +1,17 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcpp.utility cimport pair + +ctypedef pair[const char*, unsigned int] caller_pair diff --git a/python/rmm/rmm/python/lib.pyx b/python/rmm/rmm/python/lib.pyx new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/python/lib.pyx @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/python/logger.pyx new file mode 100644 index 000000000..2997ff633 --- /dev/null +++ b/python/rmm/rmm/python/logger.pyx @@ -0,0 +1,208 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/python/logger.pyx
new file mode 100644
index 000000000..2997ff633
--- /dev/null
+++ b/python/rmm/rmm/python/logger.pyx
@@ -0,0 +1,208 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+from rmm.cpp.cpp_logging cimport logging_level, logger
+
+
+def _validate_level_type(level):
+    if not isinstance(level, logging_level):
+        raise TypeError("level must be an instance of the logging_level enum")
+
+
+def should_log(level):
+    """
+    Check if a message at the given level would be logged.
+
+    A message at the given level would be logged if the current debug logging
+    level is set to a level that is at least as verbose as the given level,
+    *and* the RMM module is compiled for a logging level at least as verbose.
+    If these conditions are not both met, this function will return False.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Returns
+    -------
+    should_log : bool
+        True if a message at the given level would be logged, False otherwise.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+    """
+    _validate_level_type(level)
+    return logger().should_log(level)
+
+
+def set_logging_level(level):
+    """
+    Set the debug logging level.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+
+    See Also
+    --------
+    get_logging_level : Get the current debug logging level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.set_logging_level(rmm.logging_level.WARN) # set logging level to warn
+    """
+    _validate_level_type(level)
+    logger().set_level(level)
+
+    if not should_log(level):
+        warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+                      "may be because the C++ library is compiled for a "
+                      "less-verbose logging level.")
+
+
+def get_logging_level():
+    """
+    Get the current debug logging level.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Returns
+    -------
+    level : logging_level
+        The current debug logging level, an instance of the ``logging_level``
+        enum.
+
+    See Also
+    --------
+    set_logging_level : Set the debug logging level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.get_logging_level() # get current logging level
+
+    """
+    return logging_level(logger().level())
+
+
+def flush_logger():
+    """
+    Flush the debug logger. This will cause any buffered log messages to
+    be written to the log file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    See Also
+    --------
+    set_flush_level : Set the flush level for the debug logger.
+    get_flush_level : Get the current debug logging flush level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.flush_logger() # flush the logger
+    """
+    logger().flush()
+
+
+def set_flush_level(level):
+    """
+    Set the flush level for the debug logger. Messages of this level or higher
+    will automatically flush to the file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+
+    See Also
+    --------
+    get_flush_level : Get the current debug logging flush level.
+    flush_logger : Flush the logger.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.set_flush_level(rmm.logging_level.WARN) # set flush level to warn
+    """
+    _validate_level_type(level)
+    logger().flush_on(level)
+
+    if not should_log(level):
+        warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+                      "may be because the C++ library is compiled for a "
+                      "less-verbose logging level.")
+
+
+def get_flush_level():
+    """
+    Get the current debug logging flush level for the RMM logger. Messages of
+    this level or higher will automatically flush to the file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging `_
+    for more information.
+
+    Returns
+    -------
+    logging_level
+        The current flush level, an instance of the ``logging_level``
+        enum.
+
+    See Also
+    --------
+    set_flush_level : Set the flush level for the logger.
+    flush_logger : Flush the logger.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.get_flush_level() # get current flush level
+
+    """
+    return logging_level(logger().flush_level())
diff --git a/python/rmm/rmm/python/memory_resource.pxd b/python/rmm/rmm/python/memory_resource.pxd
new file mode 100644
index 000000000..0adda4eaf
--- /dev/null
+++ b/python/rmm/rmm/python/memory_resource.pxd
@@ -0,0 +1,83 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
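# Illustrative usage sketch for the debug-logging helpers defined in logger.pyx
# above (editorial, not part of the patch). It assumes these functions are
# re-exported at the package top level, as the docstring examples suggest, and
# that the C++ library was compiled with a sufficiently verbose logging level.
import rmm

rmm.set_logging_level(rmm.logging_level.DEBUG)   # raise run-time verbosity
if rmm.should_log(rmm.logging_level.DEBUG):
    print("DEBUG messages will be written to the RMM log file")

rmm.set_flush_level(rmm.logging_level.WARN)      # auto-flush WARN and above
rmm.flush_logger()                               # write out anything buffered
print(rmm.get_logging_level(), rmm.get_flush_level())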
+ +from libcpp.memory cimport shared_ptr + +from rmm.cpp.cpp_memory_resource cimport device_memory_resource + + +cdef class DeviceMemoryResource: + cdef shared_ptr[device_memory_resource] c_obj + cdef device_memory_resource* get_mr(self) noexcept nogil + +cdef class UpstreamResourceAdaptor(DeviceMemoryResource): + cdef readonly DeviceMemoryResource upstream_mr + + cpdef DeviceMemoryResource get_upstream(self) + +cdef class CudaMemoryResource(DeviceMemoryResource): + pass + +cdef class ManagedMemoryResource(DeviceMemoryResource): + pass + +cdef class SystemMemoryResource(DeviceMemoryResource): + pass + +cdef class SamHeadroomMemoryResource(DeviceMemoryResource): + pass + +cdef class CudaAsyncMemoryResource(DeviceMemoryResource): + pass + +cdef class PoolMemoryResource(UpstreamResourceAdaptor): + pass + +cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): + pass + +cdef class BinningMemoryResource(UpstreamResourceAdaptor): + + cdef readonly list _bin_mrs + + cpdef add_bin( + self, + size_t allocation_size, + DeviceMemoryResource bin_resource=*) + +cdef class CallbackMemoryResource(DeviceMemoryResource): + cdef object _allocate_func + cdef object _deallocate_func + +cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): + cdef object _log_file_name + cpdef get_file_name(self) + cpdef flush(self) + +cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): + cdef object _callback + +cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): + pass + +cpdef DeviceMemoryResource get_current_device_resource() diff --git a/python/rmm/rmm/python/memory_resource.pyx b/python/rmm/rmm/python/memory_resource.pyx new file mode 100644 index 000000000..46613667c --- /dev/null +++ b/python/rmm/rmm/python/memory_resource.pyx @@ -0,0 +1,1170 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
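# Illustrative sketch of how the classes declared in memory_resource.pxd above
# compose (editorial, not part of the patch): upstream-tracking adaptors wrap a
# base resource, and the result is installed as the current device resource.
# The sizes and the choice of adaptors are arbitrary examples.
import rmm

base = rmm.mr.CudaMemoryResource()
pool = rmm.mr.PoolMemoryResource(base, initial_pool_size="64MiB")
stats = rmm.mr.StatisticsResourceAdaptor(pool)
rmm.mr.set_current_device_resource(stats)

buf = rmm.DeviceBuffer(size=1024)             # allocated through stats -> pool -> cuda
print(stats.allocation_counts.current_bytes)  # non-zero while buf is alive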
+ +import os +import warnings +# This import is needed for Cython typing in translate_python_except_to_cpp +# See https://github.com/cython/cython/issues/5589 +from builtins import BaseException +from collections import defaultdict + +cimport cython +from cython.operator cimport dereference as deref +from libc.stddef cimport size_t +from libc.stdint cimport int8_t, uintptr_t +from libcpp cimport bool +from libcpp.memory cimport make_unique, unique_ptr +from libcpp.optional cimport optional +from libcpp.pair cimport pair + +from cuda.cudart import cudaError_t + +from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice + +from rmm._cuda.stream cimport Stream + +from rmm._cuda.stream import DEFAULT_STREAM + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cpp_per_device_resource cimport ( + cuda_device_id, + set_per_device_resource as cpp_set_per_device_resource, +) +from rmm.python.helper cimport parse_bytes +from rmm.python.memory_resource cimport ( + available_device_memory as c_available_device_memory, + percent_of_free_device_memory as c_percent_of_free_device_memory, +) + +from rmm.statistics import Statistics + +from rmm.cpp.cpp_memory_resource cimport ( + CppExcept, + allocate_callback_t, + allocation_handle_type, + binning_memory_resource, + callback_memory_resource, + cuda_async_memory_resource, + cuda_memory_resource, + deallocate_callback_t, + device_memory_resource, + failure_callback_resource_adaptor, + failure_callback_t, + fixed_size_memory_resource, + limiting_resource_adaptor, + logging_resource_adaptor, + managed_memory_resource, + pool_memory_resource, + posix_file_descriptor, + prefetch_resource_adaptor, + sam_headroom_memory_resource, + statistics_resource_adaptor, + system_memory_resource, + throw_cpp_except, + tracking_resource_adaptor, + translate_python_except_to_cpp, +) + + +cdef class DeviceMemoryResource: + + cdef device_memory_resource* get_mr(self) noexcept nogil: + """Get the underlying C++ memory resource object.""" + return self.c_obj.get() + + def allocate(self, size_t nbytes, Stream stream=DEFAULT_STREAM): + """Allocate ``nbytes`` bytes of memory. + + Parameters + ---------- + nbytes : size_t + The size of the allocation in bytes + stream : Stream + Optional stream for the allocation + """ + return self.c_obj.get().allocate(nbytes, stream.view()) + + def deallocate(self, uintptr_t ptr, size_t nbytes, Stream stream=DEFAULT_STREAM): + """Deallocate memory pointed to by ``ptr`` of size ``nbytes``. + + Parameters + ---------- + ptr : uintptr_t + Pointer to be deallocated + nbytes : size_t + Size of the allocation in bytes + stream : Stream + Optional stream for the deallocation + """ + self.c_obj.get().deallocate((ptr), nbytes, stream.view()) + + +# See the note about `no_gc_clear` in `device_buffer.pyx`. +@cython.no_gc_clear +cdef class UpstreamResourceAdaptor(DeviceMemoryResource): + """Parent class for all memory resources that track an upstream. + + Upstream resource tracking requires maintaining a reference to the upstream + mr so that it is kept alive and may be accessed by any downstream resource + adaptors. 
+ """ + + def __cinit__(self, DeviceMemoryResource upstream_mr, *args, **kwargs): + + if (upstream_mr is None): + raise Exception("Argument `upstream_mr` must not be None") + + self.upstream_mr = upstream_mr + + def __dealloc__(self): + # Must cleanup the base MR before any upstream MR + self.c_obj.reset() + + cpdef DeviceMemoryResource get_upstream(self): + return self.upstream_mr + + +cdef class CudaMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new cuda_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``cudaMalloc``/``cudaFree`` for + allocation/deallocation. + """ + pass + + +cdef class CudaAsyncMemoryResource(DeviceMemoryResource): + """ + Memory resource that uses ``cudaMallocAsync``/``cudaFreeAsync`` for + allocation/deallocation. + + Parameters + ---------- + initial_pool_size : int | str, optional + Initial pool size in bytes. By default, half the available memory + on the device is used. A string argument is parsed using `parse_bytes`. + release_threshold: int, optional + Release threshold in bytes. If the pool size grows beyond this + value, unused memory held by the pool will be released at the + next synchronization point. + enable_ipc: bool, optional + If True, enables export of POSIX file descriptor handles for the memory + allocated by this resource so that it can be used with CUDA IPC. + """ + def __cinit__( + self, + initial_pool_size=None, + release_threshold=None, + enable_ipc=False + ): + cdef optional[size_t] c_initial_pool_size = ( + optional[size_t]() + if initial_pool_size is None + else optional[size_t]( parse_bytes(initial_pool_size)) + ) + + cdef optional[size_t] c_release_threshold = ( + optional[size_t]() + if release_threshold is None + else optional[size_t]( release_threshold) + ) + + # If IPC memory handles are not supported, the constructor below will + # raise an error from C++. + cdef optional[allocation_handle_type] c_export_handle_type = ( + optional[allocation_handle_type]( + posix_file_descriptor + ) + if enable_ipc + else optional[allocation_handle_type]() + ) + + self.c_obj.reset( + new cuda_async_memory_resource( + c_initial_pool_size, + c_release_threshold, + c_export_handle_type + ) + ) + + +cdef class ManagedMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new managed_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``cudaMallocManaged``/``cudaFree`` for + allocation/deallocation. + """ + pass + + +cdef class SystemMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new system_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. + """ + pass + + +cdef class SamHeadroomMemoryResource(DeviceMemoryResource): + def __cinit__( + self, + size_t headroom + ): + self.c_obj.reset(new sam_headroom_memory_resource(headroom)) + + def __init__( + self, + size_t headroom + ): + """ + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. 
+ + Parameters + ---------- + headroom : size_t + Size of the reserved GPU memory as headroom + """ + pass + + +cdef class PoolMemoryResource(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + initial_pool_size=None, + maximum_pool_size=None + ): + cdef size_t c_initial_pool_size + cdef optional[size_t] c_maximum_pool_size + c_initial_pool_size = ( + c_percent_of_free_device_memory(50) if + initial_pool_size is None + else parse_bytes(initial_pool_size) + ) + c_maximum_pool_size = ( + optional[size_t]() if + maximum_pool_size is None + else optional[size_t]( parse_bytes(maximum_pool_size)) + ) + self.c_obj.reset( + new pool_memory_resource[device_memory_resource]( + upstream_mr.get_mr(), + c_initial_pool_size, + c_maximum_pool_size + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + object initial_pool_size=None, + object maximum_pool_size=None + ): + """ + Coalescing best-fit suballocator which uses a pool of memory allocated + from an upstream memory resource. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The DeviceMemoryResource from which to allocate blocks for the + pool. + initial_pool_size : int | str, optional + Initial pool size in bytes. By default, half the available memory + on the device is used. + maximum_pool_size : int | str, optional + Maximum size in bytes, that the pool can grow to. + """ + pass + + def pool_size(self): + cdef pool_memory_resource[device_memory_resource]* c_mr = ( + (self.get_mr()) + ) + return c_mr.pool_size() + +cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + size_t block_size=1<<20, + size_t blocks_to_preallocate=128 + ): + self.c_obj.reset( + new fixed_size_memory_resource[device_memory_resource]( + upstream_mr.get_mr(), + block_size, + blocks_to_preallocate + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + size_t block_size=1<<20, + size_t blocks_to_preallocate=128 + ): + """ + Memory resource which allocates memory blocks of a single fixed size. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The DeviceMemoryResource from which to allocate blocks for the + pool. + block_size : int, optional + The size of blocks to allocate (default is 1MiB). + blocks_to_preallocate : int, optional + The number of blocks to allocate to initialize the pool. + + Notes + ----- + Supports only allocations of size smaller than the configured + block_size. + """ + pass + + +cdef class BinningMemoryResource(UpstreamResourceAdaptor): + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + int8_t min_size_exponent=-1, + int8_t max_size_exponent=-1, + ): + + self._bin_mrs = [] + + if (min_size_exponent == -1 or max_size_exponent == -1): + self.c_obj.reset( + new binning_memory_resource[device_memory_resource]( + upstream_mr.get_mr() + ) + ) + else: + self.c_obj.reset( + new binning_memory_resource[device_memory_resource]( + upstream_mr.get_mr(), + min_size_exponent, + max_size_exponent + ) + ) + + def __dealloc__(self): + + # Must cleanup the base MR before any upstream or referenced Bins + self.c_obj.reset() + + def __init__( + self, + DeviceMemoryResource upstream_mr, + int8_t min_size_exponent=-1, + int8_t max_size_exponent=-1, + ): + """ + Allocates memory from a set of specified "bin" sizes based on a + specified allocation size. 
+ + If min_size_exponent and max_size_exponent are specified, initializes + with one or more FixedSizeMemoryResource bins in the range + ``[2**min_size_exponent, 2**max_size_exponent]``. + + Call :py:meth:`~.add_bin` to add additional bin allocators. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The memory resource to use for allocations larger than any of the + bins. + min_size_exponent : size_t + The base-2 exponent of the minimum size FixedSizeMemoryResource + bin to create. + max_size_exponent : size_t + The base-2 exponent of the maximum size FixedSizeMemoryResource + bin to create. + """ + pass + + cpdef add_bin( + self, + size_t allocation_size, + DeviceMemoryResource bin_resource=None + ): + """ + Adds a bin of the specified maximum allocation size to this memory + resource. If specified, uses bin_resource for allocation for this bin. + If not specified, creates and uses a FixedSizeMemoryResource for + allocation for this bin. + + Allocations smaller than allocation_size and larger than the next + smaller bin size will use this fixed-size memory resource. + + Parameters + ---------- + allocation_size : size_t + The maximum allocation size in bytes for the created bin + bin_resource : DeviceMemoryResource + The resource to use for this bin (optional) + """ + if bin_resource is None: + (( + self.c_obj.get()))[0].add_bin(allocation_size) + else: + # Save the ref to the new bin resource to ensure its lifetime + self._bin_mrs.append(bin_resource) + + (( + self.c_obj.get()))[0].add_bin( + allocation_size, + bin_resource.get_mr()) + + @property + def bin_mrs(self) -> list: + """Get the list of binned memory resources.""" + return self._bin_mrs + + +cdef void* _allocate_callback_wrapper( + size_t nbytes, + cuda_stream_view stream, + void* ctx + # Note that this function is specifically designed to rethrow Python + # exceptions as C++ exceptions when called as a callback from C++, so it is + # noexcept from Cython's perspective. +) noexcept nogil: + cdef CppExcept err + with gil: + try: + return ((ctx)( + nbytes, + Stream._from_cudaStream_t(stream.value()) + )) + except BaseException as e: + err = translate_python_except_to_cpp(e) + throw_cpp_except(err) + +cdef void _deallocate_callback_wrapper( + void* ptr, + size_t nbytes, + cuda_stream_view stream, + void* ctx +) except * with gil: + (ctx)((ptr), nbytes, Stream._from_cudaStream_t(stream.value())) + + +cdef class CallbackMemoryResource(DeviceMemoryResource): + """ + A memory resource that uses the user-provided callables to do + memory allocation and deallocation. + + ``CallbackMemoryResource`` should really only be used for + debugging memory issues, as there is a significant performance + penalty associated with using a Python function for each memory + allocation and deallocation. + + Parameters + ---------- + allocate_func: callable + The allocation function must accept two arguments. An integer + representing the number of bytes to allocate and a Stream on + which to perform the allocation, and return an integer + representing the pointer to the allocated memory. + deallocate_func: callable + The deallocation function must accept three arguments. an integer + representing the pointer to the memory to free, a second + integer representing the number of bytes to free, and a Stream + on which to perform the deallocation. + + Examples + -------- + >>> import rmm + >>> base_mr = rmm.mr.CudaMemoryResource() + >>> def allocate_func(size, stream): + ... print(f"Allocating {size} bytes") + ... 
return base_mr.allocate(size, stream) + ... + >>> def deallocate_func(ptr, size, stream): + ... print(f"Deallocating {size} bytes") + ... return base_mr.deallocate(ptr, size, stream) + ... + >>> rmm.mr.set_current_device_resource( + rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func) + ) + >>> dbuf = rmm.DeviceBuffer(size=256) + Allocating 256 bytes + >>> del dbuf + Deallocating 256 bytes + """ + def __init__( + self, + allocate_func, + deallocate_func, + ): + self._allocate_func = allocate_func + self._deallocate_func = deallocate_func + self.c_obj.reset( + new callback_memory_resource( + (_allocate_callback_wrapper), + (_deallocate_callback_wrapper), + (allocate_func), + (deallocate_func) + ) + ) + + +def _append_id(filename, id): + """ + Append ".dev" onto a filename before the extension + + Example: _append_id("hello.txt", 1) returns "hello.dev1.txt" + + Parameters + ---------- + filename : string + The filename, possibly with extension + id : int + The ID to append + """ + name, ext = os.path.splitext(filename) + return f"{name}.dev{id}{ext}" + + +cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + size_t allocation_limit + ): + self.c_obj.reset( + new limiting_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + allocation_limit + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + size_t allocation_limit + ): + """ + Memory resource that limits the total allocation amount possible + performed by an upstream memory resource. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The upstream memory resource. + allocation_limit : size_t + Maximum memory allowed for this allocator. + """ + pass + + def get_allocated_bytes(self) -> size_t: + """ + Query the number of bytes that have been allocated. Note that this can + not be used to know how large of an allocation is possible due to both + possible fragmentation and also internal page sizes and alignment that + is not tracked by this allocator. + """ + return (( + self.c_obj.get()) + )[0].get_allocated_bytes() + + def get_allocation_limit(self) -> size_t: + """ + Query the maximum number of bytes that this allocator is allowed to + allocate. This is the limit on the allocator and not a representation + of the underlying device. The device may not be able to support this + limit. + """ + return (( + self.c_obj.get()) + )[0].get_allocation_limit() + + +cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + object log_file_name=None + ): + if log_file_name is None: + log_file_name = os.getenv("RMM_LOG_FILE") + if not log_file_name: + raise ValueError( + "RMM log file must be specified either using " + "log_file_name= argument or RMM_LOG_FILE " + "environment variable" + ) + + # Append the device ID before the file extension + log_file_name = _append_id( + log_file_name, getDevice() + ) + log_file_name = os.path.abspath(log_file_name) + self._log_file_name = log_file_name + + self.c_obj.reset( + new logging_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + log_file_name.encode() + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + object log_file_name=None + ): + """ + Memory resource that logs information about allocations/deallocations + performed by an upstream memory resource. + + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. 
+ log_file_name : str + Path to the file to which logs are written. + """ + pass + + cpdef flush(self): + (( + self.get_mr()))[0].flush() + + cpdef get_file_name(self): + return self._log_file_name + + def __dealloc__(self): + self.c_obj.reset() + +cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr + ): + self.c_obj.reset( + new statistics_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr() + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr + ): + """ + Memory resource that tracks the current, peak and total + allocations/deallocations performed by an upstream memory resource. + Includes the ability to query these statistics at any time. + + A stack of counters is maintained. Use :meth:`push_counters` and + :meth:`pop_counters` to track statistics at different nesting levels. + + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. + """ + pass + + @property + def allocation_counts(self) -> Statistics: + """ + Gets the current, peak, and total allocated bytes and number of + allocations. + + The dictionary keys are ``current_bytes``, ``current_count``, + ``peak_bytes``, ``peak_count``, ``total_bytes``, and ``total_count``. + + Returns: + dict: Dictionary containing allocation counts and bytes. + """ + cdef statistics_resource_adaptor[device_memory_resource]* mr = \ + self.c_obj.get() + + counts = deref(mr).get_allocations_counter() + byte_counts = deref(mr).get_bytes_counter() + return Statistics( + current_bytes=byte_counts.value, + current_count=counts.value, + peak_bytes=byte_counts.peak, + peak_count=counts.peak, + total_bytes=byte_counts.total, + total_count=counts.total, + ) + + def pop_counters(self) -> Statistics: + """ + Pop a counter pair (bytes and allocations) from the stack + + Returns + ------- + The popped statistics + """ + cdef statistics_resource_adaptor[device_memory_resource]* mr = \ + self.c_obj.get() + + bytes_and_allocs = deref(mr).pop_counters() + return Statistics( + current_bytes=bytes_and_allocs.first.value, + current_count=bytes_and_allocs.second.value, + peak_bytes=bytes_and_allocs.first.peak, + peak_count=bytes_and_allocs.second.peak, + total_bytes=bytes_and_allocs.first.total, + total_count=bytes_and_allocs.second.total, + ) + + def push_counters(self) -> Statistics: + """ + Push a new counter pair (bytes and allocations) on the stack + + Returns + ------- + The statistics _before_ the push + """ + + cdef statistics_resource_adaptor[device_memory_resource]* mr = \ + self.c_obj.get() + + bytes_and_allocs = deref(mr).push_counters() + return Statistics( + current_bytes=bytes_and_allocs.first.value, + current_count=bytes_and_allocs.second.value, + peak_bytes=bytes_and_allocs.first.peak, + peak_count=bytes_and_allocs.second.peak, + total_bytes=bytes_and_allocs.first.total, + total_count=bytes_and_allocs.second.total, + ) + +cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + bool capture_stacks=False + ): + self.c_obj.reset( + new tracking_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + capture_stacks + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + bool capture_stacks=False + ): + """ + Memory resource that logs tracks allocations/deallocations + performed by an upstream memory resource. Includes the ability to + query all outstanding allocations with the stack trace, if desired. 
+ + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. + capture_stacks : bool + Whether or not to capture the stack trace with each allocation. + """ + pass + + def get_allocated_bytes(self) -> size_t: + """ + Query the number of bytes that have been allocated. Note that this can + not be used to know how large of an allocation is possible due to both + possible fragmentation and also internal page sizes and alignment that + is not tracked by this allocator. + """ + return (( + self.c_obj.get()) + )[0].get_allocated_bytes() + + def get_outstanding_allocations_str(self) -> str: + """ + Returns a string containing information about the current outstanding + allocations. For each allocation, the address, size and optional + stack trace are shown. + """ + + return (( + self.c_obj.get()) + )[0].get_outstanding_allocations_str().decode('UTF-8') + + def log_outstanding_allocations(self): + """ + Logs the output of `get_outstanding_allocations_str` to the current + RMM log file if enabled. + """ + + (( + self.c_obj.get()))[0].log_outstanding_allocations() + + +# Note that this function is specifically designed to rethrow Python exceptions +# as C++ exceptions when called as a callback from C++, so it is noexcept from +# Cython's perspective. +cdef bool _oom_callback_function(size_t bytes, void *callback_arg) noexcept nogil: + cdef CppExcept err + with gil: + try: + return (callback_arg)(bytes) + except BaseException as e: + err = translate_python_except_to_cpp(e) + throw_cpp_except(err) + + +cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + object callback, + ): + self._callback = callback + self.c_obj.reset( + new failure_callback_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + _oom_callback_function, + callback + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + object callback, + ): + """ + Memory resource that call callback when memory allocation fails. + + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. + callback : callable + Function called when memory allocation fails. + """ + pass + +cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr + ): + self.c_obj.reset( + new prefetch_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr() + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr + ): + """ + Memory resource that prefetches all allocations. + + Parameters + ---------- + upstream : DeviceMemoryResource + The upstream memory resource. 
+ """ + pass + + +# Global per-device memory resources; dict of int:DeviceMemoryResource +cdef _per_device_mrs = defaultdict(CudaMemoryResource) + + +cpdef void _initialize( + bool pool_allocator=False, + bool managed_memory=False, + object initial_pool_size=None, + object maximum_pool_size=None, + object devices=0, + bool logging=False, + object log_file_name=None, +) except *: + """ + Initializes RMM library using the options passed + """ + if managed_memory: + upstream = ManagedMemoryResource + else: + upstream = CudaMemoryResource + + if pool_allocator: + typ = PoolMemoryResource + args = (upstream(),) + kwargs = dict( + initial_pool_size=None if initial_pool_size is None + else parse_bytes(initial_pool_size), + maximum_pool_size=None if maximum_pool_size is None + else parse_bytes(maximum_pool_size) + ) + else: + typ = upstream + args = () + kwargs = {} + + cdef DeviceMemoryResource mr + cdef int original_device + + # Save the current device so we can reset it + try: + original_device = getDevice() + except CUDARuntimeError as e: + if e.status == cudaError_t.cudaErrorNoDevice: + warnings.warn(e.msg) + else: + raise e + else: + # reset any previously specified per device resources + global _per_device_mrs + _per_device_mrs.clear() + + if devices is None: + devices = [0] + elif isinstance(devices, int): + devices = [devices] + + # create a memory resource per specified device + for device in devices: + setDevice(device) + + if logging: + mr = LoggingResourceAdaptor( + typ(*args, **kwargs), + log_file_name + ) + else: + mr = typ(*args, **kwargs) + + set_per_device_resource(device, mr) + + # reset CUDA device to original + setDevice(original_device) + + +cpdef get_per_device_resource(int device): + """ + Get the default memory resource for the specified device. + + If the returned memory resource is used when a different device is the + active CUDA device, behavior is undefined. + + Parameters + ---------- + device : int + The ID of the device for which to get the memory resource. + """ + global _per_device_mrs + return _per_device_mrs[device] + + +cpdef set_per_device_resource(int device, DeviceMemoryResource mr): + """ + Set the default memory resource for the specified device. + + Parameters + ---------- + device : int + The ID of the device for which to get the memory resource. + mr : DeviceMemoryResource + The memory resource to set. Must have been created while device was + the active CUDA device. + """ + global _per_device_mrs + _per_device_mrs[device] = mr + + # Since cuda_device_id does not have a default constructor, it must be heap + # allocated + cdef unique_ptr[cuda_device_id] device_id = \ + make_unique[cuda_device_id](device) + + cpp_set_per_device_resource(deref(device_id), mr.get_mr()) + + +cpdef set_current_device_resource(DeviceMemoryResource mr): + """ + Set the default memory resource for the current device. + + Parameters + ---------- + mr : DeviceMemoryResource + The memory resource to set. Must have been created while the current + device is the active CUDA device. + """ + set_per_device_resource(getDevice(), mr) + + +cpdef get_per_device_resource_type(int device): + """ + Get the memory resource type used for RMM device allocations on the + specified device. + + Parameters + ---------- + device : int + The device ID + """ + return type(get_per_device_resource(device)) + + +cpdef DeviceMemoryResource get_current_device_resource(): + """ + Get the memory resource used for RMM device allocations on the current + device. 
+ + If the returned memory resource is used when a different device is the + active CUDA device, behavior is undefined. + """ + return get_per_device_resource(getDevice()) + + +cpdef get_current_device_resource_type(): + """ + Get the memory resource type used for RMM device allocations on the + current device. + """ + return type(get_current_device_resource()) + + +cpdef is_initialized(): + """ + Check whether RMM is initialized + """ + global _per_device_mrs + cdef DeviceMemoryResource each_mr + return all( + [each_mr.get_mr() is not NULL + for each_mr in _per_device_mrs.values()] + ) + + +cpdef _flush_logs(): + """ + Flush the logs of all currently initialized LoggingResourceAdaptor + memory resources + """ + global _per_device_mrs + cdef DeviceMemoryResource each_mr + for each_mr in _per_device_mrs.values(): + if isinstance(each_mr, LoggingResourceAdaptor): + each_mr.flush() + + +def enable_logging(log_file_name=None): + """ + Enable logging of run-time events for all devices. + + Parameters + ---------- + log_file_name: str, optional + Name of the log file. If not specified, the environment variable + RMM_LOG_FILE is used. A ValueError is thrown if neither is available. + A separate log file is produced for each device, + and the suffix `".dev{id}"` is automatically added to the log file + name. + + Notes + ----- + Note that if you use the environment variable CUDA_VISIBLE_DEVICES + with logging enabled, the suffix may not be what you expect. For + example, if you set CUDA_VISIBLE_DEVICES=1, the log file produced + will still have suffix `0`. Similarly, if you set + CUDA_VISIBLE_DEVICES=1,0 and use devices 0 and 1, the log file + with suffix `0` will correspond to the GPU with device ID `1`. + Use `rmm.get_log_filenames()` to get the log file names + corresponding to each device. + """ + global _per_device_mrs + + devices = [0] if not _per_device_mrs.keys() else _per_device_mrs.keys() + + for device in devices: + each_mr = _per_device_mrs[device] + if not isinstance(each_mr, LoggingResourceAdaptor): + set_per_device_resource( + device, + LoggingResourceAdaptor(each_mr, log_file_name) + ) + + +def disable_logging(): + """ + Disable logging if it was enabled previously using + `rmm.initialize()` or `rmm.enable_logging()`. + """ + global _per_device_mrs + for i, each_mr in _per_device_mrs.items(): + if isinstance(each_mr, LoggingResourceAdaptor): + set_per_device_resource(i, each_mr.get_upstream()) + + +def get_log_filenames(): + """ + Returns the log filename (or `None` if not writing logs) + for each device in use. + + Examples + -------- + >>> import rmm + >>> rmm.reinitialize(devices=[0, 1], logging=True, log_file_name="rmm.log") + >>> rmm.get_log_filenames() + {0: '/home/user/workspace/rapids/rmm/python/rmm.dev0.log', + 1: '/home/user/workspace/rapids/rmm/python/rmm.dev1.log'} + """ + global _per_device_mrs + + return { + i: each_mr.get_file_name() + if isinstance(each_mr, LoggingResourceAdaptor) + else None + for i, each_mr in _per_device_mrs.items() + } + + +def available_device_memory(): + """ + Returns a tuple of free and total device memory memory. 
+ """ + cdef pair[size_t, size_t] res + res = c_available_device_memory() + return (res.first, res.second) From 3ed73225f9d95b340949b06f5e1c326c715e77e3 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 10 Sep 2024 21:27:39 -0700 Subject: [PATCH 02/13] seperate out the rest of the cython --- python/rmm/rmm/__init__.py | 10 ++--- python/rmm/rmm/_cuda/stream.pxd | 4 +- python/rmm/rmm/_cuda/stream.pyx | 4 +- python/rmm/rmm/_lib/__init__.py | 4 +- .../rmm/rmm/_lib/tests/test_device_buffer.pyx | 7 ++-- python/rmm/rmm/allocators/cupy.py | 4 +- python/rmm/rmm/allocators/numba.py | 4 +- python/rmm/rmm/allocators/torch.py | 4 +- python/rmm/rmm/cpp/CMakeLists.txt | 2 +- .../rmm/{python/lib.pyx => cpp/__init__.py} | 0 .../rmm/cpp/{cpp_logger.pyx => _logger.pxd} | 0 python/rmm/rmm/cpp/_logger.pyx | 15 ++++++++ python/rmm/rmm/cpp/cpp_memory_resource.pxd | 34 ----------------- .../{cpp_cuda_stream.pxd => cuda_stream.pxd} | 2 +- ...a_stream_pool.pxd => cuda_stream_pool.pxd} | 2 +- ...a_stream_view.pxd => cuda_stream_view.pxd} | 0 ...pp_device_buffer.pxd => device_buffer.pxd} | 4 +- ..._device_uvector.pxd => device_uvector.pxd} | 0 python/rmm/rmm/{python => cpp}/lib.pxd | 2 +- python/rmm/rmm/cpp/lib.pyx | 13 +++++++ ...emory_resource.pyx => memory_resource.pxd} | 23 +++++++++-- ...e_resource.pxd => per_device_resource.pxd} | 2 +- python/rmm/rmm/mr.py | 2 +- python/rmm/rmm/python/CMakeLists.txt | 3 +- python/rmm/rmm/python/cuda_stream.pxd | 27 +++++++++++++ python/rmm/rmm/python/cuda_stream.pyx | 2 +- python/rmm/rmm/python/device_buffer.pxd | 2 +- python/rmm/rmm/python/device_buffer.pyx | 14 +++---- python/rmm/rmm/python/logger.pyx | 5 ++- python/rmm/rmm/python/memory_resource.pxd | 2 +- python/rmm/rmm/python/memory_resource.pyx | 12 +++--- python/rmm/rmm/python/tests/__init__.py | 0 .../rmm/python/tests/test_device_buffer.pyx | 38 +++++++++++++++++++ python/rmm/rmm/tests/test_cython.py | 4 +- python/rmm/rmm/tests/test_rmm.py | 2 +- 35 files changed, 165 insertions(+), 88 deletions(-) rename python/rmm/rmm/{python/lib.pyx => cpp/__init__.py} (100%) rename python/rmm/rmm/cpp/{cpp_logger.pyx => _logger.pxd} (100%) create mode 100644 python/rmm/rmm/cpp/_logger.pyx delete mode 100644 python/rmm/rmm/cpp/cpp_memory_resource.pxd rename python/rmm/rmm/cpp/{cpp_cuda_stream.pxd => cuda_stream.pxd} (94%) rename python/rmm/rmm/cpp/{cpp_cuda_stream_pool.pxd => cuda_stream_pool.pxd} (93%) rename python/rmm/rmm/cpp/{cpp_cuda_stream_view.pxd => cuda_stream_view.pxd} (100%) rename python/rmm/rmm/cpp/{cpp_device_buffer.pxd => device_buffer.pxd} (94%) rename python/rmm/rmm/cpp/{cpp_device_uvector.pxd => device_uvector.pxd} (100%) rename python/rmm/rmm/{python => cpp}/lib.pxd (93%) create mode 100644 python/rmm/rmm/cpp/lib.pyx rename python/rmm/rmm/cpp/{cpp_memory_resource.pyx => memory_resource.pxd} (90%) rename python/rmm/rmm/cpp/{cpp_per_device_resource.pxd => per_device_resource.pxd} (95%) create mode 100644 python/rmm/rmm/python/cuda_stream.pxd create mode 100644 python/rmm/rmm/python/tests/__init__.py create mode 100644 python/rmm/rmm/python/tests/test_device_buffer.pyx diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py index 1e3b5c8b1..61c5e4561 100644 --- a/python/rmm/rmm/__init__.py +++ b/python/rmm/rmm/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -13,8 +13,10 @@ # limitations under the License. from rmm import mr -from rmm._lib.device_buffer import DeviceBuffer -from rmm._lib.logger import ( +from rmm._version import __git_commit__, __version__ +from rmm.mr import disable_logging, enable_logging, get_log_filenames +from rmm.python.device_buffer import DeviceBuffer +from rmm.python.logger import ( flush_logger, get_flush_level, get_logging_level, @@ -23,8 +25,6 @@ set_logging_level, should_log, ) -from rmm._version import __git_commit__, __version__ -from rmm.mr import disable_logging, enable_logging, get_log_filenames from rmm.rmm import ( RMMError, is_initialized, diff --git a/python/rmm/rmm/_cuda/stream.pxd b/python/rmm/rmm/_cuda/stream.pxd index 3c3d3aa6f..c978074fc 100644 --- a/python/rmm/rmm/_cuda/stream.pxd +++ b/python/rmm/rmm/_cuda/stream.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool -from rmm._lib.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cuda_stream_view cimport cuda_stream_view cdef class Stream: diff --git a/python/rmm/rmm/_cuda/stream.pyx b/python/rmm/rmm/_cuda/stream.pyx index 4d5ff5232..3c6c10c18 100644 --- a/python/rmm/rmm/_cuda/stream.pyx +++ b/python/rmm/rmm/_cuda/stream.pyx @@ -16,13 +16,13 @@ from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool -from rmm._lib.cuda_stream cimport CudaStream -from rmm._lib.cuda_stream_view cimport ( +from rmm.cpp.cuda_stream_view cimport ( cuda_stream_default, cuda_stream_legacy, cuda_stream_per_thread, cuda_stream_view, ) +from rmm.python.cuda_stream cimport CudaStream cdef class Stream: diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py index 0b8672ef6..f9462af65 100644 --- a/python/rmm/rmm/_lib/__init__.py +++ b/python/rmm/rmm/_lib/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .device_buffer import DeviceBuffer +# from .device_buffer import DeviceBuffer diff --git a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx b/python/rmm/rmm/_lib/tests/test_device_buffer.pyx index 733383827..0783dd9c6 100644 --- a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx +++ b/python/rmm/rmm/_lib/tests/test_device_buffer.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,8 +16,9 @@ import numpy as np from libcpp.memory cimport make_unique -from rmm._lib.cuda_stream_view cimport cuda_stream_default -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer +from rmm.cpp.cuda_stream_view cimport cuda_stream_default +from rmm.cpp.device_buffer cimport device_buffer +from rmm.python.device_buffer cimport DeviceBuffer def test_release(): diff --git a/python/rmm/rmm/allocators/cupy.py b/python/rmm/rmm/allocators/cupy.py index 89947c46b..8cd41b3b3 100644 --- a/python/rmm/rmm/allocators/cupy.py +++ b/python/rmm/rmm/allocators/cupy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm import _lib as librmm +from rmm import python as librmm from rmm._cuda.stream import Stream try: diff --git a/python/rmm/rmm/allocators/numba.py b/python/rmm/rmm/allocators/numba.py index 5e87b87b6..9838b8b59 100644 --- a/python/rmm/rmm/allocators/numba.py +++ b/python/rmm/rmm/allocators/numba.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ from numba import config, cuda from numba.cuda import HostOnlyCUDAMemoryManager, IpcHandle, MemoryPointer -from rmm import _lib as librmm +from rmm import python as librmm def _make_emm_plugin_finalizer(handle, allocations): diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index 753da66da..8df921ad8 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ # is pure Python and will therefore be in the source directory. # Instead, we search relative to an arbitrary file in the compiled # package. We use the _lib.lib module because it is small. - from rmm._lib import lib + from rmm.cpp import lib sofile = pathlib.Path(lib.__file__).parent / "_torch_allocator.so" rmm_torch_allocator = CUDAPluggableAllocator( diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/cpp/CMakeLists.txt index 8ac924df4..a201187b8 100644 --- a/python/rmm/rmm/cpp/CMakeLists.txt +++ b/python/rmm/rmm/cpp/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= -set(cython_sources cpp_logger.pyx cpp_memory_resource.pyx) +set(cython_sources lib.pyx _logger.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets diff --git a/python/rmm/rmm/python/lib.pyx b/python/rmm/rmm/cpp/__init__.py similarity index 100% rename from python/rmm/rmm/python/lib.pyx rename to python/rmm/rmm/cpp/__init__.py diff --git a/python/rmm/rmm/cpp/cpp_logger.pyx b/python/rmm/rmm/cpp/_logger.pxd similarity index 100% rename from python/rmm/rmm/cpp/cpp_logger.pyx rename to python/rmm/rmm/cpp/_logger.pxd diff --git a/python/rmm/rmm/cpp/_logger.pyx b/python/rmm/rmm/cpp/_logger.pyx new file mode 100644 index 000000000..e848c6dda --- /dev/null +++ b/python/rmm/rmm/cpp/_logger.pyx @@ -0,0 +1,15 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._logger cimport logging_level # no-cython-lint diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pxd b/python/rmm/rmm/cpp/cpp_memory_resource.pxd deleted file mode 100644 index cef5d4737..000000000 --- a/python/rmm/rmm/cpp/cpp_memory_resource.pxd +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from libcpp.pair cimport pair - -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass device_memory_resource: - void* allocate(size_t bytes) except + - void* allocate(size_t bytes, cuda_stream_view stream) except + - void deallocate(void* ptr, size_t bytes) except + - void deallocate( - void* ptr, - size_t bytes, - cuda_stream_view stream - ) except + - -cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: - size_t percent_of_free_device_memory(int percent) except + - pair[size_t, size_t] available_device_memory() except + diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream.pxd b/python/rmm/rmm/cpp/cuda_stream.pxd similarity index 94% rename from python/rmm/rmm/cpp/cpp_cuda_stream.pxd rename to python/rmm/rmm/cpp/cuda_stream.pxd index 16b66ee2b..fcd84198c 100644 --- a/python/rmm/rmm/cpp/cpp_cuda_stream.pxd +++ b/python/rmm/rmm/cpp/cuda_stream.pxd @@ -15,7 +15,7 @@ from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cuda_stream_view cimport cuda_stream_view cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd b/python/rmm/rmm/cpp/cuda_stream_pool.pxd similarity index 93% rename from python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd rename to python/rmm/rmm/cpp/cuda_stream_pool.pxd index 553b38514..f72cfd7f6 100644 --- a/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd +++ b/python/rmm/rmm/cpp/cuda_stream_pool.pxd @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cuda_stream_view cimport cuda_stream_view cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd b/python/rmm/rmm/cpp/cuda_stream_view.pxd similarity index 100% rename from python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd rename to python/rmm/rmm/cpp/cuda_stream_view.pxd diff --git a/python/rmm/rmm/cpp/cpp_device_buffer.pxd b/python/rmm/rmm/cpp/device_buffer.pxd similarity index 94% rename from python/rmm/rmm/cpp/cpp_device_buffer.pxd rename to python/rmm/rmm/cpp/device_buffer.pxd index 1aa7634cf..a3801cf05 100644 --- a/python/rmm/rmm/cpp/cpp_device_buffer.pxd +++ b/python/rmm/rmm/cpp/device_buffer.pxd @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view -from rmm.cpp.cpp_memory_resource cimport device_memory_resource +from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cpp_device_uvector.pxd b/python/rmm/rmm/cpp/device_uvector.pxd similarity index 100% rename from python/rmm/rmm/cpp/cpp_device_uvector.pxd rename to python/rmm/rmm/cpp/device_uvector.pxd diff --git a/python/rmm/rmm/python/lib.pxd b/python/rmm/rmm/cpp/lib.pxd similarity index 93% rename from python/rmm/rmm/python/lib.pxd rename to python/rmm/rmm/cpp/lib.pxd index b61e0d569..592574862 100644 --- a/python/rmm/rmm/python/lib.pxd +++ b/python/rmm/rmm/cpp/lib.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/rmm/rmm/cpp/lib.pyx b/python/rmm/rmm/cpp/lib.pyx new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/cpp/lib.pyx @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pyx b/python/rmm/rmm/cpp/memory_resource.pxd similarity index 90% rename from python/rmm/rmm/cpp/cpp_memory_resource.pyx rename to python/rmm/rmm/cpp/memory_resource.pxd index 50e201517..5e457b653 100644 --- a/python/rmm/rmm/cpp/cpp_memory_resource.pyx +++ b/python/rmm/rmm/cpp/memory_resource.pxd @@ -23,13 +23,30 @@ from libcpp.optional cimport optional from libcpp.pair cimport pair from libcpp.string cimport string -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view -from rmm.cpp.cpp_memory_resource cimport device_memory_resource +from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.memory_resource cimport device_memory_resource + + +cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass device_memory_resource: + void* allocate(size_t bytes) except + + void* allocate(size_t bytes, cuda_stream_view stream) except + + void deallocate(void* ptr, size_t bytes) except + + void deallocate( + void* ptr, + size_t bytes, + cuda_stream_view stream + ) except + + +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: + size_t percent_of_free_device_memory(int percent) except + + pair[size_t, size_t] available_device_memory() except + # Transparent handle of a C++ exception ctypedef pair[int, string] CppExcept -cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: +cdef inline CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: """Translate a Python exception into a C++ exception handle The returned exception handle can then be thrown by `throw_cpp_except()`, diff --git a/python/rmm/rmm/cpp/cpp_per_device_resource.pxd b/python/rmm/rmm/cpp/per_device_resource.pxd similarity index 95% rename from python/rmm/rmm/cpp/cpp_per_device_resource.pxd rename to python/rmm/rmm/cpp/per_device_resource.pxd index 59f651687..ec43d5f99 100644 --- a/python/rmm/rmm/cpp/cpp_per_device_resource.pxd +++ b/python/rmm/rmm/cpp/per_device_resource.pxd @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from rmm.cpp.cpp_memory_resource cimport device_memory_resource +from rmm.cpp.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index 6eb94da0f..f820cf805 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm._lib.memory_resource import ( +from rmm.python.memory_resource import ( BinningMemoryResource, CallbackMemoryResource, CudaAsyncMemoryResource, diff --git a/python/rmm/rmm/python/CMakeLists.txt b/python/rmm/rmm/python/CMakeLists.txt index 558c0ce9a..0e88f01bb 100644 --- a/python/rmm/rmm/python/CMakeLists.txt +++ b/python/rmm/rmm/python/CMakeLists.txt @@ -12,8 +12,7 @@ # the License. # ============================================================================= -set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx - helper.pyx) +set(cython_sources device_buffer.pyx logger.pyx memory_resource.pyx cuda_stream.pyx helper.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets diff --git a/python/rmm/rmm/python/cuda_stream.pxd b/python/rmm/rmm/python/cuda_stream.pxd new file mode 100644 index 000000000..169e560a4 --- /dev/null +++ b/python/rmm/rmm/python/cuda_stream.pxd @@ -0,0 +1,27 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
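# Illustrative sketch of the stream-ordered usage that the declarations below
# support (editorial, not part of the patch): a Stream wraps or owns a CUDA
# stream and can be passed to DeviceBuffer and the copy helpers. Module paths
# follow the rmm.python layout introduced by this series.
import rmm
from rmm._cuda.stream import Stream
from rmm.python import device_buffer

stream = Stream()                                     # new, non-default stream
src = rmm.DeviceBuffer.to_device(b"abc", stream=stream)
dst = rmm.DeviceBuffer(size=src.size, stream=stream)
device_buffer.copy_device_to_ptr(src.ptr, dst.ptr, src.size, stream=stream)
stream.synchronize()
print(bytes(dst))                                     # b'abc'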
+ +cimport cython +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool +from libcpp.memory cimport unique_ptr + +from rmm.cpp.cuda_stream cimport cuda_stream + + +@cython.final +cdef class CudaStream: + cdef unique_ptr[cuda_stream] c_obj + cdef cudaStream_t value(self) except * nogil + cdef bool is_valid(self) except * nogil diff --git a/python/rmm/rmm/python/cuda_stream.pyx b/python/rmm/rmm/python/cuda_stream.pyx index f1529111d..de02291d4 100644 --- a/python/rmm/rmm/python/cuda_stream.pyx +++ b/python/rmm/rmm/python/cuda_stream.pyx @@ -16,7 +16,7 @@ cimport cython from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm.cpp.cpp_cuda_stream cimport cuda_stream +from rmm.cpp.cuda_stream cimport cuda_stream @cython.final diff --git a/python/rmm/rmm/python/device_buffer.pxd b/python/rmm/rmm/python/device_buffer.pxd index d8b56d2fb..2d4d932af 100644 --- a/python/rmm/rmm/python/device_buffer.pxd +++ b/python/rmm/rmm/python/device_buffer.pxd @@ -16,7 +16,7 @@ from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from rmm._cuda.stream cimport Stream -from rmm.cpp.cpp_device_buffer cimport device_buffer +from rmm.cpp.device_buffer cimport device_buffer from rmm.python.memory_resource cimport DeviceMemoryResource diff --git a/python/rmm/rmm/python/device_buffer.pyx b/python/rmm/rmm/python/device_buffer.pyx index 1d564ff33..73b0d32ec 100644 --- a/python/rmm/rmm/python/device_buffer.pyx +++ b/python/rmm/rmm/python/device_buffer.pyx @@ -32,16 +32,16 @@ from cuda.ccudart cimport ( cudaStream_t, ) -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view -from rmm.cpp.cpp_device_buffer cimport ( +from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.device_buffer cimport ( cuda_device_id, device_buffer, get_current_cuda_device, prefetch, ) +from rmm.cpp.memory_resource cimport device_memory_resource from rmm.python.memory_resource cimport ( DeviceMemoryResource, - device_memory_resource, get_current_device_resource, ) @@ -401,7 +401,7 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b, Examples -------- >>> import rmm - >>> db = rmm._lib.device_buffer.to_device(b"abc") + >>> db = rmm.python.device_buffer.to_device(b"abc") >>> print(bytes(db)) b'abc' """ @@ -467,7 +467,7 @@ cpdef void copy_ptr_to_host(uintptr_t db, >>> import rmm >>> db = rmm.DeviceBuffer.to_device(b"abc") >>> hb = bytearray(db.nbytes) - >>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb) + >>> rmm.python.device_buffer.copy_ptr_to_host(db.ptr, hb) >>> print(hb) bytearray(b'abc') """ @@ -509,7 +509,7 @@ cpdef void copy_host_to_ptr(const unsigned char[::1] hb, >>> import rmm >>> db = rmm.DeviceBuffer(size=10) >>> hb = b"abc" - >>> rmm._lib.device_buffer.copy_host_to_ptr(hb, db.ptr) + >>> rmm.python.device_buffer.copy_host_to_ptr(hb, db.ptr) >>> hb = db.copy_to_host() >>> print(hb) array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) @@ -548,7 +548,7 @@ cpdef void copy_device_to_ptr(uintptr_t d_src, >>> import rmm >>> db = rmm.DeviceBuffer(size=5) >>> db2 = rmm.DeviceBuffer.to_device(b"abc") - >>> rmm._lib.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) + >>> rmm.python.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) >>> hb = db.copy_to_host() >>> hb array([97, 98, 99, 0, 0], dtype=uint8) diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/python/logger.pyx index 2997ff633..15ba47308 100644 --- a/python/rmm/rmm/python/logger.pyx +++ b/python/rmm/rmm/python/logger.pyx @@ -13,7 +13,10 @@ # limitations under the License. 
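The docstring updates to device_buffer.pyx above track the rename of the Cython package from rmm._lib to rmm.python; the public rmm.DeviceBuffer entry point is untouched. A short sketch of the relocated helpers as exercised by the updated examples (the rmm.python.* paths follow this patch and may be renamed again later in the series):

    import rmm
    from rmm.python.device_buffer import copy_ptr_to_host, to_device

    db = to_device(b"abc")        # previously rmm._lib.device_buffer.to_device
    hb = bytearray(db.nbytes)
    copy_ptr_to_host(db.ptr, hb)  # previously rmm._lib.device_buffer.copy_ptr_to_host
    assert bytes(hb) == b"abc"
    assert isinstance(db, rmm.DeviceBuffer)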
import warnings -from rmm.cpp.cpp_logging cimport logging_level, logger + +from rmm.cpp._logger cimport logger + +from rmm.cpp._logger import logging_level def _validate_level_type(level): diff --git a/python/rmm/rmm/python/memory_resource.pxd b/python/rmm/rmm/python/memory_resource.pxd index 0adda4eaf..4970d5230 100644 --- a/python/rmm/rmm/python/memory_resource.pxd +++ b/python/rmm/rmm/python/memory_resource.pxd @@ -14,7 +14,7 @@ from libcpp.memory cimport shared_ptr -from rmm.cpp.cpp_memory_resource cimport device_memory_resource +from rmm.cpp.memory_resource cimport device_memory_resource cdef class DeviceMemoryResource: diff --git a/python/rmm/rmm/python/memory_resource.pyx b/python/rmm/rmm/python/memory_resource.pyx index 46613667c..14585f36b 100644 --- a/python/rmm/rmm/python/memory_resource.pyx +++ b/python/rmm/rmm/python/memory_resource.pyx @@ -36,23 +36,20 @@ from rmm._cuda.stream cimport Stream from rmm._cuda.stream import DEFAULT_STREAM -from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view -from rmm.cpp.cpp_per_device_resource cimport ( +from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.cpp.per_device_resource cimport ( cuda_device_id, set_per_device_resource as cpp_set_per_device_resource, ) from rmm.python.helper cimport parse_bytes -from rmm.python.memory_resource cimport ( - available_device_memory as c_available_device_memory, - percent_of_free_device_memory as c_percent_of_free_device_memory, -) from rmm.statistics import Statistics -from rmm.cpp.cpp_memory_resource cimport ( +from rmm.cpp.memory_resource cimport ( CppExcept, allocate_callback_t, allocation_handle_type, + available_device_memory as c_available_device_memory, binning_memory_resource, callback_memory_resource, cuda_async_memory_resource, @@ -65,6 +62,7 @@ from rmm.cpp.cpp_memory_resource cimport ( limiting_resource_adaptor, logging_resource_adaptor, managed_memory_resource, + percent_of_free_device_memory as c_percent_of_free_device_memory, pool_memory_resource, posix_file_descriptor, prefetch_resource_adaptor, diff --git a/python/rmm/rmm/python/tests/__init__.py b/python/rmm/rmm/python/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/rmm/rmm/python/tests/test_device_buffer.pyx b/python/rmm/rmm/python/tests/test_device_buffer.pyx new file mode 100644 index 000000000..0783dd9c6 --- /dev/null +++ b/python/rmm/rmm/python/tests/test_device_buffer.pyx @@ -0,0 +1,38 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np + +from libcpp.memory cimport make_unique + +from rmm.cpp.cuda_stream_view cimport cuda_stream_default +from rmm.cpp.device_buffer cimport device_buffer +from rmm.python.device_buffer cimport DeviceBuffer + + +def test_release(): + expect = DeviceBuffer.to_device(b'abc') + cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') + + got = DeviceBuffer.c_from_unique_ptr( + make_unique[device_buffer](buf.c_release(), + cuda_stream_default.value()) + ) + np.testing.assert_equal(expect.copy_to_host(), got.copy_to_host()) + + +def test_size_after_release(): + cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') + buf.c_release() + assert buf.size == 0 diff --git a/python/rmm/rmm/tests/test_cython.py b/python/rmm/rmm/tests/test_cython.py index 82eba2451..a0364942c 100644 --- a/python/rmm/rmm/tests/test_cython.py +++ b/python/rmm/rmm/tests/test_cython.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ def wrapped(*args, **kwargs): return wrapped -cython_test_modules = ["rmm._lib.tests.test_device_buffer"] +cython_test_modules = ["rmm.python.tests.test_device_buffer"] for mod in cython_test_modules: diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index c88d21b38..4d7b6c646 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -354,7 +354,7 @@ def test_rmm_pool_numba_stream(stream): rmm.reinitialize(pool_allocator=True) stream = rmm._cuda.stream.Stream(stream) - a = rmm._lib.device_buffer.DeviceBuffer(size=3, stream=stream) + a = rmm.python.device_buffer.DeviceBuffer(size=3, stream=stream) assert a.size == 3 assert a.ptr != 0 From afef5391c23512bb04f35c04cbf7bc1436f9ca1e Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 16 Sep 2024 10:10:41 -0700 Subject: [PATCH 03/13] remove _lib --- python/rmm/CMakeLists.txt | 1 - python/rmm/rmm/_lib/CMakeLists.txt | 36 - python/rmm/rmm/_lib/__init__.pxd | 13 - python/rmm/rmm/_lib/__init__.py | 15 - python/rmm/rmm/_lib/_torch_allocator.cpp | 64 - python/rmm/rmm/_lib/cuda_stream.pxd | 37 - python/rmm/rmm/_lib/cuda_stream.pyx | 34 - python/rmm/rmm/_lib/cuda_stream_pool.pxd | 25 - python/rmm/rmm/_lib/cuda_stream_view.pxd | 32 - python/rmm/rmm/_lib/device_buffer.pxd | 116 -- python/rmm/rmm/_lib/device_buffer.pyx | 552 ------- python/rmm/rmm/_lib/device_uvector.pxd | 39 - python/rmm/rmm/_lib/helper.pxd | 16 - python/rmm/rmm/_lib/helper.pyx | 78 - python/rmm/rmm/_lib/lib.pxd | 20 - python/rmm/rmm/_lib/lib.pyx | 13 - python/rmm/rmm/_lib/logger.pyx | 260 ---- python/rmm/rmm/_lib/memory_resource.pxd | 103 -- python/rmm/rmm/_lib/memory_resource.pyx | 1332 ----------------- python/rmm/rmm/_lib/per_device_resource.pxd | 23 - python/rmm/rmm/_lib/tests/__init__.py | 0 .../rmm/rmm/_lib/tests/test_device_buffer.pyx | 38 - 22 files changed, 2847 deletions(-) delete mode 100644 python/rmm/rmm/_lib/CMakeLists.txt delete mode 100644 python/rmm/rmm/_lib/__init__.pxd delete mode 100644 python/rmm/rmm/_lib/__init__.py delete mode 100644 python/rmm/rmm/_lib/_torch_allocator.cpp delete mode 100644 python/rmm/rmm/_lib/cuda_stream.pxd delete mode 100644 python/rmm/rmm/_lib/cuda_stream.pyx delete mode 100644 python/rmm/rmm/_lib/cuda_stream_pool.pxd delete mode 100644 python/rmm/rmm/_lib/cuda_stream_view.pxd delete mode 100644 python/rmm/rmm/_lib/device_buffer.pxd delete mode 100644 
python/rmm/rmm/_lib/device_buffer.pyx delete mode 100644 python/rmm/rmm/_lib/device_uvector.pxd delete mode 100644 python/rmm/rmm/_lib/helper.pxd delete mode 100644 python/rmm/rmm/_lib/helper.pyx delete mode 100644 python/rmm/rmm/_lib/lib.pxd delete mode 100644 python/rmm/rmm/_lib/lib.pyx delete mode 100644 python/rmm/rmm/_lib/logger.pyx delete mode 100644 python/rmm/rmm/_lib/memory_resource.pxd delete mode 100644 python/rmm/rmm/_lib/memory_resource.pyx delete mode 100644 python/rmm/rmm/_lib/per_device_resource.pxd delete mode 100644 python/rmm/rmm/_lib/tests/__init__.py delete mode 100644 python/rmm/rmm/_lib/tests/test_device_buffer.pyx diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index 4c4e603ef..119888a4f 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -30,6 +30,5 @@ rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") add_subdirectory(rmm/_cuda) -# add_subdirectory(rmm/_lib) add_subdirectory(rmm/cpp) add_subdirectory(rmm/python) diff --git a/python/rmm/rmm/_lib/CMakeLists.txt b/python/rmm/rmm/_lib/CMakeLists.txt deleted file mode 100644 index 7cdfed971..000000000 --- a/python/rmm/rmm/_lib/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx - helper.pyx) -set(linked_libraries rmm::rmm) - -# Build all of the Cython targets -rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" - CXX) - -# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols -# loaded from other DSOs -foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden - CXX_VISIBILITY_PRESET hidden) -endforeach() - -add_library(_torch_allocator SHARED _torch_allocator.cpp) -# Want the output to be called _torch_allocator.so -set_target_properties(_torch_allocator PROPERTIES PREFIX "" SUFFIX ".so") -target_link_libraries(_torch_allocator PRIVATE rmm::rmm) -cmake_path(RELATIVE_PATH CMAKE_CURRENT_SOURCE_DIR BASE_DIRECTORY "${PROJECT_SOURCE_DIR}" - OUTPUT_VARIABLE _torch_allocator_location) -install(TARGETS _torch_allocator DESTINATION "${_torch_allocator_location}") diff --git a/python/rmm/rmm/_lib/__init__.pxd b/python/rmm/rmm/_lib/__init__.pxd deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/_lib/__init__.pxd +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py deleted file mode 100644 index f9462af65..000000000 --- a/python/rmm/rmm/_lib/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# from .device_buffer import DeviceBuffer diff --git a/python/rmm/rmm/_lib/_torch_allocator.cpp b/python/rmm/rmm/_lib/_torch_allocator.cpp deleted file mode 100644 index bfe94c2d0..000000000 --- a/python/rmm/rmm/_lib/_torch_allocator.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include - -// These signatures must match those required by CUDAPluggableAllocator in -// github.com/pytorch/pytorch/blob/main/torch/csrc/cuda/CUDAPluggableAllocator.h -// Since the loading is done at runtime via dlopen, no error checking -// can be performed for mismatching signatures. - -/** - * @brief Allocate memory of at least \p size bytes. - * - * @throws rmm::bad_alloc When the requested allocation cannot be satisfied. - * - * @param size The number of bytes to allocate - * @param device The device whose memory resource one should use - * @param stream CUDA stream to perform allocation on - * @return Pointer to the newly allocated memory - */ -extern "C" void* allocate(std::size_t size, int device, void* stream) -{ - rmm::cuda_device_id const device_id{device}; - rmm::cuda_set_device_raii with_device{device_id}; - auto mr = rmm::mr::get_per_device_resource_ref(device_id); - return mr.allocate_async( - size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast(stream)}); -} - -/** - * @brief Deallocate memory pointed to by \p ptr. 
- * - * @param ptr Pointer to be deallocated - * @param size The number of bytes in the allocation - * @param device The device whose memory resource one should use - * @param stream CUDA stream to perform deallocation on - */ -extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream) -{ - rmm::cuda_device_id const device_id{device}; - rmm::cuda_set_device_raii with_device{device_id}; - auto mr = rmm::mr::get_per_device_resource_ref(device_id); - mr.deallocate_async(ptr, - size, - rmm::CUDA_ALLOCATION_ALIGNMENT, - rmm::cuda_stream_view{static_cast(stream)}); -} diff --git a/python/rmm/rmm/_lib/cuda_stream.pxd b/python/rmm/rmm/_lib/cuda_stream.pxd deleted file mode 100644 index e224cf9af..000000000 --- a/python/rmm/rmm/_lib/cuda_stream.pxd +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -cimport cython -from cuda.ccudart cimport cudaStream_t -from libcpp cimport bool -from libcpp.memory cimport unique_ptr - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream: - cuda_stream() except + - bool is_valid() except + - cudaStream_t value() except + - cuda_stream_view view() except + - void synchronize() except + - void synchronize_no_throw() - - -@cython.final -cdef class CudaStream: - cdef unique_ptr[cuda_stream] c_obj - cdef cudaStream_t value(self) except * nogil - cdef bool is_valid(self) except * nogil diff --git a/python/rmm/rmm/_lib/cuda_stream.pyx b/python/rmm/rmm/_lib/cuda_stream.pyx deleted file mode 100644 index 0861f0663..000000000 --- a/python/rmm/rmm/_lib/cuda_stream.pyx +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -cimport cython -from cuda.ccudart cimport cudaStream_t -from libcpp cimport bool - - -@cython.final -cdef class CudaStream: - """ - Wrapper around a CUDA stream with RAII semantics. - When a CudaStream instance is GC'd, the underlying - CUDA stream is destroyed. 
- """ - def __cinit__(self): - self.c_obj.reset(new cuda_stream()) - - cdef cudaStream_t value(self) except * nogil: - return self.c_obj.get()[0].value() - - cdef bool is_valid(self) except * nogil: - return self.c_obj.get()[0].is_valid() diff --git a/python/rmm/rmm/_lib/cuda_stream_pool.pxd b/python/rmm/rmm/_lib/cuda_stream_pool.pxd deleted file mode 100644 index 0286a9377..000000000 --- a/python/rmm/rmm/_lib/cuda_stream_pool.pxd +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -cimport cython - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream_pool: - cuda_stream_pool(size_t pool_size) - cuda_stream_view get_stream() - cuda_stream_view get_stream(size_t stream_id) except + - size_t get_pool_size() diff --git a/python/rmm/rmm/_lib/cuda_stream_view.pxd b/python/rmm/rmm/_lib/cuda_stream_view.pxd deleted file mode 100644 index bf0d33c24..000000000 --- a/python/rmm/rmm/_lib/cuda_stream_view.pxd +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cuda.ccudart cimport cudaStream_t -from libcpp cimport bool - - -cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream_view: - cuda_stream_view() - cuda_stream_view(cudaStream_t) - cudaStream_t value() - bool is_default() - bool is_per_thread_default() - void synchronize() except + - - cdef bool operator==(cuda_stream_view const, cuda_stream_view const) - - const cuda_stream_view cuda_stream_default - const cuda_stream_view cuda_stream_legacy - const cuda_stream_view cuda_stream_per_thread diff --git a/python/rmm/rmm/_lib/device_buffer.pxd b/python/rmm/rmm/_lib/device_buffer.pxd deleted file mode 100644 index 0da9ace0c..000000000 --- a/python/rmm/rmm/_lib/device_buffer.pxd +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from libc.stdint cimport uintptr_t -from libcpp.memory cimport unique_ptr - -from rmm._cuda.stream cimport Stream -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.memory_resource cimport ( - DeviceMemoryResource, - device_memory_resource, -) - - -cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: - cdef cppclass cuda_device_id: - ctypedef int value_type - cuda_device_id() - cuda_device_id(value_type id) - value_type value() - - cdef cuda_device_id get_current_cuda_device() - -cdef extern from "rmm/prefetch.hpp" namespace "rmm" nogil: - cdef void prefetch(const void* ptr, - size_t bytes, - cuda_device_id device, - cuda_stream_view stream) except + - -cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: - cdef cppclass device_buffer: - device_buffer() - device_buffer( - size_t size, - cuda_stream_view stream, - device_memory_resource * - ) except + - device_buffer( - const void* source_data, - size_t size, - cuda_stream_view stream, - device_memory_resource * - ) except + - device_buffer( - const device_buffer buf, - cuda_stream_view stream, - device_memory_resource * - ) except + - void reserve(size_t new_capacity, cuda_stream_view stream) except + - void resize(size_t new_size, cuda_stream_view stream) except + - void shrink_to_fit(cuda_stream_view stream) except + - void* data() - size_t size() - size_t capacity() - - -cdef class DeviceBuffer: - cdef unique_ptr[device_buffer] c_obj - - # Holds a reference to the DeviceMemoryResource used for allocation. - # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is - # needed for deallocation - cdef DeviceMemoryResource mr - - # Holds a reference to the stream used by the underlying `device_buffer`. - # Ensures the stream does not get destroyed before this DeviceBuffer - cdef Stream stream - - @staticmethod - cdef DeviceBuffer c_from_unique_ptr( - unique_ptr[device_buffer] ptr, - Stream stream=*, - DeviceMemoryResource mr=*, - ) - - @staticmethod - cdef DeviceBuffer c_to_device(const unsigned char[::1] b, - Stream stream=*) except * - cpdef copy_to_host(self, ary=*, Stream stream=*) - cpdef copy_from_host(self, ary, Stream stream=*) - cpdef copy_from_device(self, cuda_ary, Stream stream=*) - cpdef bytes tobytes(self, Stream stream=*) - - cdef size_t c_size(self) except * - cpdef void reserve(self, size_t new_capacity, Stream stream=*) except * - cpdef void resize(self, size_t new_size, Stream stream=*) except * - cpdef size_t capacity(self) except * - cdef void* c_data(self) except * - - cdef device_buffer c_release(self) except * - -cpdef DeviceBuffer to_device(const unsigned char[::1] b, - Stream stream=*) -cpdef void copy_ptr_to_host(uintptr_t db, - unsigned char[::1] hb, - Stream stream=*) except * - -cpdef void copy_host_to_ptr(const unsigned char[::1] hb, - uintptr_t db, - Stream stream=*) except * - -cpdef void copy_device_to_ptr(uintptr_t d_src, - uintptr_t d_dst, - size_t count, - Stream stream=*) except * diff --git a/python/rmm/rmm/_lib/device_buffer.pyx b/python/rmm/rmm/_lib/device_buffer.pyx deleted file mode 100644 index 94a4dc771..000000000 --- a/python/rmm/rmm/_lib/device_buffer.pyx +++ /dev/null @@ -1,552 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np - -cimport cython -from cpython.bytes cimport PyBytes_FromStringAndSize -from libc.stdint cimport uintptr_t -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from rmm._cuda.stream cimport Stream - -from rmm._cuda.stream import DEFAULT_STREAM - -cimport cuda.ccudart as ccudart -from cuda.ccudart cimport ( - cudaError, - cudaError_t, - cudaMemcpyAsync, - cudaMemcpyKind, - cudaStream_t, -) - -from rmm._lib.memory_resource cimport ( - DeviceMemoryResource, - device_memory_resource, - get_current_device_resource, -) - - -# The DeviceMemoryResource attribute could be released prematurely -# by the gc if the DeviceBuffer is in a reference cycle. Removing -# the tp_clear function with the no_gc_clear decoration prevents that. -# See https://github.com/rapidsai/rmm/pull/931 for details. -@cython.no_gc_clear -cdef class DeviceBuffer: - - def __cinit__(self, *, - uintptr_t ptr=0, - size_t size=0, - Stream stream=DEFAULT_STREAM, - DeviceMemoryResource mr=None): - """Construct a ``DeviceBuffer`` with optional size and data pointer - - Parameters - ---------- - ptr : int - pointer to some data on host or device to copy over - size : int - size of the buffer to allocate - (and possibly size of data to copy) - stream : optional - CUDA stream to use for construction and/or copying, - defaults to the CUDA default stream. A reference to the - stream is stored internally to ensure it doesn't go out of - scope while the DeviceBuffer is in use. Destroying the - underlying stream while the DeviceBuffer is in use will - result in undefined behavior. - mr : optional - DeviceMemoryResource for the allocation, if not provided - defaults to the current device resource. - - Note - ---- - If the pointer passed is non-null and ``stream`` is the default stream, - it is synchronized after the copy. However if a non-default ``stream`` - is provided, this function is fully asynchronous. 
- - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=5) - """ - cdef const void* c_ptr - cdef device_memory_resource * mr_ptr - # Save a reference to the MR and stream used for allocation - self.mr = get_current_device_resource() if mr is None else mr - self.stream = stream - - mr_ptr = self.mr.get_mr() - with nogil: - c_ptr = ptr - - if c_ptr == NULL or size == 0: - self.c_obj.reset(new device_buffer(size, stream.view(), mr_ptr)) - else: - self.c_obj.reset(new device_buffer(c_ptr, size, stream.view(), mr_ptr)) - - if stream.c_is_default(): - stream.c_synchronize() - - def __len__(self): - return self.size - - def __sizeof__(self): - return self.size - - def __bytes__(self): - return self.tobytes() - - @property - def nbytes(self): - """Gets the size of the buffer in bytes.""" - return self.size - - @property - def ptr(self): - """Gets a pointer to the underlying data.""" - return int(self.c_data()) - - @property - def size(self): - """Gets the size of the buffer in bytes.""" - return int(self.c_size()) - - def __reduce__(self): - return to_device, (self.copy_to_host(),) - - @property - def __cuda_array_interface__(self): - cdef dict intf = { - "data": (self.ptr, False), - "shape": (self.size,), - "strides": None, - "typestr": "|u1", - "version": 0 - } - return intf - - def prefetch(self, device=None, stream=None): - """Prefetch buffer data to the specified device on the specified stream. - - Assumes the storage for this DeviceBuffer is CUDA managed memory - (unified memory). If it is not, this function is a no-op. - - Parameters - ---------- - device : optional - The CUDA device to which to prefetch the memory for this buffer. - Defaults to the current CUDA device. To prefetch to the CPU, pass - :py:attr:`~cuda.cudart.cudaCpuDeviceId` as the device. - stream : optional - CUDA stream to use for prefetching. Defaults to self.stream - """ - cdef cuda_device_id dev = (get_current_cuda_device() - if device is None - else cuda_device_id(device)) - cdef Stream strm = self.stream if stream is None else stream - with nogil: - prefetch(self.c_obj.get()[0].data(), - self.c_obj.get()[0].size(), - dev, - strm.view()) - - def copy(self): - """Returns a copy of DeviceBuffer. 
- - Returns - ------- - A deep copy of existing ``DeviceBuffer`` - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer.to_device(b"abc") - >>> db_copy = db.copy() - >>> db.copy_to_host() - array([97, 98, 99], dtype=uint8) - >>> db_copy.copy_to_host() - array([97, 98, 99], dtype=uint8) - >>> assert db is not db_copy - >>> assert db.ptr != db_copy.ptr - """ - ret = DeviceBuffer(ptr=self.ptr, size=self.size, stream=self.stream) - ret.mr = self.mr - return ret - - def __copy__(self): - return self.copy() - - @staticmethod - cdef DeviceBuffer c_from_unique_ptr( - unique_ptr[device_buffer] ptr, - Stream stream=DEFAULT_STREAM, - DeviceMemoryResource mr=None, - ): - cdef DeviceBuffer buf = DeviceBuffer.__new__(DeviceBuffer) - if stream.c_is_default(): - stream.c_synchronize() - buf.c_obj = move(ptr) - buf.mr = get_current_device_resource() if mr is None else mr - buf.stream = stream - return buf - - @staticmethod - cdef DeviceBuffer c_to_device(const unsigned char[::1] b, - Stream stream=DEFAULT_STREAM) except *: - """Calls ``to_device`` function on arguments provided""" - return to_device(b, stream) - - @staticmethod - def to_device(const unsigned char[::1] b, - Stream stream=DEFAULT_STREAM): - """Calls ``to_device`` function on arguments provided.""" - return to_device(b, stream) - - cpdef copy_to_host(self, ary=None, Stream stream=DEFAULT_STREAM): - """Copy from a ``DeviceBuffer`` to a buffer on host. - - Parameters - ---------- - ary : ``bytes``-like buffer to write into - stream : CUDA stream to use for copying, default the default stream - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer.to_device(b"abc") - >>> hb = bytearray(db.nbytes) - >>> db.copy_to_host(hb) - >>> print(hb) - bytearray(b'abc') - >>> hb = db.copy_to_host() - >>> print(hb) - bytearray(b'abc') - """ - cdef const device_buffer* dbp = self.c_obj.get() - cdef size_t s = dbp.size() - - cdef unsigned char[::1] hb = ary - if hb is None: - # NumPy leverages huge pages under-the-hood, - # which speeds up the copy from device to host. - hb = ary = np.empty((s,), dtype="u1") - elif len(hb) < s: - raise ValueError( - "Argument `ary` is too small. Need space for %i bytes." % s - ) - - copy_ptr_to_host(dbp.data(), hb[:s], stream) - - return ary - - cpdef copy_from_host(self, ary, Stream stream=DEFAULT_STREAM): - """Copy from a buffer on host to ``self`` - - Parameters - ---------- - ary : ``bytes``-like buffer to copy from - stream : CUDA stream to use for copying, default the default stream - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=10) - >>> hb = b"abcdef" - >>> db.copy_from_host(hb) - >>> hb = db.copy_to_host() - >>> print(hb) - array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) - """ - cdef device_buffer* dbp = self.c_obj.get() - - cdef const unsigned char[::1] hb = ary - cdef size_t s = len(hb) - if s > self.size: - raise ValueError( - "Argument `ary` is too large. Need space for %i bytes." 
% s - ) - - copy_host_to_ptr(hb[:s], dbp.data(), stream) - - cpdef copy_from_device(self, cuda_ary, - Stream stream=DEFAULT_STREAM): - """Copy from a buffer on host to ``self`` - - Parameters - ---------- - cuda_ary : object to copy from that has ``__cuda_array_interface__`` - stream : CUDA stream to use for copying, default the default stream - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=5) - >>> db2 = rmm.DeviceBuffer.to_device(b"abc") - >>> db.copy_from_device(db2) - >>> hb = db.copy_to_host() - >>> print(hb) - array([97, 98, 99, 0, 0], dtype=uint8) - """ - if not hasattr(cuda_ary, "__cuda_array_interface__"): - raise ValueError( - "Expected object to support `__cuda_array_interface__` " - "protocol" - ) - - cuda_ary_interface = cuda_ary.__cuda_array_interface__ - shape = cuda_ary_interface["shape"] - strides = cuda_ary_interface.get("strides") - dtype = np.dtype(cuda_ary_interface["typestr"]) - - if len(shape) > 1: - raise ValueError( - "Only 1-D contiguous arrays are supported, got {}-D " - "array".format(str(len(shape))) - ) - - if strides is not None: - if strides[0] != dtype.itemsize: - raise ValueError( - "Only 1-D contiguous arrays are supported, got a " - "non-contiguous array" - ) - - cdef uintptr_t src_ptr = cuda_ary_interface["data"][0] - cdef size_t s = shape[0] * dtype.itemsize - if s > self.size: - raise ValueError( - "Argument `hb` is too large. Need space for %i bytes." % s - ) - - cdef device_buffer* dbp = self.c_obj.get() - - copy_device_to_ptr( - src_ptr, - dbp.data(), - s, - stream - ) - - cpdef bytes tobytes(self, Stream stream=DEFAULT_STREAM): - cdef const device_buffer* dbp = self.c_obj.get() - cdef size_t s = dbp.size() - - cdef bytes b = PyBytes_FromStringAndSize(NULL, s) - cdef unsigned char* p = b - cdef unsigned char[::1] mv = (p)[:s] - self.copy_to_host(mv, stream) - - return b - - cdef size_t c_size(self) except *: - return self.c_obj.get()[0].size() - - cpdef void reserve(self, - size_t new_capacity, - Stream stream=DEFAULT_STREAM) except *: - self.c_obj.get()[0].reserve(new_capacity, stream.view()) - - cpdef void resize(self, - size_t new_size, - Stream stream=DEFAULT_STREAM) except *: - self.c_obj.get()[0].resize(new_size, stream.view()) - - cpdef size_t capacity(self) except *: - return self.c_obj.get()[0].capacity() - - cdef void* c_data(self) except *: - return self.c_obj.get()[0].data() - - cdef device_buffer c_release(self) except *: - """ - Releases ownership of the data held by this DeviceBuffer. - """ - return move(cython.operator.dereference(self.c_obj)) - - -@cython.boundscheck(False) -cpdef DeviceBuffer to_device(const unsigned char[::1] b, - Stream stream=DEFAULT_STREAM): - """Return a new ``DeviceBuffer`` with a copy of the data. 
- - Parameters - ---------- - b : ``bytes``-like data on host to copy to device - stream : CUDA stream to use for copying, default the default stream - - Returns - ------- - ``DeviceBuffer`` with copy of data from host - - Examples - -------- - >>> import rmm - >>> db = rmm._lib.device_buffer.to_device(b"abc") - >>> print(bytes(db)) - b'abc' - """ - - if b is None: - raise TypeError( - "Argument 'b' has incorrect type" - " (expected bytes-like, got NoneType)" - ) - - cdef uintptr_t p = &b[0] - cdef size_t s = len(b) - return DeviceBuffer(ptr=p, size=s, stream=stream) - - -@cython.boundscheck(False) -cdef void _copy_async(const void* src, - void* dst, - size_t count, - ccudart.cudaMemcpyKind kind, - cuda_stream_view stream) except * nogil: - """ - Asynchronously copy data between host and/or device pointers. - - This is a convenience wrapper around cudaMemcpyAsync that - checks for errors. Only used for internal implementation. - - Parameters - ---------- - src : pointer to ``bytes``-like host buffer or device data to copy from - dst : pointer to ``bytes``-like host buffer or device data to copy into - count : the size in bytes to copy - kind : the kind of copy to perform - stream : CUDA stream to use for copying, default the default stream - """ - cdef cudaError_t err = cudaMemcpyAsync(dst, src, count, kind, - stream) - - if err != cudaError.cudaSuccess: - raise RuntimeError(f"Memcpy failed with error: {err}") - - -@cython.boundscheck(False) -cpdef void copy_ptr_to_host(uintptr_t db, - unsigned char[::1] hb, - Stream stream=DEFAULT_STREAM) except *: - """Copy from a device pointer to a buffer on host - - Parameters - ---------- - db : pointer to data on device to copy - hb : ``bytes``-like buffer to write into - stream : CUDA stream to use for copying, default the default stream - - Note - ---- - If ``stream`` is the default stream, it is synchronized after the copy. - However if a non-default ``stream`` is provided, this function is fully - asynchronous. - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer.to_device(b"abc") - >>> hb = bytearray(db.nbytes) - >>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb) - >>> print(hb) - bytearray(b'abc') - """ - - if hb is None: - raise TypeError( - "Argument `hb` has incorrect type" - " (expected bytes-like, got NoneType)" - ) - - with nogil: - _copy_async(db, &hb[0], len(hb), - cudaMemcpyKind.cudaMemcpyDeviceToHost, stream.view()) - - if stream.c_is_default(): - stream.c_synchronize() - - -@cython.boundscheck(False) -cpdef void copy_host_to_ptr(const unsigned char[::1] hb, - uintptr_t db, - Stream stream=DEFAULT_STREAM) except *: - """Copy from a host pointer to a device pointer - - Parameters - ---------- - hb : ``bytes``-like host buffer to copy - db : pointer to data on device to write into - stream : CUDA stream to use for copying, default the default stream - - Note - ---- - If ``stream`` is the default stream, it is synchronized after the copy. - However if a non-default ``stream`` is provided, this function is fully - asynchronous. 
- - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=10) - >>> hb = b"abc" - >>> rmm._lib.device_buffer.copy_host_to_ptr(hb, db.ptr) - >>> hb = db.copy_to_host() - >>> print(hb) - array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) - """ - - if hb is None: - raise TypeError( - "Argument `hb` has incorrect type" - " (expected bytes-like, got NoneType)" - ) - - with nogil: - _copy_async(&hb[0], db, len(hb), - cudaMemcpyKind.cudaMemcpyHostToDevice, stream.view()) - - if stream.c_is_default(): - stream.c_synchronize() - - -@cython.boundscheck(False) -cpdef void copy_device_to_ptr(uintptr_t d_src, - uintptr_t d_dst, - size_t count, - Stream stream=DEFAULT_STREAM) except *: - """Copy from a device pointer to a device pointer - - Parameters - ---------- - d_src : pointer to data on device to copy from - d_dst : pointer to data on device to write into - count : the size in bytes to copy - stream : CUDA stream to use for copying, default the default stream - - Examples - -------- - >>> import rmm - >>> db = rmm.DeviceBuffer(size=5) - >>> db2 = rmm.DeviceBuffer.to_device(b"abc") - >>> rmm._lib.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) - >>> hb = db.copy_to_host() - >>> hb - array([97, 98, 99, 0, 0], dtype=uint8) - """ - - with nogil: - _copy_async(d_src, d_dst, count, - cudaMemcpyKind.cudaMemcpyDeviceToDevice, stream.view()) diff --git a/python/rmm/rmm/_lib/device_uvector.pxd b/python/rmm/rmm/_lib/device_uvector.pxd deleted file mode 100644 index 29e122bbf..000000000 --- a/python/rmm/rmm/_lib/device_uvector.pxd +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.device_buffer cimport device_buffer -from rmm._lib.memory_resource cimport device_memory_resource - - -cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: - cdef cppclass device_uvector[T]: - device_uvector(size_t size, cuda_stream_view stream) except + - T* element_ptr(size_t index) - void set_element(size_t element_index, const T& v, cuda_stream_view s) - void set_element_async( - size_t element_index, - const T& v, - cuda_stream_view s - ) except + - T front_element(cuda_stream_view s) except + - T back_element(cuda_stream_view s) except + - void reserve(size_t new_capacity, cuda_stream_view stream) except + - void resize(size_t new_size, cuda_stream_view stream) except + - void shrink_to_fit(cuda_stream_view stream) except + - device_buffer release() - size_t capacity() - T* data() - size_t size() - device_memory_resource* memory_resource() diff --git a/python/rmm/rmm/_lib/helper.pxd b/python/rmm/rmm/_lib/helper.pxd deleted file mode 100644 index 8ca151c00..000000000 --- a/python/rmm/rmm/_lib/helper.pxd +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -cdef object parse_bytes(object s) except * diff --git a/python/rmm/rmm/_lib/helper.pyx b/python/rmm/rmm/_lib/helper.pyx deleted file mode 100644 index d442ee341..000000000 --- a/python/rmm/rmm/_lib/helper.pyx +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Helper functions for rmm""" - -import re - - -cdef dict BYTE_SIZES = { - 'b': 1, - '': 1, - 'kb': 1000, - 'mb': 1000**2, - 'gb': 1000**3, - 'tb': 1000**4, - 'pb': 1000**5, - 'kib': 1024, - 'mib': 1024**2, - 'gib': 1024**3, - 'tib': 1024**4, - 'pib': 1024**5, -} - - -pattern = re.compile(r"^([0-9]+(?:\.[0-9]*)?)[\t ]*((?i:(?:[kmgtp]i?)?b))?$") - -cdef object parse_bytes(object s): - """Parse a string or integer into a number of bytes. - - Parameters - ---------- - s : int | str - Size in bytes. If an integer is provided, it is returned as-is. - A string is parsed as a floating point number with an (optional, - case-insensitive) byte-specifier, both SI prefixes (kb, mb, ..., pb) - and binary prefixes (kib, mib, ..., pib) are supported. - - Returns - ------- - Requested size in bytes as an integer. - - Raises - ------ - ValueError - If it is not possible to parse the input as a byte specification. - """ - cdef str suffix - cdef double n - cdef int multiplier - - if isinstance(s, int): - return s - - match = pattern.match(s) - - if match is None: - raise ValueError(f"Could not parse {s} as a byte specification") - - n = float(match.group(1)) - - suffix = match.group(2) - if suffix is None: - suffix = "" - - multiplier = BYTE_SIZES[suffix.lower()] - - return int(n*multiplier) diff --git a/python/rmm/rmm/_lib/lib.pxd b/python/rmm/rmm/_lib/lib.pxd deleted file mode 100644 index e35b672e4..000000000 --- a/python/rmm/rmm/_lib/lib.pxd +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
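The _lib/helper.pyx removed above was relocated to rmm/python/helper.pyx earlier in the series; it supplies the parse_bytes routine that the memory-resource constructors use to accept human-readable sizes. A quick illustration of the rules quoted above, assuming the relocated module keeps the same behaviour (parse_bytes is a cdef function, so it is cimported from Cython rather than imported from Python):

    from rmm.python.helper cimport parse_bytes

    assert parse_bytes(512) == 512                 # ints pass through unchanged
    assert parse_bytes("2kb") == 2_000             # SI prefixes are powers of 1000
    assert parse_bytes("1.5GiB") == 1_610_612_736  # binary prefixes are powers of 1024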
- -from libc.stdint cimport uintptr_t -from libcpp cimport bool -from libcpp.utility cimport pair -from libcpp.vector cimport vector - -ctypedef pair[const char*, unsigned int] caller_pair diff --git a/python/rmm/rmm/_lib/lib.pyx b/python/rmm/rmm/_lib/lib.pyx deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/_lib/lib.pyx +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/rmm/rmm/_lib/logger.pyx b/python/rmm/rmm/_lib/logger.pyx deleted file mode 100644 index 029bbdd79..000000000 --- a/python/rmm/rmm/_lib/logger.pyx +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from libcpp cimport bool - - -cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: - cpdef enum logging_level "spdlog::level::level_enum": - """ - The debug logging level for RMM. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, - INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. - - Examples - -------- - >>> import rmm - >>> rmm.logging_level.DEBUG - - >>> rmm.logging_level.DEBUG.value - 1 - >>> rmm.logging_level.DEBUG.name - 'DEBUG' - - See Also - -------- - set_logging_level : Set the debug logging level - get_logging_level : Get the current debug logging level - """ - TRACE "spdlog::level::trace" - DEBUG "spdlog::level::debug" - INFO "spdlog::level::info" - WARN "spdlog::level::warn" - ERR "spdlog::level::err" - CRITICAL "spdlog::level::critical" - OFF "spdlog::level::off" - - -cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: - cdef cppclass spdlog_logger "spdlog::logger": - spdlog_logger() except + - void set_level(logging_level level) - logging_level level() - void flush() except + - void flush_on(logging_level level) - logging_level flush_level() - bool should_log(logging_level msg_level) - - -cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: - cdef spdlog_logger& logger() except + - - -def _validate_level_type(level): - if not isinstance(level, logging_level): - raise TypeError("level must be an instance of the logging_level enum") - - -def should_log(level): - """ - Check if a message at the given level would be logged. 
- - A message at the given level would be logged if the current debug logging - level is set to a level that is at least as verbose than the given level, - *and* the RMM module is compiled for a logging level at least as verbose. - If these conditions are not both met, this function will return false. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Parameters - ---------- - level : logging_level - The debug logging level. Valid values are instances of the - ``logging_level`` enum. - - Returns - ------- - should_log : bool - True if a message at the given level would be logged, False otherwise. - - Raises - ------ - TypeError - If the logging level is not an instance of the ``logging_level`` enum. - """ - _validate_level_type(level) - return logger().should_log(level) - - -def set_logging_level(level): - """ - Set the debug logging level. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Parameters - ---------- - level : logging_level - The debug logging level. Valid values are instances of the - ``logging_level`` enum. - - Raises - ------ - TypeError - If the logging level is not an instance of the ``logging_level`` enum. - - See Also - -------- - get_logging_level : Get the current debug logging level. - - Examples - -------- - >>> import rmm - >>> rmm.set_logging_level(rmm.logging_level.WARN) # set logging level to warn - """ - _validate_level_type(level) - logger().set_level(level) - - if not should_log(level): - warnings.warn(f"RMM will not log logging_level.{level.name}. This " - "may be because the C++ library is compiled for a " - "less-verbose logging level.") - - -def get_logging_level(): - """ - Get the current debug logging level. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Returns - ------- - level : logging_level - The current debug logging level, an instance of the ``logging_level`` - enum. - - See Also - -------- - set_logging_level : Set the debug logging level. - - Examples - -------- - >>> import rmm - >>> rmm.get_logging_level() # get current logging level - - """ - return logging_level(logger().level()) - - -def flush_logger(): - """ - Flush the debug logger. This will cause any buffered log messages to - be written to the log file. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - See Also - -------- - set_flush_level : Set the flush level for the debug logger. - get_flush_level : Get the current debug logging flush level. - - Examples - -------- - >>> import rmm - >>> rmm.flush_logger() # flush the logger - """ - logger().flush() - - -def set_flush_level(level): - """ - Set the flush level for the debug logger. Messages of this level or higher - will automatically flush to the file. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Parameters - ---------- - level : logging_level - The debug logging level. Valid values are instances of the - ``logging_level`` enum. - - Raises - ------ - TypeError - If the logging level is not an instance of the ``logging_level`` enum. - - See Also - -------- - get_flush_level : Get the current debug logging flush level. - flush_logger : Flush the logger. 
- - Examples - -------- - >>> import rmm - >>> rmm.flush_on(rmm.logging_level.WARN) # set flush level to warn - """ - _validate_level_type(level) - logger().flush_on(level) - - if not should_log(level): - warnings.warn(f"RMM will not log logging_level.{level.name}. This " - "may be because the C++ library is compiled for a " - "less-verbose logging level.") - - -def get_flush_level(): - """ - Get the current debug logging flush level for the RMM logger. Messages of - this level or higher will automatically flush to the file. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Returns - ------- - logging_level - The current flush level, an instance of the ``logging_level`` - enum. - - See Also - -------- - set_flush_level : Set the flush level for the logger. - flush_logger : Flush the logger. - - Examples - -------- - >>> import rmm - >>> rmm.flush_level() # get current flush level - - """ - return logging_level(logger().flush_level()) diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd deleted file mode 100644 index 000a3fe1e..000000000 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
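The logger bindings removed above were relocated to rmm/python/logger.pyx earlier in the series; as their docstrings show, the entry points remain exposed from the top-level rmm namespace. A brief recap of the API those docstrings describe:

    import rmm

    rmm.set_logging_level(rmm.logging_level.WARN)  # raise the threshold to WARN
    assert rmm.get_logging_level() == rmm.logging_level.WARN
    if rmm.should_log(rmm.logging_level.ERR):      # ERR is less verbose than WARN
        rmm.flush_logger()                         # write out any buffered records
    rmm.set_flush_level(rmm.logging_level.ERR)     # auto-flush on ERR and above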
- -from libc.stdint cimport int8_t -from libcpp.memory cimport shared_ptr -from libcpp.pair cimport pair -from libcpp.string cimport string -from libcpp.vector cimport vector - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass device_memory_resource: - void* allocate(size_t bytes) except + - void* allocate(size_t bytes, cuda_stream_view stream) except + - void deallocate(void* ptr, size_t bytes) except + - void deallocate( - void* ptr, - size_t bytes, - cuda_stream_view stream - ) except + - -cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: - size_t percent_of_free_device_memory(int percent) except + - pair[size_t, size_t] available_device_memory() except + - -cdef class DeviceMemoryResource: - cdef shared_ptr[device_memory_resource] c_obj - cdef device_memory_resource* get_mr(self) noexcept nogil - -cdef class UpstreamResourceAdaptor(DeviceMemoryResource): - cdef readonly DeviceMemoryResource upstream_mr - - cpdef DeviceMemoryResource get_upstream(self) - -cdef class CudaMemoryResource(DeviceMemoryResource): - pass - -cdef class ManagedMemoryResource(DeviceMemoryResource): - pass - -cdef class SystemMemoryResource(DeviceMemoryResource): - pass - -cdef class SamHeadroomMemoryResource(DeviceMemoryResource): - pass - -cdef class CudaAsyncMemoryResource(DeviceMemoryResource): - pass - -cdef class PoolMemoryResource(UpstreamResourceAdaptor): - pass - -cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): - pass - -cdef class BinningMemoryResource(UpstreamResourceAdaptor): - - cdef readonly list _bin_mrs - - cpdef add_bin( - self, - size_t allocation_size, - DeviceMemoryResource bin_resource=*) - -cdef class CallbackMemoryResource(DeviceMemoryResource): - cdef object _allocate_func - cdef object _deallocate_func - -cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): - cdef object _log_file_name - cpdef get_file_name(self) - cpdef flush(self) - -cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): - cdef object _callback - -cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): - pass - -cpdef DeviceMemoryResource get_current_device_resource() diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx deleted file mode 100644 index 231253e3f..000000000 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ /dev/null @@ -1,1332 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
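The declarations above only change location; the user-facing classes continue to be re-exported through rmm.mr (see the rmm/mr.py change earlier in this series). A small sketch of the usual pattern they support, which the reorganization does not change:

    import rmm
    import rmm.mr

    # Carve a suballocating pool out of plain cudaMalloc memory and make it
    # the default resource for the current device.
    pool = rmm.mr.PoolMemoryResource(
        rmm.mr.CudaMemoryResource(),
        initial_pool_size=2**30,  # 1 GiB up front
    )
    rmm.mr.set_current_device_resource(pool)

    buf = rmm.DeviceBuffer(size=4096)  # served from the pool
    assert rmm.mr.get_current_device_resource() is pool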
- -import os -import warnings -# This import is needed for Cython typing in translate_python_except_to_cpp -# See https://github.com/cython/cython/issues/5589 -from builtins import BaseException -from collections import defaultdict - -cimport cython -from cython.operator cimport dereference as deref -from libc.stddef cimport size_t -from libc.stdint cimport int8_t, int64_t, uintptr_t -from libcpp cimport bool -from libcpp.memory cimport make_unique, unique_ptr -from libcpp.optional cimport optional -from libcpp.pair cimport pair -from libcpp.string cimport string - -from cuda.cudart import cudaError_t - -from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice - -from rmm._cuda.stream cimport Stream - -from rmm._cuda.stream import DEFAULT_STREAM - -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.helper cimport parse_bytes -from rmm._lib.memory_resource cimport ( - available_device_memory as c_available_device_memory, - percent_of_free_device_memory as c_percent_of_free_device_memory, -) -from rmm._lib.per_device_resource cimport ( - cuda_device_id, - set_per_device_resource as cpp_set_per_device_resource, -) - -from rmm.statistics import Statistics - -# Transparent handle of a C++ exception -ctypedef pair[int, string] CppExcept - -cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: - """Translate a Python exception into a C++ exception handle - - The returned exception handle can then be thrown by `throw_cpp_except()`, - which MUST be done without holding the GIL. - - This is useful when C++ calls a Python function and needs to catch or - propagate exceptions. - """ - if isinstance(err, MemoryError): - return CppExcept(0, str.encode(str(err))) - return CppExcept(-1, str.encode(str(err))) - -# Implementation of `throw_cpp_except()`, which throws a given `CppExcept`. -# This function MUST be called without the GIL otherwise the thrown C++ -# exception are translated back into a Python exception. 
-cdef extern from *: - """ - #include - #include - - void throw_cpp_except(std::pair res) { - switch(res.first) { - case 0: - throw rmm::out_of_memory(res.second); - default: - throw std::runtime_error(res.second); - } - } - """ - void throw_cpp_except(CppExcept) nogil - - -# NOTE: Keep extern declarations in .pyx file as much as possible to avoid -# leaking dependencies when importing RMM Cython .pxd files -cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass cuda_memory_resource(device_memory_resource): - cuda_memory_resource() except + - -cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass managed_memory_resource(device_memory_resource): - managed_memory_resource() except + - -cdef extern from "rmm/mr/device/system_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass system_memory_resource(device_memory_resource): - system_memory_resource() except + - -cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass sam_headroom_memory_resource(device_memory_resource): - sam_headroom_memory_resource(size_t headroom) except + - -cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - - cdef cppclass cuda_async_memory_resource(device_memory_resource): - cuda_async_memory_resource( - optional[size_t] initial_pool_size, - optional[size_t] release_threshold, - optional[allocation_handle_type] export_handle_type) except + - -# TODO: when we adopt Cython 3.0 use enum class -cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ - namespace \ - "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \ - nogil: - enum allocation_handle_type \ - "rmm::mr::cuda_async_memory_resource::allocation_handle_type": - none - posix_file_descriptor - win32 - win32_kmt - - -cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass pool_memory_resource[Upstream](device_memory_resource): - pool_memory_resource( - Upstream* upstream_mr, - size_t initial_pool_size, - optional[size_t] maximum_pool_size) except + - size_t pool_size() - -cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource): - fixed_size_memory_resource( - Upstream* upstream_mr, - size_t block_size, - size_t block_to_preallocate) except + - -cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*) - ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*) - - cdef cppclass callback_memory_resource(device_memory_resource): - callback_memory_resource( - allocate_callback_t allocate_callback, - deallocate_callback_t deallocate_callback, - void* allocate_callback_arg, - void* deallocate_callback_arg - ) except + - -cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass binning_memory_resource[Upstream](device_memory_resource): - binning_memory_resource(Upstream* upstream_mr) except + - binning_memory_resource( - Upstream* upstream_mr, - int8_t min_size_exponent, - int8_t max_size_exponent) except + - - void add_bin(size_t allocation_size) except + - void add_bin( - size_t allocation_size, - device_memory_resource* bin_resource) except + - -cdef extern from 
"rmm/mr/device/limiting_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource): - limiting_resource_adaptor( - Upstream* upstream_mr, - size_t allocation_limit) except + - - size_t get_allocated_bytes() except + - size_t get_allocation_limit() except + - -cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): - logging_resource_adaptor( - Upstream* upstream_mr, - string filename) except + - - void flush() except + - -cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource): - struct counter: - counter() - - int64_t value - int64_t peak - int64_t total - - statistics_resource_adaptor(Upstream* upstream_mr) except + - - counter get_bytes_counter() except + - counter get_allocations_counter() except + - pair[counter, counter] pop_counters() except + - pair[counter, counter] push_counters() except + - -cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): - tracking_resource_adaptor( - Upstream* upstream_mr, - bool capture_stacks) except + - - size_t get_allocated_bytes() except + - string get_outstanding_allocations_str() except + - void log_outstanding_allocations() except + - -cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - ctypedef bool (*failure_callback_t)(size_t, void*) - cdef cppclass failure_callback_resource_adaptor[Upstream]( - device_memory_resource - ): - failure_callback_resource_adaptor( - Upstream* upstream_mr, - failure_callback_t callback, - void* callback_arg - ) except + - -cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource): - prefetch_resource_adaptor(Upstream* upstream_mr) except + - - -cdef class DeviceMemoryResource: - - cdef device_memory_resource* get_mr(self) noexcept nogil: - """Get the underlying C++ memory resource object.""" - return self.c_obj.get() - - def allocate(self, size_t nbytes, Stream stream=DEFAULT_STREAM): - """Allocate ``nbytes`` bytes of memory. - - Parameters - ---------- - nbytes : size_t - The size of the allocation in bytes - stream : Stream - Optional stream for the allocation - """ - return self.c_obj.get().allocate(nbytes, stream.view()) - - def deallocate(self, uintptr_t ptr, size_t nbytes, Stream stream=DEFAULT_STREAM): - """Deallocate memory pointed to by ``ptr`` of size ``nbytes``. - - Parameters - ---------- - ptr : uintptr_t - Pointer to be deallocated - nbytes : size_t - Size of the allocation in bytes - stream : Stream - Optional stream for the deallocation - """ - self.c_obj.get().deallocate((ptr), nbytes, stream.view()) - - -# See the note about `no_gc_clear` in `device_buffer.pyx`. -@cython.no_gc_clear -cdef class UpstreamResourceAdaptor(DeviceMemoryResource): - """Parent class for all memory resources that track an upstream. - - Upstream resource tracking requires maintaining a reference to the upstream - mr so that it is kept alive and may be accessed by any downstream resource - adaptors. 
- """ - - def __cinit__(self, DeviceMemoryResource upstream_mr, *args, **kwargs): - - if (upstream_mr is None): - raise Exception("Argument `upstream_mr` must not be None") - - self.upstream_mr = upstream_mr - - def __dealloc__(self): - # Must cleanup the base MR before any upstream MR - self.c_obj.reset() - - cpdef DeviceMemoryResource get_upstream(self): - return self.upstream_mr - - -cdef class CudaMemoryResource(DeviceMemoryResource): - def __cinit__(self): - self.c_obj.reset( - new cuda_memory_resource() - ) - - def __init__(self): - """ - Memory resource that uses ``cudaMalloc``/``cudaFree`` for - allocation/deallocation. - """ - pass - - -cdef class CudaAsyncMemoryResource(DeviceMemoryResource): - """ - Memory resource that uses ``cudaMallocAsync``/``cudaFreeAsync`` for - allocation/deallocation. - - Parameters - ---------- - initial_pool_size : int | str, optional - Initial pool size in bytes. By default, half the available memory - on the device is used. A string argument is parsed using `parse_bytes`. - release_threshold: int, optional - Release threshold in bytes. If the pool size grows beyond this - value, unused memory held by the pool will be released at the - next synchronization point. - enable_ipc: bool, optional - If True, enables export of POSIX file descriptor handles for the memory - allocated by this resource so that it can be used with CUDA IPC. - """ - def __cinit__( - self, - initial_pool_size=None, - release_threshold=None, - enable_ipc=False - ): - cdef optional[size_t] c_initial_pool_size = ( - optional[size_t]() - if initial_pool_size is None - else optional[size_t]( parse_bytes(initial_pool_size)) - ) - - cdef optional[size_t] c_release_threshold = ( - optional[size_t]() - if release_threshold is None - else optional[size_t]( release_threshold) - ) - - # If IPC memory handles are not supported, the constructor below will - # raise an error from C++. - cdef optional[allocation_handle_type] c_export_handle_type = ( - optional[allocation_handle_type]( - posix_file_descriptor - ) - if enable_ipc - else optional[allocation_handle_type]() - ) - - self.c_obj.reset( - new cuda_async_memory_resource( - c_initial_pool_size, - c_release_threshold, - c_export_handle_type - ) - ) - - -cdef class ManagedMemoryResource(DeviceMemoryResource): - def __cinit__(self): - self.c_obj.reset( - new managed_memory_resource() - ) - - def __init__(self): - """ - Memory resource that uses ``cudaMallocManaged``/``cudaFree`` for - allocation/deallocation. - """ - pass - - -cdef class SystemMemoryResource(DeviceMemoryResource): - def __cinit__(self): - self.c_obj.reset( - new system_memory_resource() - ) - - def __init__(self): - """ - Memory resource that uses ``malloc``/``free`` for - allocation/deallocation. - """ - pass - - -cdef class SamHeadroomMemoryResource(DeviceMemoryResource): - def __cinit__( - self, - size_t headroom - ): - self.c_obj.reset(new sam_headroom_memory_resource(headroom)) - - def __init__( - self, - size_t headroom - ): - """ - Memory resource that uses ``malloc``/``free`` for - allocation/deallocation. 
- - Parameters - ---------- - headroom : size_t - Size of the reserved GPU memory as headroom - """ - pass - - -cdef class PoolMemoryResource(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - initial_pool_size=None, - maximum_pool_size=None - ): - cdef size_t c_initial_pool_size - cdef optional[size_t] c_maximum_pool_size - c_initial_pool_size = ( - c_percent_of_free_device_memory(50) if - initial_pool_size is None - else parse_bytes(initial_pool_size) - ) - c_maximum_pool_size = ( - optional[size_t]() if - maximum_pool_size is None - else optional[size_t]( parse_bytes(maximum_pool_size)) - ) - self.c_obj.reset( - new pool_memory_resource[device_memory_resource]( - upstream_mr.get_mr(), - c_initial_pool_size, - c_maximum_pool_size - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - object initial_pool_size=None, - object maximum_pool_size=None - ): - """ - Coalescing best-fit suballocator which uses a pool of memory allocated - from an upstream memory resource. - - Parameters - ---------- - upstream_mr : DeviceMemoryResource - The DeviceMemoryResource from which to allocate blocks for the - pool. - initial_pool_size : int | str, optional - Initial pool size in bytes. By default, half the available memory - on the device is used. - maximum_pool_size : int | str, optional - Maximum size in bytes, that the pool can grow to. - """ - pass - - def pool_size(self): - cdef pool_memory_resource[device_memory_resource]* c_mr = ( - (self.get_mr()) - ) - return c_mr.pool_size() - -cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - size_t block_size=1<<20, - size_t blocks_to_preallocate=128 - ): - self.c_obj.reset( - new fixed_size_memory_resource[device_memory_resource]( - upstream_mr.get_mr(), - block_size, - blocks_to_preallocate - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - size_t block_size=1<<20, - size_t blocks_to_preallocate=128 - ): - """ - Memory resource which allocates memory blocks of a single fixed size. - - Parameters - ---------- - upstream_mr : DeviceMemoryResource - The DeviceMemoryResource from which to allocate blocks for the - pool. - block_size : int, optional - The size of blocks to allocate (default is 1MiB). - blocks_to_preallocate : int, optional - The number of blocks to allocate to initialize the pool. - - Notes - ----- - Supports only allocations of size smaller than the configured - block_size. - """ - pass - - -cdef class BinningMemoryResource(UpstreamResourceAdaptor): - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - int8_t min_size_exponent=-1, - int8_t max_size_exponent=-1, - ): - - self._bin_mrs = [] - - if (min_size_exponent == -1 or max_size_exponent == -1): - self.c_obj.reset( - new binning_memory_resource[device_memory_resource]( - upstream_mr.get_mr() - ) - ) - else: - self.c_obj.reset( - new binning_memory_resource[device_memory_resource]( - upstream_mr.get_mr(), - min_size_exponent, - max_size_exponent - ) - ) - - def __dealloc__(self): - - # Must cleanup the base MR before any upstream or referenced Bins - self.c_obj.reset() - - def __init__( - self, - DeviceMemoryResource upstream_mr, - int8_t min_size_exponent=-1, - int8_t max_size_exponent=-1, - ): - """ - Allocates memory from a set of specified "bin" sizes based on a - specified allocation size. 
- - If min_size_exponent and max_size_exponent are specified, initializes - with one or more FixedSizeMemoryResource bins in the range - ``[2**min_size_exponent, 2**max_size_exponent]``. - - Call :py:meth:`~.add_bin` to add additional bin allocators. - - Parameters - ---------- - upstream_mr : DeviceMemoryResource - The memory resource to use for allocations larger than any of the - bins. - min_size_exponent : size_t - The base-2 exponent of the minimum size FixedSizeMemoryResource - bin to create. - max_size_exponent : size_t - The base-2 exponent of the maximum size FixedSizeMemoryResource - bin to create. - """ - pass - - cpdef add_bin( - self, - size_t allocation_size, - DeviceMemoryResource bin_resource=None - ): - """ - Adds a bin of the specified maximum allocation size to this memory - resource. If specified, uses bin_resource for allocation for this bin. - If not specified, creates and uses a FixedSizeMemoryResource for - allocation for this bin. - - Allocations smaller than allocation_size and larger than the next - smaller bin size will use this fixed-size memory resource. - - Parameters - ---------- - allocation_size : size_t - The maximum allocation size in bytes for the created bin - bin_resource : DeviceMemoryResource - The resource to use for this bin (optional) - """ - if bin_resource is None: - (( - self.c_obj.get()))[0].add_bin(allocation_size) - else: - # Save the ref to the new bin resource to ensure its lifetime - self._bin_mrs.append(bin_resource) - - (( - self.c_obj.get()))[0].add_bin( - allocation_size, - bin_resource.get_mr()) - - @property - def bin_mrs(self) -> list: - """Get the list of binned memory resources.""" - return self._bin_mrs - - -cdef void* _allocate_callback_wrapper( - size_t nbytes, - cuda_stream_view stream, - void* ctx - # Note that this function is specifically designed to rethrow Python - # exceptions as C++ exceptions when called as a callback from C++, so it is - # noexcept from Cython's perspective. -) noexcept nogil: - cdef CppExcept err - with gil: - try: - return ((ctx)( - nbytes, - Stream._from_cudaStream_t(stream.value()) - )) - except BaseException as e: - err = translate_python_except_to_cpp(e) - throw_cpp_except(err) - -cdef void _deallocate_callback_wrapper( - void* ptr, - size_t nbytes, - cuda_stream_view stream, - void* ctx -) except * with gil: - (ctx)((ptr), nbytes, Stream._from_cudaStream_t(stream.value())) - - -cdef class CallbackMemoryResource(DeviceMemoryResource): - """ - A memory resource that uses the user-provided callables to do - memory allocation and deallocation. - - ``CallbackMemoryResource`` should really only be used for - debugging memory issues, as there is a significant performance - penalty associated with using a Python function for each memory - allocation and deallocation. - - Parameters - ---------- - allocate_func: callable - The allocation function must accept two arguments. An integer - representing the number of bytes to allocate and a Stream on - which to perform the allocation, and return an integer - representing the pointer to the allocated memory. - deallocate_func: callable - The deallocation function must accept three arguments. an integer - representing the pointer to the memory to free, a second - integer representing the number of bytes to free, and a Stream - on which to perform the deallocation. - - Examples - -------- - >>> import rmm - >>> base_mr = rmm.mr.CudaMemoryResource() - >>> def allocate_func(size, stream): - ... print(f"Allocating {size} bytes") - ... 
return base_mr.allocate(size, stream) - ... - >>> def deallocate_func(ptr, size, stream): - ... print(f"Deallocating {size} bytes") - ... return base_mr.deallocate(ptr, size, stream) - ... - >>> rmm.mr.set_current_device_resource( - rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func) - ) - >>> dbuf = rmm.DeviceBuffer(size=256) - Allocating 256 bytes - >>> del dbuf - Deallocating 256 bytes - """ - def __init__( - self, - allocate_func, - deallocate_func, - ): - self._allocate_func = allocate_func - self._deallocate_func = deallocate_func - self.c_obj.reset( - new callback_memory_resource( - (_allocate_callback_wrapper), - (_deallocate_callback_wrapper), - (allocate_func), - (deallocate_func) - ) - ) - - -def _append_id(filename, id): - """ - Append ".dev" onto a filename before the extension - - Example: _append_id("hello.txt", 1) returns "hello.dev1.txt" - - Parameters - ---------- - filename : string - The filename, possibly with extension - id : int - The ID to append - """ - name, ext = os.path.splitext(filename) - return f"{name}.dev{id}{ext}" - - -cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - size_t allocation_limit - ): - self.c_obj.reset( - new limiting_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr(), - allocation_limit - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - size_t allocation_limit - ): - """ - Memory resource that limits the total allocation amount possible - performed by an upstream memory resource. - - Parameters - ---------- - upstream_mr : DeviceMemoryResource - The upstream memory resource. - allocation_limit : size_t - Maximum memory allowed for this allocator. - """ - pass - - def get_allocated_bytes(self) -> size_t: - """ - Query the number of bytes that have been allocated. Note that this can - not be used to know how large of an allocation is possible due to both - possible fragmentation and also internal page sizes and alignment that - is not tracked by this allocator. - """ - return (( - self.c_obj.get()) - )[0].get_allocated_bytes() - - def get_allocation_limit(self) -> size_t: - """ - Query the maximum number of bytes that this allocator is allowed to - allocate. This is the limit on the allocator and not a representation - of the underlying device. The device may not be able to support this - limit. - """ - return (( - self.c_obj.get()) - )[0].get_allocation_limit() - - -cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - object log_file_name=None - ): - if log_file_name is None: - log_file_name = os.getenv("RMM_LOG_FILE") - if not log_file_name: - raise ValueError( - "RMM log file must be specified either using " - "log_file_name= argument or RMM_LOG_FILE " - "environment variable" - ) - - # Append the device ID before the file extension - log_file_name = _append_id( - log_file_name, getDevice() - ) - log_file_name = os.path.abspath(log_file_name) - self._log_file_name = log_file_name - - self.c_obj.reset( - new logging_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr(), - log_file_name.encode() - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - object log_file_name=None - ): - """ - Memory resource that logs information about allocations/deallocations - performed by an upstream memory resource. - - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. 
- log_file_name : str - Path to the file to which logs are written. - """ - pass - - cpdef flush(self): - (( - self.get_mr()))[0].flush() - - cpdef get_file_name(self): - return self._log_file_name - - def __dealloc__(self): - self.c_obj.reset() - -cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr - ): - self.c_obj.reset( - new statistics_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr() - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr - ): - """ - Memory resource that tracks the current, peak and total - allocations/deallocations performed by an upstream memory resource. - Includes the ability to query these statistics at any time. - - A stack of counters is maintained. Use :meth:`push_counters` and - :meth:`pop_counters` to track statistics at different nesting levels. - - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. - """ - pass - - @property - def allocation_counts(self) -> Statistics: - """ - Gets the current, peak, and total allocated bytes and number of - allocations. - - The dictionary keys are ``current_bytes``, ``current_count``, - ``peak_bytes``, ``peak_count``, ``total_bytes``, and ``total_count``. - - Returns: - dict: Dictionary containing allocation counts and bytes. - """ - cdef statistics_resource_adaptor[device_memory_resource]* mr = \ - self.c_obj.get() - - counts = deref(mr).get_allocations_counter() - byte_counts = deref(mr).get_bytes_counter() - return Statistics( - current_bytes=byte_counts.value, - current_count=counts.value, - peak_bytes=byte_counts.peak, - peak_count=counts.peak, - total_bytes=byte_counts.total, - total_count=counts.total, - ) - - def pop_counters(self) -> Statistics: - """ - Pop a counter pair (bytes and allocations) from the stack - - Returns - ------- - The popped statistics - """ - cdef statistics_resource_adaptor[device_memory_resource]* mr = \ - self.c_obj.get() - - bytes_and_allocs = deref(mr).pop_counters() - return Statistics( - current_bytes=bytes_and_allocs.first.value, - current_count=bytes_and_allocs.second.value, - peak_bytes=bytes_and_allocs.first.peak, - peak_count=bytes_and_allocs.second.peak, - total_bytes=bytes_and_allocs.first.total, - total_count=bytes_and_allocs.second.total, - ) - - def push_counters(self) -> Statistics: - """ - Push a new counter pair (bytes and allocations) on the stack - - Returns - ------- - The statistics _before_ the push - """ - - cdef statistics_resource_adaptor[device_memory_resource]* mr = \ - self.c_obj.get() - - bytes_and_allocs = deref(mr).push_counters() - return Statistics( - current_bytes=bytes_and_allocs.first.value, - current_count=bytes_and_allocs.second.value, - peak_bytes=bytes_and_allocs.first.peak, - peak_count=bytes_and_allocs.second.peak, - total_bytes=bytes_and_allocs.first.total, - total_count=bytes_and_allocs.second.total, - ) - -cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - bool capture_stacks=False - ): - self.c_obj.reset( - new tracking_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr(), - capture_stacks - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - bool capture_stacks=False - ): - """ - Memory resource that logs tracks allocations/deallocations - performed by an upstream memory resource. Includes the ability to - query all outstanding allocations with the stack trace, if desired. 
- - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. - capture_stacks : bool - Whether or not to capture the stack trace with each allocation. - """ - pass - - def get_allocated_bytes(self) -> size_t: - """ - Query the number of bytes that have been allocated. Note that this can - not be used to know how large of an allocation is possible due to both - possible fragmentation and also internal page sizes and alignment that - is not tracked by this allocator. - """ - return (( - self.c_obj.get()) - )[0].get_allocated_bytes() - - def get_outstanding_allocations_str(self) -> str: - """ - Returns a string containing information about the current outstanding - allocations. For each allocation, the address, size and optional - stack trace are shown. - """ - - return (( - self.c_obj.get()) - )[0].get_outstanding_allocations_str().decode('UTF-8') - - def log_outstanding_allocations(self): - """ - Logs the output of `get_outstanding_allocations_str` to the current - RMM log file if enabled. - """ - - (( - self.c_obj.get()))[0].log_outstanding_allocations() - - -# Note that this function is specifically designed to rethrow Python exceptions -# as C++ exceptions when called as a callback from C++, so it is noexcept from -# Cython's perspective. -cdef bool _oom_callback_function(size_t bytes, void *callback_arg) noexcept nogil: - cdef CppExcept err - with gil: - try: - return (callback_arg)(bytes) - except BaseException as e: - err = translate_python_except_to_cpp(e) - throw_cpp_except(err) - - -cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr, - object callback, - ): - self._callback = callback - self.c_obj.reset( - new failure_callback_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr(), - _oom_callback_function, - callback - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr, - object callback, - ): - """ - Memory resource that call callback when memory allocation fails. - - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. - callback : callable - Function called when memory allocation fails. - """ - pass - -cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): - - def __cinit__( - self, - DeviceMemoryResource upstream_mr - ): - self.c_obj.reset( - new prefetch_resource_adaptor[device_memory_resource]( - upstream_mr.get_mr() - ) - ) - - def __init__( - self, - DeviceMemoryResource upstream_mr - ): - """ - Memory resource that prefetches all allocations. - - Parameters - ---------- - upstream : DeviceMemoryResource - The upstream memory resource. 
- """ - pass - - -# Global per-device memory resources; dict of int:DeviceMemoryResource -cdef _per_device_mrs = defaultdict(CudaMemoryResource) - - -cpdef void _initialize( - bool pool_allocator=False, - bool managed_memory=False, - object initial_pool_size=None, - object maximum_pool_size=None, - object devices=0, - bool logging=False, - object log_file_name=None, -) except *: - """ - Initializes RMM library using the options passed - """ - if managed_memory: - upstream = ManagedMemoryResource - else: - upstream = CudaMemoryResource - - if pool_allocator: - typ = PoolMemoryResource - args = (upstream(),) - kwargs = dict( - initial_pool_size=None if initial_pool_size is None - else parse_bytes(initial_pool_size), - maximum_pool_size=None if maximum_pool_size is None - else parse_bytes(maximum_pool_size) - ) - else: - typ = upstream - args = () - kwargs = {} - - cdef DeviceMemoryResource mr - cdef int original_device - - # Save the current device so we can reset it - try: - original_device = getDevice() - except CUDARuntimeError as e: - if e.status == cudaError_t.cudaErrorNoDevice: - warnings.warn(e.msg) - else: - raise e - else: - # reset any previously specified per device resources - global _per_device_mrs - _per_device_mrs.clear() - - if devices is None: - devices = [0] - elif isinstance(devices, int): - devices = [devices] - - # create a memory resource per specified device - for device in devices: - setDevice(device) - - if logging: - mr = LoggingResourceAdaptor( - typ(*args, **kwargs), - log_file_name - ) - else: - mr = typ(*args, **kwargs) - - set_per_device_resource(device, mr) - - # reset CUDA device to original - setDevice(original_device) - - -cpdef get_per_device_resource(int device): - """ - Get the default memory resource for the specified device. - - If the returned memory resource is used when a different device is the - active CUDA device, behavior is undefined. - - Parameters - ---------- - device : int - The ID of the device for which to get the memory resource. - """ - global _per_device_mrs - return _per_device_mrs[device] - - -cpdef set_per_device_resource(int device, DeviceMemoryResource mr): - """ - Set the default memory resource for the specified device. - - Parameters - ---------- - device : int - The ID of the device for which to get the memory resource. - mr : DeviceMemoryResource - The memory resource to set. Must have been created while device was - the active CUDA device. - """ - global _per_device_mrs - _per_device_mrs[device] = mr - - # Since cuda_device_id does not have a default constructor, it must be heap - # allocated - cdef unique_ptr[cuda_device_id] device_id = \ - make_unique[cuda_device_id](device) - - cpp_set_per_device_resource(deref(device_id), mr.get_mr()) - - -cpdef set_current_device_resource(DeviceMemoryResource mr): - """ - Set the default memory resource for the current device. - - Parameters - ---------- - mr : DeviceMemoryResource - The memory resource to set. Must have been created while the current - device is the active CUDA device. - """ - set_per_device_resource(getDevice(), mr) - - -cpdef get_per_device_resource_type(int device): - """ - Get the memory resource type used for RMM device allocations on the - specified device. - - Parameters - ---------- - device : int - The device ID - """ - return type(get_per_device_resource(device)) - - -cpdef DeviceMemoryResource get_current_device_resource(): - """ - Get the memory resource used for RMM device allocations on the current - device. 
- - If the returned memory resource is used when a different device is the - active CUDA device, behavior is undefined. - """ - return get_per_device_resource(getDevice()) - - -cpdef get_current_device_resource_type(): - """ - Get the memory resource type used for RMM device allocations on the - current device. - """ - return type(get_current_device_resource()) - - -cpdef is_initialized(): - """ - Check whether RMM is initialized - """ - global _per_device_mrs - cdef DeviceMemoryResource each_mr - return all( - [each_mr.get_mr() is not NULL - for each_mr in _per_device_mrs.values()] - ) - - -cpdef _flush_logs(): - """ - Flush the logs of all currently initialized LoggingResourceAdaptor - memory resources - """ - global _per_device_mrs - cdef DeviceMemoryResource each_mr - for each_mr in _per_device_mrs.values(): - if isinstance(each_mr, LoggingResourceAdaptor): - each_mr.flush() - - -def enable_logging(log_file_name=None): - """ - Enable logging of run-time events for all devices. - - Parameters - ---------- - log_file_name: str, optional - Name of the log file. If not specified, the environment variable - RMM_LOG_FILE is used. A ValueError is thrown if neither is available. - A separate log file is produced for each device, - and the suffix `".dev{id}"` is automatically added to the log file - name. - - Notes - ----- - Note that if you use the environment variable CUDA_VISIBLE_DEVICES - with logging enabled, the suffix may not be what you expect. For - example, if you set CUDA_VISIBLE_DEVICES=1, the log file produced - will still have suffix `0`. Similarly, if you set - CUDA_VISIBLE_DEVICES=1,0 and use devices 0 and 1, the log file - with suffix `0` will correspond to the GPU with device ID `1`. - Use `rmm.get_log_filenames()` to get the log file names - corresponding to each device. - """ - global _per_device_mrs - - devices = [0] if not _per_device_mrs.keys() else _per_device_mrs.keys() - - for device in devices: - each_mr = _per_device_mrs[device] - if not isinstance(each_mr, LoggingResourceAdaptor): - set_per_device_resource( - device, - LoggingResourceAdaptor(each_mr, log_file_name) - ) - - -def disable_logging(): - """ - Disable logging if it was enabled previously using - `rmm.initialize()` or `rmm.enable_logging()`. - """ - global _per_device_mrs - for i, each_mr in _per_device_mrs.items(): - if isinstance(each_mr, LoggingResourceAdaptor): - set_per_device_resource(i, each_mr.get_upstream()) - - -def get_log_filenames(): - """ - Returns the log filename (or `None` if not writing logs) - for each device in use. - - Examples - -------- - >>> import rmm - >>> rmm.reinitialize(devices=[0, 1], logging=True, log_file_name="rmm.log") - >>> rmm.get_log_filenames() - {0: '/home/user/workspace/rapids/rmm/python/rmm.dev0.log', - 1: '/home/user/workspace/rapids/rmm/python/rmm.dev1.log'} - """ - global _per_device_mrs - - return { - i: each_mr.get_file_name() - if isinstance(each_mr, LoggingResourceAdaptor) - else None - for i, each_mr in _per_device_mrs.items() - } - - -def available_device_memory(): - """ - Returns a tuple of free and total device memory memory. 
-    """
-    cdef pair[size_t, size_t] res
-    res = c_available_device_memory()
-    return (res.first, res.second)
diff --git a/python/rmm/rmm/_lib/per_device_resource.pxd b/python/rmm/rmm/_lib/per_device_resource.pxd
deleted file mode 100644
index c33217622..000000000
--- a/python/rmm/rmm/_lib/per_device_resource.pxd
+++ /dev/null
@@ -1,23 +0,0 @@
-from rmm._lib.memory_resource cimport device_memory_resource
-
-
-cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil:
-    cdef cppclass cuda_device_id:
-        ctypedef int value_type
-
-        cuda_device_id(value_type id)
-
-        value_type value()
-
-cdef extern from "rmm/mr/device/per_device_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    cdef device_memory_resource* set_current_device_resource(
-        device_memory_resource* new_mr
-    )
-    cdef device_memory_resource* get_current_device_resource()
-    cdef device_memory_resource* set_per_device_resource(
-        cuda_device_id id, device_memory_resource* new_mr
-    )
-    cdef device_memory_resource* get_per_device_resource (
-        cuda_device_id id
-    )
diff --git a/python/rmm/rmm/_lib/tests/__init__.py b/python/rmm/rmm/_lib/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx b/python/rmm/rmm/_lib/tests/test_device_buffer.pyx
deleted file mode 100644
index 0783dd9c6..000000000
--- a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from libcpp.memory cimport make_unique
-
-from rmm.cpp.cuda_stream_view cimport cuda_stream_default
-from rmm.cpp.device_buffer cimport device_buffer
-from rmm.python.device_buffer cimport DeviceBuffer
-
-
-def test_release():
-    expect = DeviceBuffer.to_device(b'abc')
-    cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc')
-
-    got = DeviceBuffer.c_from_unique_ptr(
-        make_unique[device_buffer](buf.c_release(),
-                                   cuda_stream_default.value())
-    )
-    np.testing.assert_equal(expect.copy_to_host(), got.copy_to_host())
-
-
-def test_size_after_release():
-    cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc')
-    buf.c_release()
-    assert buf.size == 0
From 15b1ae113b5d04f4e8724c41397393dd5dd0871a Mon Sep 17 00:00:00 2001
From: Matthew Murray
Date: Mon, 16 Sep 2024 11:07:37 -0700
Subject: [PATCH 04/13] deprecate rmm._lib

---
 python/rmm/rmm/__init__.py       | 15 +++++++++++++++
 python/rmm/rmm/_lib/__init__.py  | 22 ++++++++++++++++++++++
 python/rmm/rmm/tests/test_rmm.py |  5 +++++
 3 files changed, 42 insertions(+)
 create mode 100644 python/rmm/rmm/_lib/__init__.py

diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py
index 61c5e4561..3c55a5cbd 100644
--- a/python/rmm/rmm/__init__.py
+++ b/python/rmm/rmm/__init__.py
@@ -52,3 +52,18 @@
     "should_log",
     "unregister_reinitialize_hook",
 ]
+
+
+def __getattr__(name):
+    if name == "_lib":
+        import importlib
+        import warnings
+
+        warnings.warn(
+            "The `rmm._lib` module is deprecated and will be removed in a future release. 
Use `rmm.python` instead.",
+            FutureWarning,
+        )
+        module = importlib.import_module("rmm.python")
+        return module
+    else:
+        raise AttributeError(f"Module '{__name__}' has no attribute '{name}'")
diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py
new file mode 100644
index 000000000..9b6d1daf4
--- /dev/null
+++ b/python/rmm/rmm/_lib/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+
+warnings.warn(
+    "The `rmm._lib` module is deprecated and will be removed in a future release. Use `rmm.python` instead.",
+    FutureWarning,
+)
+
+from rmm.python import *
diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py
index 4d7b6c646..a1a973950 100644
--- a/python/rmm/rmm/tests/test_rmm.py
+++ b/python/rmm/rmm/tests/test_rmm.py
@@ -1076,3 +1076,8 @@ def test_available_device_memory():
     assert initial_memory[1] == final_memory[1]
     assert initial_memory[0] > 0
     assert final_memory[0] > 0
+
+
+def test_deprecate_rmm_lib():
+    with pytest.warns(FutureWarning):
+        rmm._lib.device_buffer.DeviceBuffer(size=100)
From a6a831608282300e0fa5b86fc8f4d5e3bfa46a63 Mon Sep 17 00:00:00 2001
From: Matthew Murray
Date: Tue, 17 Sep 2024 07:40:15 -0700
Subject: [PATCH 05/13] address review

---
 python/rmm/rmm/allocators/cupy.py  | 4 ++--
 python/rmm/rmm/allocators/numba.py | 4 ++--
 python/rmm/rmm/tests/test_rmm.py   | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/rmm/rmm/allocators/cupy.py b/python/rmm/rmm/allocators/cupy.py
index 8cd41b3b3..94003541f 100644
--- a/python/rmm/rmm/allocators/cupy.py
+++ b/python/rmm/rmm/allocators/cupy.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from rmm import python as librmm
+from rmm import python as pylibrmm
 from rmm._cuda.stream import Stream

 try:
@@ -34,7 +34,7 @@ def rmm_cupy_allocator(nbytes):
         raise ModuleNotFoundError("No module named 'cupy'")

     stream = Stream(obj=cupy.cuda.get_current_stream())
-    buf = librmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream)
+    buf = pylibrmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream)
     dev_id = -1 if buf.ptr else cupy.cuda.device.get_device_id()
     mem = cupy.cuda.UnownedMemory(
         ptr=buf.ptr, size=buf.size, owner=buf, device_id=dev_id
diff --git a/python/rmm/rmm/allocators/numba.py b/python/rmm/rmm/allocators/numba.py
index 9838b8b59..d5bfdcf9f 100644
--- a/python/rmm/rmm/allocators/numba.py
+++ b/python/rmm/rmm/allocators/numba.py
@@ -19,7 +19,7 @@
 from numba import config, cuda
 from numba.cuda import HostOnlyCUDAMemoryManager, IpcHandle, MemoryPointer

-from rmm import python as librmm
+from rmm import python as pylibrmm


 def _make_emm_plugin_finalizer(handle, allocations):
@@ -70,7 +70,7 @@ def memalloc(self, size):
         """
         Allocate an on-device array from the RMM pool.
""" - buf = librmm.DeviceBuffer(size=size) + buf = pylibrmm.DeviceBuffer(size=size) ctx = self.context if config.CUDA_USE_NVIDIA_BINDING: diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index a1a973950..826071fce 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -1078,6 +1078,7 @@ def test_available_device_memory(): assert final_memory[0] > 0 +# TODO: Remove test when rmm._lib is removed in 24.12 def test_deprecate_rmm_lib(): with pytest.warns(FutureWarning): rmm._lib.device_buffer.DeviceBuffer(size=100) From 724b96237a660e4ee67f3195d2cbdb1d002a4381 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 18 Sep 2024 09:52:11 -0700 Subject: [PATCH 06/13] replace lib with _logger --- python/rmm/rmm/allocators/torch.py | 6 +++--- python/rmm/rmm/cpp/CMakeLists.txt | 2 +- python/rmm/rmm/cpp/lib.pxd | 17 ----------------- python/rmm/rmm/cpp/lib.pyx | 13 ------------- 4 files changed, 4 insertions(+), 34 deletions(-) delete mode 100644 python/rmm/rmm/cpp/lib.pxd delete mode 100644 python/rmm/rmm/cpp/lib.pyx diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index 8df921ad8..af675d8c8 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -28,10 +28,10 @@ # allocator .so relative to the current file because the current file # is pure Python and will therefore be in the source directory. # Instead, we search relative to an arbitrary file in the compiled - # package. We use the _lib.lib module because it is small. - from rmm.cpp import lib + # package. We use the cpp._logger module because it is small. + from rmm.cpp import _logger - sofile = pathlib.Path(lib.__file__).parent / "_torch_allocator.so" + sofile = pathlib.Path(_logger.__file__).parent / "_torch_allocator.so" rmm_torch_allocator = CUDAPluggableAllocator( str(sofile.absolute()), alloc_fn_name="allocate", diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/cpp/CMakeLists.txt index a201187b8..5da2a1a01 100644 --- a/python/rmm/rmm/cpp/CMakeLists.txt +++ b/python/rmm/rmm/cpp/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. # ============================================================================= -set(cython_sources lib.pyx _logger.pyx) +set(cython_sources _logger.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets diff --git a/python/rmm/rmm/cpp/lib.pxd b/python/rmm/rmm/cpp/lib.pxd deleted file mode 100644 index 592574862..000000000 --- a/python/rmm/rmm/cpp/lib.pxd +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from libcpp.utility cimport pair - -ctypedef pair[const char*, unsigned int] caller_pair diff --git a/python/rmm/rmm/cpp/lib.pyx b/python/rmm/rmm/cpp/lib.pyx deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/cpp/lib.pyx +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. From 10dfc8e0f2f7c3ed195b9aa794155a1610059887 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 19 Sep 2024 08:37:59 -0700 Subject: [PATCH 07/13] remove __init__.pxd --- python/rmm/rmm/cpp/__init__.pxd | 13 ------------- python/rmm/rmm/python/__init__.pxd | 13 ------------- 2 files changed, 26 deletions(-) delete mode 100644 python/rmm/rmm/cpp/__init__.pxd delete mode 100644 python/rmm/rmm/python/__init__.pxd diff --git a/python/rmm/rmm/cpp/__init__.pxd b/python/rmm/rmm/cpp/__init__.pxd deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/cpp/__init__.pxd +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/rmm/rmm/python/__init__.pxd b/python/rmm/rmm/python/__init__.pxd deleted file mode 100644 index 46753baa3..000000000 --- a/python/rmm/rmm/python/__init__.pxd +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
From 153cb3b4719f0be226335aeb5557d168a308e998 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 23 Sep 2024 18:44:23 -0700 Subject: [PATCH 08/13] Address review --- python/rmm/CMakeLists.txt | 4 +-- python/rmm/rmm/__init__.py | 8 ++--- python/rmm/rmm/_cuda/stream.pxd | 2 +- python/rmm/rmm/_cuda/stream.pyx | 4 +-- python/rmm/rmm/_lib/__init__.py | 4 +-- python/rmm/rmm/_lib/cuda_stream.pxd | 15 ++++++++ python/rmm/rmm/_lib/device_buffer.pxd | 21 ++++++++++++ python/rmm/rmm/_lib/helper.pxd | 15 ++++++++ python/rmm/rmm/_lib/memory_resource.pxd | 34 +++++++++++++++++++ python/rmm/rmm/allocators/cupy.py | 2 +- python/rmm/rmm/allocators/numba.py | 2 +- python/rmm/rmm/allocators/torch.py | 2 +- python/rmm/rmm/{cpp => librmm}/CMakeLists.txt | 0 python/rmm/rmm/{cpp => librmm}/__init__.py | 0 python/rmm/rmm/{cpp => librmm}/_logger.pxd | 0 python/rmm/rmm/{cpp => librmm}/_logger.pyx | 0 .../rmm/{cpp => librmm}/_torch_allocator.cpp | 0 .../rmm/rmm/{cpp => librmm}/cuda_stream.pxd | 2 +- .../rmm/{cpp => librmm}/cuda_stream_pool.pxd | 2 +- .../rmm/{cpp => librmm}/cuda_stream_view.pxd | 0 .../rmm/rmm/{cpp => librmm}/device_buffer.pxd | 4 +-- .../rmm/{cpp => librmm}/device_uvector.pxd | 6 ++-- .../rmm/{cpp => librmm}/memory_resource.pxd | 4 +-- .../{cpp => librmm}/per_device_resource.pxd | 2 +- python/rmm/rmm/mr.py | 2 +- .../rmm/{python => pylibrmm}/CMakeLists.txt | 0 .../rmm/rmm/{python => pylibrmm}/__init__.py | 0 .../rmm/{python => pylibrmm}/cuda_stream.pxd | 2 +- .../rmm/{python => pylibrmm}/cuda_stream.pyx | 2 +- .../{python => pylibrmm}/device_buffer.pxd | 4 +-- .../{python => pylibrmm}/device_buffer.pyx | 16 ++++----- .../rmm/rmm/{python => pylibrmm}/helper.pxd | 0 .../rmm/rmm/{python => pylibrmm}/helper.pyx | 0 .../rmm/rmm/{python => pylibrmm}/logger.pyx | 4 +-- .../{python => pylibrmm}/memory_resource.pxd | 2 +- .../{python => pylibrmm}/memory_resource.pyx | 8 ++--- .../{python => pylibrmm}/tests/__init__.py | 0 .../tests/test_device_buffer.pyx | 6 ++-- python/rmm/rmm/tests/test_cython.py | 2 +- python/rmm/rmm/tests/test_rmm.py | 2 +- 40 files changed, 134 insertions(+), 49 deletions(-) create mode 100644 python/rmm/rmm/_lib/cuda_stream.pxd create mode 100644 python/rmm/rmm/_lib/device_buffer.pxd create mode 100644 python/rmm/rmm/_lib/helper.pxd create mode 100644 python/rmm/rmm/_lib/memory_resource.pxd rename python/rmm/rmm/{cpp => librmm}/CMakeLists.txt (100%) rename python/rmm/rmm/{cpp => librmm}/__init__.py (100%) rename python/rmm/rmm/{cpp => librmm}/_logger.pxd (100%) rename python/rmm/rmm/{cpp => librmm}/_logger.pyx (100%) rename python/rmm/rmm/{cpp => librmm}/_torch_allocator.cpp (100%) rename python/rmm/rmm/{cpp => librmm}/cuda_stream.pxd (94%) rename python/rmm/rmm/{cpp => librmm}/cuda_stream_pool.pxd (93%) rename python/rmm/rmm/{cpp => librmm}/cuda_stream_view.pxd (100%) rename python/rmm/rmm/{cpp => librmm}/device_buffer.pxd (94%) rename python/rmm/rmm/{cpp => librmm}/device_uvector.pxd (89%) rename python/rmm/rmm/{cpp => librmm}/memory_resource.pxd (98%) rename python/rmm/rmm/{cpp => librmm}/per_device_resource.pxd (95%) rename python/rmm/rmm/{python => pylibrmm}/CMakeLists.txt (100%) rename python/rmm/rmm/{python => pylibrmm}/__init__.py (100%) rename python/rmm/rmm/{python => pylibrmm}/cuda_stream.pxd (94%) rename python/rmm/rmm/{python => pylibrmm}/cuda_stream.pyx (95%) rename python/rmm/rmm/{python => pylibrmm}/device_buffer.pxd (95%) rename python/rmm/rmm/{python => pylibrmm}/device_buffer.pyx (97%) rename python/rmm/rmm/{python => pylibrmm}/helper.pxd (100%) rename 
python/rmm/rmm/{python => pylibrmm}/helper.pyx (100%) rename python/rmm/rmm/{python => pylibrmm}/logger.pyx (98%) rename python/rmm/rmm/{python => pylibrmm}/memory_resource.pxd (97%) rename python/rmm/rmm/{python => pylibrmm}/memory_resource.pyx (99%) rename python/rmm/rmm/{python => pylibrmm}/tests/__init__.py (100%) rename python/rmm/rmm/{python => pylibrmm}/tests/test_device_buffer.pyx (87%) diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index 119888a4f..ac8495e14 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -30,5 +30,5 @@ rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") add_subdirectory(rmm/_cuda) -add_subdirectory(rmm/cpp) -add_subdirectory(rmm/python) +add_subdirectory(rmm/librmm) +add_subdirectory(rmm/pylibrmm) diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py index 3c55a5cbd..c52818a75 100644 --- a/python/rmm/rmm/__init__.py +++ b/python/rmm/rmm/__init__.py @@ -15,8 +15,8 @@ from rmm import mr from rmm._version import __git_commit__, __version__ from rmm.mr import disable_logging, enable_logging, get_log_filenames -from rmm.python.device_buffer import DeviceBuffer -from rmm.python.logger import ( +from rmm.pylibrmm.device_buffer import DeviceBuffer +from rmm.pylibrmm.logger import ( flush_logger, get_flush_level, get_logging_level, @@ -60,10 +60,10 @@ def __getattr__(name): import warnings warnings.warn( - "The `rmm._lib` module is deprecated in will be removed in a future release. Use `rmm.python` instead.", + "The `rmm._lib` module is deprecated in will be removed in a future release. Use `rmm.pylibrmm` instead.", FutureWarning, ) - module = importlib.import_module("rmm.python") + module = importlib.import_module("rmm.pylibrmm") return module else: raise AttributeError(f"Module '{__name__}' has no attribute '{name}'") diff --git a/python/rmm/rmm/_cuda/stream.pxd b/python/rmm/rmm/_cuda/stream.pxd index c978074fc..e91e2ce58 100644 --- a/python/rmm/rmm/_cuda/stream.pxd +++ b/python/rmm/rmm/_cuda/stream.pxd @@ -16,7 +16,7 @@ from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool -from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef class Stream: diff --git a/python/rmm/rmm/_cuda/stream.pyx b/python/rmm/rmm/_cuda/stream.pyx index 3c6c10c18..37dcbd610 100644 --- a/python/rmm/rmm/_cuda/stream.pyx +++ b/python/rmm/rmm/_cuda/stream.pyx @@ -16,13 +16,13 @@ from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t from libcpp cimport bool -from rmm.cpp.cuda_stream_view cimport ( +from rmm.librmm.cuda_stream_view cimport ( cuda_stream_default, cuda_stream_legacy, cuda_stream_per_thread, cuda_stream_view, ) -from rmm.python.cuda_stream cimport CudaStream +from rmm.pylibrmm.cuda_stream cimport CudaStream cdef class Stream: diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py index 9b6d1daf4..965d8e917 100644 --- a/python/rmm/rmm/_lib/__init__.py +++ b/python/rmm/rmm/_lib/__init__.py @@ -15,8 +15,8 @@ import warnings warnings.warn( - "The `rmm._lib` module is deprecated in will be removed in a future release. Use `rmm.python` instead.", + "The `rmm._lib` module is deprecated in will be removed in a future release. 
Use `rmm.pylibrmm` instead.", FutureWarning, ) -from rmm.python import * +from rmm.pylibrmm import * diff --git a/python/rmm/rmm/_lib/cuda_stream.pxd b/python/rmm/rmm/_lib/cuda_stream.pxd new file mode 100644 index 000000000..f7b5759bd --- /dev/null +++ b/python/rmm/rmm/_lib/cuda_stream.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.cuda_stream cimport CudaStream diff --git a/python/rmm/rmm/_lib/device_buffer.pxd b/python/rmm/rmm/_lib/device_buffer.pxd new file mode 100644 index 000000000..66c19d990 --- /dev/null +++ b/python/rmm/rmm/_lib/device_buffer.pxd @@ -0,0 +1,21 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.device_buffer cimport ( + DeviceBuffer, + copy_device_to_ptr, + copy_host_to_ptr, + copy_ptr_to_host, + to_device, +) diff --git a/python/rmm/rmm/_lib/helper.pxd b/python/rmm/rmm/_lib/helper.pxd new file mode 100644 index 000000000..4a5159435 --- /dev/null +++ b/python/rmm/rmm/_lib/helper.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.helper cimport parse_bytes diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd new file mode 100644 index 000000000..79a28cac2 --- /dev/null +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.memory_resource cimport ( + BinningMemoryResource, + CallbackMemoryResource, + CudaAsyncMemoryResource, + CudaMemoryResource, + DeviceMemoryResource, + FailureCallbackResourceAdaptor, + FixedSizeMemoryResource, + LimitingResourceAdaptor, + LoggingResourceAdaptor, + ManagedMemoryResource, + PoolMemoryResource, + PrefetchResourceAdaptor, + SamHeadroomMemoryResource, + StatisticsResourceAdaptor, + SystemMemoryResource, + TrackingResourceAdaptor, + UpstreamResourceAdaptor, + get_current_device_resource, +) diff --git a/python/rmm/rmm/allocators/cupy.py b/python/rmm/rmm/allocators/cupy.py index 94003541f..780ff2abf 100644 --- a/python/rmm/rmm/allocators/cupy.py +++ b/python/rmm/rmm/allocators/cupy.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm import python as pylibrmm +from rmm import pylibrmm from rmm._cuda.stream import Stream try: diff --git a/python/rmm/rmm/allocators/numba.py b/python/rmm/rmm/allocators/numba.py index d5bfdcf9f..fd9bacb5a 100644 --- a/python/rmm/rmm/allocators/numba.py +++ b/python/rmm/rmm/allocators/numba.py @@ -19,7 +19,7 @@ from numba import config, cuda from numba.cuda import HostOnlyCUDAMemoryManager, IpcHandle, MemoryPointer -from rmm import python as pylibrmm +from rmm import pylibrmm def _make_emm_plugin_finalizer(handle, allocations): diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index af675d8c8..462a06e80 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -29,7 +29,7 @@ # is pure Python and will therefore be in the source directory. # Instead, we search relative to an arbitrary file in the compiled # package. We use the cpp._logger module because it is small. 
- from rmm.cpp import _logger + from rmm.librmm import _logger sofile = pathlib.Path(_logger.__file__).parent / "_torch_allocator.so" rmm_torch_allocator = CUDAPluggableAllocator( diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/librmm/CMakeLists.txt similarity index 100% rename from python/rmm/rmm/cpp/CMakeLists.txt rename to python/rmm/rmm/librmm/CMakeLists.txt diff --git a/python/rmm/rmm/cpp/__init__.py b/python/rmm/rmm/librmm/__init__.py similarity index 100% rename from python/rmm/rmm/cpp/__init__.py rename to python/rmm/rmm/librmm/__init__.py diff --git a/python/rmm/rmm/cpp/_logger.pxd b/python/rmm/rmm/librmm/_logger.pxd similarity index 100% rename from python/rmm/rmm/cpp/_logger.pxd rename to python/rmm/rmm/librmm/_logger.pxd diff --git a/python/rmm/rmm/cpp/_logger.pyx b/python/rmm/rmm/librmm/_logger.pyx similarity index 100% rename from python/rmm/rmm/cpp/_logger.pyx rename to python/rmm/rmm/librmm/_logger.pyx diff --git a/python/rmm/rmm/cpp/_torch_allocator.cpp b/python/rmm/rmm/librmm/_torch_allocator.cpp similarity index 100% rename from python/rmm/rmm/cpp/_torch_allocator.cpp rename to python/rmm/rmm/librmm/_torch_allocator.cpp diff --git a/python/rmm/rmm/cpp/cuda_stream.pxd b/python/rmm/rmm/librmm/cuda_stream.pxd similarity index 94% rename from python/rmm/rmm/cpp/cuda_stream.pxd rename to python/rmm/rmm/librmm/cuda_stream.pxd index fcd84198c..3f2ac3361 100644 --- a/python/rmm/rmm/cpp/cuda_stream.pxd +++ b/python/rmm/rmm/librmm/cuda_stream.pxd @@ -15,7 +15,7 @@ from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cuda_stream_pool.pxd b/python/rmm/rmm/librmm/cuda_stream_pool.pxd similarity index 93% rename from python/rmm/rmm/cpp/cuda_stream_pool.pxd rename to python/rmm/rmm/librmm/cuda_stream_pool.pxd index f72cfd7f6..4f2cbb36d 100644 --- a/python/rmm/rmm/cpp/cuda_stream_pool.pxd +++ b/python/rmm/rmm/librmm/cuda_stream_pool.pxd @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/cuda_stream_view.pxd b/python/rmm/rmm/librmm/cuda_stream_view.pxd similarity index 100% rename from python/rmm/rmm/cpp/cuda_stream_view.pxd rename to python/rmm/rmm/librmm/cuda_stream_view.pxd diff --git a/python/rmm/rmm/cpp/device_buffer.pxd b/python/rmm/rmm/librmm/device_buffer.pxd similarity index 94% rename from python/rmm/rmm/cpp/device_buffer.pxd rename to python/rmm/rmm/librmm/device_buffer.pxd index a3801cf05..1c503ac9a 100644 --- a/python/rmm/rmm/cpp/device_buffer.pxd +++ b/python/rmm/rmm/librmm/device_buffer.pxd @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/device_uvector.pxd b/python/rmm/rmm/librmm/device_uvector.pxd similarity index 89% rename from python/rmm/rmm/cpp/device_uvector.pxd rename to python/rmm/rmm/librmm/device_uvector.pxd index 2cb647e3c..f560a9e38 100644 --- a/python/rmm/rmm/cpp/device_uvector.pxd +++ b/python/rmm/rmm/librmm/device_uvector.pxd @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.device_buffer cimport device_buffer -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.device_buffer cimport device_buffer +from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/cpp/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd similarity index 98% rename from python/rmm/rmm/cpp/memory_resource.pxd rename to python/rmm/rmm/librmm/memory_resource.pxd index 5e457b653..a8e932e6c 100644 --- a/python/rmm/rmm/cpp/memory_resource.pxd +++ b/python/rmm/rmm/librmm/memory_resource.pxd @@ -23,8 +23,8 @@ from libcpp.optional cimport optional from libcpp.pair cimport pair from libcpp.string cimport string -from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ diff --git a/python/rmm/rmm/cpp/per_device_resource.pxd b/python/rmm/rmm/librmm/per_device_resource.pxd similarity index 95% rename from python/rmm/rmm/cpp/per_device_resource.pxd rename to python/rmm/rmm/librmm/per_device_resource.pxd index ec43d5f99..63ee29056 100644 --- a/python/rmm/rmm/cpp/per_device_resource.pxd +++ b/python/rmm/rmm/librmm/per_device_resource.pxd @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index f820cf805..3f0c3fce3 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from rmm.python.memory_resource import ( +from rmm.pylibrmm.memory_resource import ( BinningMemoryResource, CallbackMemoryResource, CudaAsyncMemoryResource, diff --git a/python/rmm/rmm/python/CMakeLists.txt b/python/rmm/rmm/pylibrmm/CMakeLists.txt similarity index 100% rename from python/rmm/rmm/python/CMakeLists.txt rename to python/rmm/rmm/pylibrmm/CMakeLists.txt diff --git a/python/rmm/rmm/python/__init__.py b/python/rmm/rmm/pylibrmm/__init__.py similarity index 100% rename from python/rmm/rmm/python/__init__.py rename to python/rmm/rmm/pylibrmm/__init__.py diff --git a/python/rmm/rmm/python/cuda_stream.pxd b/python/rmm/rmm/pylibrmm/cuda_stream.pxd similarity index 94% rename from python/rmm/rmm/python/cuda_stream.pxd rename to python/rmm/rmm/pylibrmm/cuda_stream.pxd index 169e560a4..dd38387c2 100644 --- a/python/rmm/rmm/python/cuda_stream.pxd +++ b/python/rmm/rmm/pylibrmm/cuda_stream.pxd @@ -17,7 +17,7 @@ from cuda.ccudart cimport cudaStream_t from libcpp cimport bool from libcpp.memory cimport unique_ptr -from rmm.cpp.cuda_stream cimport cuda_stream +from rmm.librmm.cuda_stream cimport cuda_stream @cython.final diff --git a/python/rmm/rmm/python/cuda_stream.pyx b/python/rmm/rmm/pylibrmm/cuda_stream.pyx similarity index 95% rename from python/rmm/rmm/python/cuda_stream.pyx rename to python/rmm/rmm/pylibrmm/cuda_stream.pyx index de02291d4..d6aa4edc7 100644 --- a/python/rmm/rmm/python/cuda_stream.pyx +++ b/python/rmm/rmm/pylibrmm/cuda_stream.pyx @@ -16,7 +16,7 @@ cimport cython from cuda.ccudart cimport cudaStream_t from libcpp cimport bool -from rmm.cpp.cuda_stream cimport cuda_stream +from rmm.librmm.cuda_stream cimport cuda_stream @cython.final diff --git a/python/rmm/rmm/python/device_buffer.pxd b/python/rmm/rmm/pylibrmm/device_buffer.pxd similarity index 95% rename from python/rmm/rmm/python/device_buffer.pxd rename to python/rmm/rmm/pylibrmm/device_buffer.pxd index 2d4d932af..a0d287423 100644 --- a/python/rmm/rmm/python/device_buffer.pxd +++ b/python/rmm/rmm/pylibrmm/device_buffer.pxd @@ -16,8 +16,8 @@ from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from rmm._cuda.stream cimport Stream -from rmm.cpp.device_buffer cimport device_buffer -from rmm.python.memory_resource cimport DeviceMemoryResource +from rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cdef class DeviceBuffer: diff --git a/python/rmm/rmm/python/device_buffer.pyx b/python/rmm/rmm/pylibrmm/device_buffer.pyx similarity index 97% rename from python/rmm/rmm/python/device_buffer.pyx rename to python/rmm/rmm/pylibrmm/device_buffer.pyx index 73b0d32ec..76fbceef8 100644 --- a/python/rmm/rmm/python/device_buffer.pyx +++ b/python/rmm/rmm/pylibrmm/device_buffer.pyx @@ -32,15 +32,15 @@ from cuda.ccudart cimport ( cudaStream_t, ) -from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.device_buffer cimport ( +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.device_buffer cimport ( cuda_device_id, device_buffer, get_current_cuda_device, prefetch, ) -from rmm.cpp.memory_resource cimport device_memory_resource -from rmm.python.memory_resource cimport ( +from rmm.librmm.memory_resource cimport device_memory_resource +from rmm.pylibrmm.memory_resource cimport ( DeviceMemoryResource, get_current_device_resource, ) @@ -401,7 +401,7 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b, Examples -------- >>> import rmm - >>> db = rmm.python.device_buffer.to_device(b"abc") + >>> db = 
rmm.pylibrmm.device_buffer.to_device(b"abc") >>> print(bytes(db)) b'abc' """ @@ -467,7 +467,7 @@ cpdef void copy_ptr_to_host(uintptr_t db, >>> import rmm >>> db = rmm.DeviceBuffer.to_device(b"abc") >>> hb = bytearray(db.nbytes) - >>> rmm.python.device_buffer.copy_ptr_to_host(db.ptr, hb) + >>> rmm.pylibrmm.device_buffer.copy_ptr_to_host(db.ptr, hb) >>> print(hb) bytearray(b'abc') """ @@ -509,7 +509,7 @@ cpdef void copy_host_to_ptr(const unsigned char[::1] hb, >>> import rmm >>> db = rmm.DeviceBuffer(size=10) >>> hb = b"abc" - >>> rmm.python.device_buffer.copy_host_to_ptr(hb, db.ptr) + >>> rmm.pylibrmm.device_buffer.copy_host_to_ptr(hb, db.ptr) >>> hb = db.copy_to_host() >>> print(hb) array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) @@ -548,7 +548,7 @@ cpdef void copy_device_to_ptr(uintptr_t d_src, >>> import rmm >>> db = rmm.DeviceBuffer(size=5) >>> db2 = rmm.DeviceBuffer.to_device(b"abc") - >>> rmm.python.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) + >>> rmm.pylibrmm.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size) >>> hb = db.copy_to_host() >>> hb array([97, 98, 99, 0, 0], dtype=uint8) diff --git a/python/rmm/rmm/python/helper.pxd b/python/rmm/rmm/pylibrmm/helper.pxd similarity index 100% rename from python/rmm/rmm/python/helper.pxd rename to python/rmm/rmm/pylibrmm/helper.pxd diff --git a/python/rmm/rmm/python/helper.pyx b/python/rmm/rmm/pylibrmm/helper.pyx similarity index 100% rename from python/rmm/rmm/python/helper.pyx rename to python/rmm/rmm/pylibrmm/helper.pyx diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/pylibrmm/logger.pyx similarity index 98% rename from python/rmm/rmm/python/logger.pyx rename to python/rmm/rmm/pylibrmm/logger.pyx index 15ba47308..119e1c92f 100644 --- a/python/rmm/rmm/python/logger.pyx +++ b/python/rmm/rmm/pylibrmm/logger.pyx @@ -14,9 +14,9 @@ import warnings -from rmm.cpp._logger cimport logger +from rmm.librmm._logger cimport logger -from rmm.cpp._logger import logging_level +from rmm.librmm._logger import logging_level def _validate_level_type(level): diff --git a/python/rmm/rmm/python/memory_resource.pxd b/python/rmm/rmm/pylibrmm/memory_resource.pxd similarity index 97% rename from python/rmm/rmm/python/memory_resource.pxd rename to python/rmm/rmm/pylibrmm/memory_resource.pxd index 4970d5230..985d5d31b 100644 --- a/python/rmm/rmm/python/memory_resource.pxd +++ b/python/rmm/rmm/pylibrmm/memory_resource.pxd @@ -14,7 +14,7 @@ from libcpp.memory cimport shared_ptr -from rmm.cpp.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource cdef class DeviceMemoryResource: diff --git a/python/rmm/rmm/python/memory_resource.pyx b/python/rmm/rmm/pylibrmm/memory_resource.pyx similarity index 99% rename from python/rmm/rmm/python/memory_resource.pyx rename to python/rmm/rmm/pylibrmm/memory_resource.pyx index 14585f36b..021125567 100644 --- a/python/rmm/rmm/python/memory_resource.pyx +++ b/python/rmm/rmm/pylibrmm/memory_resource.pyx @@ -36,16 +36,16 @@ from rmm._cuda.stream cimport Stream from rmm._cuda.stream import DEFAULT_STREAM -from rmm.cpp.cuda_stream_view cimport cuda_stream_view -from rmm.cpp.per_device_resource cimport ( +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.per_device_resource cimport ( cuda_device_id, set_per_device_resource as cpp_set_per_device_resource, ) -from rmm.python.helper cimport parse_bytes +from rmm.pylibrmm.helper cimport parse_bytes from rmm.statistics import Statistics -from rmm.cpp.memory_resource cimport ( 
+from rmm.librmm.memory_resource cimport ( CppExcept, allocate_callback_t, allocation_handle_type, diff --git a/python/rmm/rmm/python/tests/__init__.py b/python/rmm/rmm/pylibrmm/tests/__init__.py similarity index 100% rename from python/rmm/rmm/python/tests/__init__.py rename to python/rmm/rmm/pylibrmm/tests/__init__.py diff --git a/python/rmm/rmm/python/tests/test_device_buffer.pyx b/python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx similarity index 87% rename from python/rmm/rmm/python/tests/test_device_buffer.pyx rename to python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx index 0783dd9c6..ec2ff4def 100644 --- a/python/rmm/rmm/python/tests/test_device_buffer.pyx +++ b/python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx @@ -16,9 +16,9 @@ import numpy as np from libcpp.memory cimport make_unique -from rmm.cpp.cuda_stream_view cimport cuda_stream_default -from rmm.cpp.device_buffer cimport device_buffer -from rmm.python.device_buffer cimport DeviceBuffer +from rmm.librmm.cuda_stream_view cimport cuda_stream_default +from rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer def test_release(): diff --git a/python/rmm/rmm/tests/test_cython.py b/python/rmm/rmm/tests/test_cython.py index a0364942c..5df933435 100644 --- a/python/rmm/rmm/tests/test_cython.py +++ b/python/rmm/rmm/tests/test_cython.py @@ -29,7 +29,7 @@ def wrapped(*args, **kwargs): return wrapped -cython_test_modules = ["rmm.python.tests.test_device_buffer"] +cython_test_modules = ["rmm.pylibrmm.tests.test_device_buffer"] for mod in cython_test_modules: diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 826071fce..7704cac25 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -354,7 +354,7 @@ def test_rmm_pool_numba_stream(stream): rmm.reinitialize(pool_allocator=True) stream = rmm._cuda.stream.Stream(stream) - a = rmm.python.device_buffer.DeviceBuffer(size=3, stream=stream) + a = rmm.pylibrmm.device_buffer.DeviceBuffer(size=3, stream=stream) assert a.size == 3 assert a.ptr != 0 From 0d0ea02d1ad98150606d710586841498d4017fc9 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 24 Sep 2024 12:47:54 -0700 Subject: [PATCH 09/13] address reviews --- python/rmm/rmm/_lib/cuda_stream.pxd | 1 + python/rmm/rmm/_lib/cuda_stream_pool.pxd | 15 +++++ python/rmm/rmm/_lib/cuda_stream_view.pxd | 15 +++++ python/rmm/rmm/_lib/device_buffer.pxd | 6 ++ python/rmm/rmm/_lib/device_uvector.pxd | 15 +++++ python/rmm/rmm/_lib/logger.pxd | 75 +++++++++++++++++++++ python/rmm/rmm/_lib/memory_resource.pxd | 27 ++++++++ python/rmm/rmm/_lib/per_device_resource.pxd | 21 ++++++ python/rmm/rmm/allocators/torch.py | 2 +- python/rmm/rmm/librmm/_logger.pyx | 2 +- python/rmm/rmm/librmm/memory_resource.pxd | 2 - 11 files changed, 177 insertions(+), 4 deletions(-) create mode 100644 python/rmm/rmm/_lib/cuda_stream_pool.pxd create mode 100644 python/rmm/rmm/_lib/cuda_stream_view.pxd create mode 100644 python/rmm/rmm/_lib/device_uvector.pxd create mode 100644 python/rmm/rmm/_lib/logger.pxd create mode 100644 python/rmm/rmm/_lib/per_device_resource.pxd diff --git a/python/rmm/rmm/_lib/cuda_stream.pxd b/python/rmm/rmm/_lib/cuda_stream.pxd index f7b5759bd..afc365fbb 100644 --- a/python/rmm/rmm/_lib/cuda_stream.pxd +++ b/python/rmm/rmm/_lib/cuda_stream.pxd @@ -12,4 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from rmm.librmm.cuda_stream cimport cuda_stream from rmm.pylibrmm.cuda_stream cimport CudaStream diff --git a/python/rmm/rmm/_lib/cuda_stream_pool.pxd b/python/rmm/rmm/_lib/cuda_stream_pool.pxd new file mode 100644 index 000000000..4da59cc68 --- /dev/null +++ b/python/rmm/rmm/_lib/cuda_stream_pool.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.cuda_stream_pool cimport cuda_stream_pool diff --git a/python/rmm/rmm/_lib/cuda_stream_view.pxd b/python/rmm/rmm/_lib/cuda_stream_view.pxd new file mode 100644 index 000000000..78480420f --- /dev/null +++ b/python/rmm/rmm/_lib/cuda_stream_view.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.cuda_stream_view cimport cuda_stream_view diff --git a/python/rmm/rmm/_lib/device_buffer.pxd b/python/rmm/rmm/_lib/device_buffer.pxd index 66c19d990..22833b1b8 100644 --- a/python/rmm/rmm/_lib/device_buffer.pxd +++ b/python/rmm/rmm/_lib/device_buffer.pxd @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from rmm.librmm.device_buffer cimport ( + cuda_device_id, + device_buffer, + get_current_cuda_device, + prefetch, +) from rmm.pylibrmm.device_buffer cimport ( DeviceBuffer, copy_device_to_ptr, diff --git a/python/rmm/rmm/_lib/device_uvector.pxd b/python/rmm/rmm/_lib/device_uvector.pxd new file mode 100644 index 000000000..230b0afb3 --- /dev/null +++ b/python/rmm/rmm/_lib/device_uvector.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.device_uvector cimport device_uvector diff --git a/python/rmm/rmm/_lib/logger.pxd b/python/rmm/rmm/_lib/logger.pxd new file mode 100644 index 000000000..27936d09c --- /dev/null +++ b/python/rmm/rmm/_lib/logger.pxd @@ -0,0 +1,75 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.logger cimport logger, logging_level, spdlog_logger +from rmm.pylibrmm.logger cimport ( + _validate_level_type, + flush_logger, + get_flush_level, + get_logging_level, + set_flush_level, + set_logging_level, + should_log, +) + + +cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: + cpdef enum logging_level "spdlog::level::level_enum": + """ + The debug logging level for RMM. + + Debug logging prints messages to a log file. See + `Debug Logging `_ + for more information. + + Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, + INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. + + Examples + -------- + >>> import rmm + >>> rmm.logging_level.DEBUG + + >>> rmm.logging_level.DEBUG.value + 1 + >>> rmm.logging_level.DEBUG.name + 'DEBUG' + + See Also + -------- + set_logging_level : Set the debug logging level + get_logging_level : Get the current debug logging level + """ + TRACE "spdlog::level::trace" + DEBUG "spdlog::level::debug" + INFO "spdlog::level::info" + WARN "spdlog::level::warn" + ERR "spdlog::level::err" + CRITICAL "spdlog::level::critical" + OFF "spdlog::level::off" + + +cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: + cdef cppclass spdlog_logger "spdlog::logger": + spdlog_logger() except + + void set_level(logging_level level) + logging_level level() + void flush() except + + void flush_on(logging_level level) + logging_level flush_level() + bool should_log(logging_level msg_level) + + +cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: + cdef spdlog_logger& logger() except + diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 79a28cac2..983063914 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -12,6 +12,33 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from rmm.librmm.memory_resource cimport ( + CppExcept, + allocate_callback_t, + allocation_handle_type, + available_device_memory, + binning_memory_resource, + callback_memory_resource, + cuda_async_memory_resource, + cuda_memory_resource, + deallocate_callback_t, + device_memory_resource, + failure_callback_resource_adaptor, + failure_callback_t, + fixed_size_memory_resource, + limiting_resource_adaptor, + logging_resource_adaptor, + managed_memory_resource, + percent_of_free_device_memory, + pool_memory_resource, + prefetch_resource_adaptor, + sam_headroom_memory_resource, + statistics_resource_adaptor, + system_memory_resource, + throw_cpp_except, + tracking_resource_adaptor, + translate_python_except_to_cpp, +) from rmm.pylibrmm.memory_resource cimport ( BinningMemoryResource, CallbackMemoryResource, diff --git a/python/rmm/rmm/_lib/per_device_resource.pxd b/python/rmm/rmm/_lib/per_device_resource.pxd new file mode 100644 index 000000000..29487f503 --- /dev/null +++ b/python/rmm/rmm/_lib/per_device_resource.pxd @@ -0,0 +1,21 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.per_device_resource cimport ( + cuda_device_id, + get_current_device_resource, + get_per_device_resource, + set_current_device_resource, + set_per_device_resource, +) diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index 462a06e80..eee0e9df9 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -28,7 +28,7 @@ # allocator .so relative to the current file because the current file # is pure Python and will therefore be in the source directory. # Instead, we search relative to an arbitrary file in the compiled - # package. We use the cpp._logger module because it is small. + # package. We use the librmm._logger module because it is small. from rmm.librmm import _logger sofile = pathlib.Path(_logger.__file__).parent / "_torch_allocator.so" diff --git a/python/rmm/rmm/librmm/_logger.pyx b/python/rmm/rmm/librmm/_logger.pyx index e848c6dda..4392cb106 100644 --- a/python/rmm/rmm/librmm/_logger.pyx +++ b/python/rmm/rmm/librmm/_logger.pyx @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._logger cimport logging_level # no-cython-lint +from rmm.librmm._logger cimport logging_level # no-cython-lint diff --git a/python/rmm/rmm/librmm/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd index a8e932e6c..9ddaf04b9 100644 --- a/python/rmm/rmm/librmm/memory_resource.pxd +++ b/python/rmm/rmm/librmm/memory_resource.pxd @@ -79,8 +79,6 @@ cdef extern from *: void throw_cpp_except(CppExcept) nogil -# NOTE: Keep extern declarations in .pyx file as much as possible to avoid -# leaking dependencies when importing RMM Cython .pxd files cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass cuda_memory_resource(device_memory_resource): From bfe271ce86e4aabbc7e111cab4d8e84b03d4ba23 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 24 Sep 2024 20:48:16 -0700 Subject: [PATCH 10/13] add missing imports, clean up --- python/rmm/rmm/_lib/cuda_stream_view.pxd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/rmm/rmm/_lib/cuda_stream_view.pxd b/python/rmm/rmm/_lib/cuda_stream_view.pxd index 78480420f..c336b0fe8 100644 --- a/python/rmm/rmm/_lib/cuda_stream_view.pxd +++ b/python/rmm/rmm/_lib/cuda_stream_view.pxd @@ -12,4 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport ( + cuda_stream_default, + cuda_stream_legacy, + cuda_stream_per_thread, + cuda_stream_view, +) From efe3a48a5e456123c7ef82140dfef5e9197d30e8 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 25 Sep 2024 07:57:19 -0700 Subject: [PATCH 11/13] address review --- python/rmm/rmm/__init__.py | 5 ----- python/rmm/rmm/_lib/__init__.py | 7 ------- python/rmm/rmm/tests/test_rmm.py | 6 ------ 3 files changed, 18 deletions(-) diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py index c52818a75..b23ad68f9 100644 --- a/python/rmm/rmm/__init__.py +++ b/python/rmm/rmm/__init__.py @@ -57,12 +57,7 @@ def __getattr__(name): if name == "_lib": import importlib - import warnings - warnings.warn( - "The `rmm._lib` module is deprecated and will be removed in a future release. Use `rmm.pylibrmm` instead.", - FutureWarning, - ) module = importlib.import_module("rmm.pylibrmm") return module else: raise AttributeError(f"Module '{__name__}' has no attribute '{name}'") diff --git a/python/rmm/rmm/_lib/__init__.py b/python/rmm/rmm/_lib/__init__.py index 965d8e917..7cfddab60 100644 --- a/python/rmm/rmm/_lib/__init__.py +++ b/python/rmm/rmm/_lib/__init__.py @@ -12,11 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings - -warnings.warn( - "The `rmm._lib` module is deprecated and will be removed in a future release. 
Use `rmm.pylibrmm` instead.", - FutureWarning, -) - from rmm.pylibrmm import * diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 7704cac25..c03b9e501 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -1076,9 +1076,3 @@ def test_available_device_memory(): assert initial_memory[1] == final_memory[1] assert initial_memory[0] > 0 assert final_memory[0] > 0 - - -# TODO: Remove test when rmm._lib is removed in 24.12 -def test_deprecate_rmm_lib(): - with pytest.warns(FutureWarning): - rmm._lib.device_buffer.DeviceBuffer(size=100) From 30b4ebceab7128c218fddea3c5aa6adff942046a Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 1 Oct 2024 12:02:58 -0700 Subject: [PATCH 12/13] fix docs --- python/rmm/docs/guide.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/rmm/docs/guide.md b/python/rmm/docs/guide.md index 22c0dc023..c7e940497 100644 --- a/python/rmm/docs/guide.md +++ b/python/rmm/docs/guide.md @@ -236,17 +236,17 @@ Common to both usages is that they modify the currently active RMM memory resour >>> # We start with the default cuda memory resource >>> rmm.mr.get_current_device_resource() - + >>> # When using statistics, we get a StatisticsResourceAdaptor with the context >>> with rmm.statistics.statistics(): ... rmm.mr.get_current_device_resource() - + >>> # We can also enable statistics globally >>> rmm.statistics.enable_statistics() >>> print(rmm.mr.get_current_device_resource()) - + ``` With statistics enabled, you can query statistics of the current and peak bytes and number of allocations performed by the current RMM memory resource: From 6ee07a855481864bc8c1f15e4feb987adef690bc Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 2 Oct 2024 16:38:46 -0700 Subject: [PATCH 13/13] address review --- .gitignore | 11 ++++--- python/rmm/rmm/_lib/logger.pxd | 53 +--------------------------------- 2 files changed, 8 insertions(+), 56 deletions(-) diff --git a/.gitignore b/.gitignore index 2d0b150e1..36aafe643 100644 --- a/.gitignore +++ b/.gitignore @@ -22,10 +22,13 @@ rmm.egg-info/ python/build python/*/build python/rmm/docs/_build -python/rmm/**/_lib/**/*.cpp -!python/rmm/_lib/_torch_allocator.cpp -python/rmm/**/_lib/**/*.h -python/rmm/**/_lib/.nfs* +python/rmm/**/librmmm/**/*.cpp +!python/rmm/librmmm/_torch_allocator.cpp +python/rmm/**/librmm/**/*.h +python/rmm/**/librmm/.nfs* +python/rmm/**/pylibrmmm/**/*.cpp +python/rmm/**/pylibrmmm/**/*.h +python/rmm/**/pylibrmmm/.nfs* python/rmm/_cuda/*.cpp python/rmm/tests/*.cpp python/rmm/*.ipynb diff --git a/python/rmm/rmm/_lib/logger.pxd b/python/rmm/rmm/_lib/logger.pxd index 27936d09c..bef05c903 100644 --- a/python/rmm/rmm/_lib/logger.pxd +++ b/python/rmm/rmm/_lib/logger.pxd @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm.librmm.logger cimport logger, logging_level, spdlog_logger +from rmm.librmm._logger cimport logger, logging_level, spdlog_logger from rmm.pylibrmm.logger cimport ( _validate_level_type, flush_logger, @@ -22,54 +22,3 @@ from rmm.pylibrmm.logger cimport ( set_logging_level, should_log, ) - - -cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: - cpdef enum logging_level "spdlog::level::level_enum": - """ - The debug logging level for RMM. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. 
- - Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, - INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. - - Examples - -------- - >>> import rmm - >>> rmm.logging_level.DEBUG - - >>> rmm.logging_level.DEBUG.value - 1 - >>> rmm.logging_level.DEBUG.name - 'DEBUG' - - See Also - -------- - set_logging_level : Set the debug logging level - get_logging_level : Get the current debug logging level - """ - TRACE "spdlog::level::trace" - DEBUG "spdlog::level::debug" - INFO "spdlog::level::info" - WARN "spdlog::level::warn" - ERR "spdlog::level::err" - CRITICAL "spdlog::level::critical" - OFF "spdlog::level::off" - - -cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: - cdef cppclass spdlog_logger "spdlog::logger": - spdlog_logger() except + - void set_level(logging_level level) - logging_level level() - void flush() except + - void flush_on(logging_level level) - logging_level flush_level() - bool should_log(logging_level msg_level) - - -cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: - cdef spdlog_logger& logger() except +
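After this series, `rmm.pylibrmm` is the canonical home of the Cython-backed Python classes, `rmm.librmm` carries the raw C++ declarations, and `rmm._lib` survives only as a compatibility alias: the module-level `__getattr__` in `rmm/__init__.py` resolves `rmm._lib` to `rmm.pylibrmm`, and the `rmm/_lib/*.pxd` stubs simply re-export the relocated declarations for downstream cimports. A minimal usage sketch of the resulting layout (not part of the patches above; it assumes an RMM build with this series applied and a visible CUDA device):

    import rmm

    # New canonical import path for the Cython-backed classes.
    db = rmm.pylibrmm.device_buffer.DeviceBuffer(size=16)

    # Legacy path still resolves: rmm.__getattr__("_lib") returns the
    # rmm.pylibrmm module, so this is the same class under the old name.
    legacy = rmm._lib.device_buffer.DeviceBuffer(size=16)

    assert type(db) is type(legacy)
    print(db.size, legacy.size)  # 16 16

Cython consumers can likewise keep cimporting from `rmm._lib.device_buffer` or `rmm._lib.memory_resource` for now, since those `.pxd` files forward to `rmm.pylibrmm` and `rmm.librmm`, but new code should target the new package names directly.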