diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b9c9fd66..9e8f47f5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,6 +81,7 @@ endif(CUDA_STATIC_RUNTIME) target_link_libraries(rmm INTERFACE rmm::Thrust) target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only) +target_link_libraries(rmm INTERFACE dl) target_compile_features(rmm INTERFACE cxx_std_17 $) # Set logging level. Must go before including gtests and benchmarks. diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 6a5ecfbb9..358e95080 100644 --- a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -25,6 +25,8 @@ #include #if defined(RMM_ENABLE_STACK_TRACES) +#include +#include #include #include #include @@ -60,12 +62,32 @@ class stack_trace { #if defined(RMM_ENABLE_STACK_TRACES) std::unique_ptr strings( backtrace_symbols(st.stack_ptrs.data(), st.stack_ptrs.size()), &::free); + if (strings.get() == nullptr) { os << "But no stack trace could be found!" << std::endl; } else { - ///@todo: support for demangling of C++ symbol names + // Iterate over the stack pointers converting to a string for (std::size_t i = 0; i < st.stack_ptrs.size(); ++i) { - os << "#" << i << " in " << strings.get()[i] << std::endl; + // Leading index + os << "#" << i << " in "; + + auto const str = [&] { + Dl_info info; + if (dladdr(st.stack_ptrs[i], &info)) { + int status = -1; // Demangle the name. This can occasionally fail + + std::unique_ptr demangled( + abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status), &::free); + // If demangling fails, fall back to the mangled dli_sname. + if (status == 0 or info.dli_sname) { + auto name = status == 0 ? 
demangled.get() : info.dli_sname; + return name + std::string(" from ") + info.dli_fname; + } + } + return std::string(strings.get()[i]); + }(); + + os << str << std::endl; } } #else diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp new file mode 100644 index 000000000..d71dc52d4 --- /dev/null +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +namespace rmm { +namespace mr { +/** + * @brief Resource that uses `Upstream` to allocate memory and tracks statistics + * on memory allocations. + * + * An instance of this resource can be constructed with an existing, upstream + * resource in order to satisfy allocation requests, but any existing + * allocations will be untracked. Tracking statistics stores the current, peak + * and total memory allocations for both the number of bytes and number of calls + * to the memory resource. `statistics_resource_adaptor` is intended as a debug + * adaptor and shouldn't be used in performance-sensitive code. + * + * @tparam Upstream Type of the upstream resource used for + * allocation/deallocation. 
+ */ +template +class statistics_resource_adaptor final : public device_memory_resource { + public: + // can be a std::shared_mutex once C++17 is adopted + using read_lock_t = std::shared_lock; + using write_lock_t = std::unique_lock; + + /** + * @brief Utility struct for counting the current, peak, and total value of a number + */ + struct counter { + int64_t value{0}; // Current value + int64_t peak{0}; // Max value of `value` + int64_t total{0}; // Sum of all added values + + counter& operator+=(int64_t x) + { + value += x; + total += x; + peak = std::max(value, peak); + return *this; + } + + counter& operator-=(int64_t x) + { + value -= x; + return *this; + } + }; + + /** + * @brief Construct a new statistics resource adaptor using `upstream` to satisfy + * allocation requests. + * + * @throws `rmm::logic_error` if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + */ + statistics_resource_adaptor(Upstream* upstream) : upstream_{upstream} + { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + statistics_resource_adaptor() = delete; + virtual ~statistics_resource_adaptor() = default; + statistics_resource_adaptor(statistics_resource_adaptor const&) = delete; + statistics_resource_adaptor(statistics_resource_adaptor&&) = default; + statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete; + statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) = default; + + /** + * @brief Return pointer to the upstream resource. + * + * @return Upstream* Pointer to the upstream resource. + */ + Upstream* get_upstream() const noexcept { return upstream_; } + + /** + * @brief Checks whether the upstream resource supports streams. + * + * @return true The upstream resource supports streams + * @return false The upstream resource does not support streams. 
+ */ + bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + + /** + * @brief Query whether the resource supports the get_mem_info API. + * + * @return bool true if the upstream resource supports get_mem_info, false otherwise. + */ + bool supports_get_mem_info() const noexcept override + { + return upstream_->supports_get_mem_info(); + } + + /** + * @brief Returns a `counter` struct for this adaptor containing the current, + * peak, and total number of allocated bytes for this + * adaptor since it was created. + * + * @return counter struct containing bytes count + */ + counter get_bytes_counter() const noexcept + { + read_lock_t lock(mtx_); + + return bytes_; + } + + /** + * @brief Returns a `counter` struct for this adaptor containing the current, + * peak, and total number of allocation counts for this adaptor since it was + * created. + * + * @return counter struct containing allocations count + */ + counter get_allocations_counter() const noexcept + { + read_lock_t lock(mtx_); + + return allocations_; + } + + private: + /** + * @brief Allocates memory of size at least `bytes` using the upstream + * resource and records the allocation in the byte and allocation counters. + * + * The returned pointer has at least 256B alignment. + * + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled + * by the upstream resource. + * + * @param bytes The size, in bytes, of the allocation + * @param stream Stream on which to perform the allocation + * @return void* Pointer to the newly allocated memory + */ + void* do_allocate(std::size_t bytes, cuda_stream_view stream) override + { + void* p = upstream_->allocate(bytes, stream); + + // increment the stats + { + write_lock_t lock(mtx_); + + // Update the byte and allocation counters while we have the lock + bytes_ += bytes; + allocations_ += 1; + } + + return p; + } + + /** + * @brief Free allocation of size `bytes` pointed to by `p` + * + * @throws Nothing. 
+ * + * @param p Pointer to be deallocated + * @param bytes Size of the allocation + * @param stream Stream on which to perform the deallocation + */ + void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + { + upstream_->deallocate(p, bytes, stream); + + { + write_lock_t lock(mtx_); + + // Decrement the current allocated counts. + bytes_ -= bytes; + allocations_ -= 1; + } + } + + /** + * @brief Compare the upstream resource to another. + * + * @throws Nothing. + * + * @param other The other resource to compare to + * @return true If the two resources are equivalent + * @return false If the two resources are not equal + */ + bool do_is_equal(device_memory_resource const& other) const noexcept override + { + if (this == &other) + return true; + else { + auto cast = dynamic_cast const*>(&other); + return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) + : upstream_->is_equal(other); + } + } + + /** + * @brief Get free and available memory from upstream resource. + * + * @throws `rmm::cuda_error` if unable to retrieve memory info. + * + * @param stream Stream on which to get the mem info. + * @return std::pair containing free_size and total_size of memory + */ + std::pair do_get_mem_info(cuda_stream_view stream) const override + { + return upstream_->get_mem_info(stream); + } + + counter bytes_; // peak, current and total allocated bytes + counter allocations_; // peak, current and total allocation count + std::shared_timed_mutex mutable mtx_; // mutex for thread safe access to bytes_ and allocations_ + Upstream* upstream_; // the upstream resource used for satisfying allocation requests +}; + +/** + * @brief Convenience factory to return a `statistics_resource_adaptor` around the + * upstream resource `upstream`. + * + * @tparam Upstream Type of the upstream `device_memory_resource`. 
+ * @param upstream Pointer to the upstream resource + */ +template +statistics_resource_adaptor make_statistics_adaptor(Upstream* upstream) +{ + return statistics_resource_adaptor{upstream}; +} + +} // namespace mr +} // namespace rmm diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index d9873f493..0e2ca9c6e 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -81,7 +81,7 @@ class tracking_resource_adaptor final : public device_memory_resource { } tracking_resource_adaptor() = delete; - ~tracking_resource_adaptor() = default; + virtual ~tracking_resource_adaptor() = default; tracking_resource_adaptor(tracking_resource_adaptor const&) = delete; tracking_resource_adaptor(tracking_resource_adaptor&&) = default; tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete; @@ -136,15 +136,22 @@ class tracking_resource_adaptor final : public device_memory_resource { std::size_t get_allocated_bytes() const noexcept { return allocated_bytes_; } /** - * @brief Log any outstanding allocations via RMM_LOG_DEBUG + * @brief Gets a string containing the outstanding allocation pointers, their + * size, and optionally the stack trace for when each pointer was allocated. + * + * Stack traces are only included if this resource adaptor was created with + * `capture_stack == true`. Otherwise, outstanding allocation pointers will be + * shown with their size and empty stack traces. * + * @return std::string Containing the outstanding allocation pointers. 
*/ - void log_outstanding_allocations() const + std::string get_outstanding_allocations_str() const { -#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG read_lock_t lock(mtx_); - if (not allocations_.empty()) { - std::ostringstream oss; + + std::ostringstream oss; + + if (!allocations_.empty()) { for (auto const& al : allocations_) { oss << al.first << ": " << al.second.allocation_size << " B"; if (al.second.strace != nullptr) { @@ -152,8 +159,19 @@ class tracking_resource_adaptor final : public device_memory_resource { } oss << std::endl; } - RMM_LOG_DEBUG("Outstanding Allocations: {}", oss.str()); } + + return oss.str(); + } + + /** + * @brief Log any outstanding allocations via RMM_LOG_DEBUG + * + */ + void log_outstanding_allocations() const + { +#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG + RMM_LOG_DEBUG("Outstanding Allocations: {}", get_outstanding_allocations_str()); #endif // SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG } @@ -199,7 +217,33 @@ class tracking_resource_adaptor final : public device_memory_resource { upstream_->deallocate(p, bytes, stream); { write_lock_t lock(mtx_); - allocations_.erase(p); + + const auto found = allocations_.find(p); + + // Ensure the allocation is found and the number of bytes match + if (found == allocations_.end()) { + // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call + // std::terminate + RMM_LOG_ERROR( + "Deallocating a pointer that was not tracked. Ptr: {:p} [{}B], Current Num. Allocations: " + "{}", + fmt::ptr(p), + bytes, + this->allocations_.size()); + } else { + // Read the tracked size before erasing: erase() invalidates the iterator `found` + auto allocated_bytes = found->second.allocation_size; + allocations_.erase(found); + + if (allocated_bytes != bytes) { + // Don't throw but log an error. 
Throwing in a destructor (or any noexcept) will call + // std::terminate + RMM_LOG_ERROR( + "Alloc bytes ({}) and Dealloc bytes ({}) do not match", allocated_bytes, bytes); + + bytes = allocated_bytes; + } + } } allocated_bytes_ -= bytes; } diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index d1855f92a..e690b5ed6 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -50,4 +50,10 @@ cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): cpdef get_file_name(self) cpdef flush(self) +cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): + pass + cpdef DeviceMemoryResource get_current_device_resource() diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 955968041..23038c703 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -4,7 +4,7 @@ import warnings from collections import defaultdict from cython.operator cimport dereference as deref -from libc.stdint cimport int8_t +from libc.stdint cimport int8_t, int64_t from libcpp cimport bool from libcpp.cast cimport dynamic_cast from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr @@ -83,6 +83,34 @@ cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ void flush() except + +cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass statistics_resource_adaptor[Upstream]( + device_memory_resource): + struct counter: + counter() + + int64_t value + int64_t peak + int64_t total + + statistics_resource_adaptor( + Upstream* upstream_mr) except + + + counter get_bytes_counter() except + + counter get_allocations_counter() except + + +cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): + 
tracking_resource_adaptor( + Upstream* upstream_mr, + bool capture_stacks) except + + + size_t get_allocated_bytes() except + + string get_outstanding_allocations_str() except + + void log_outstanding_allocations() except + + cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: cdef cppclass cuda_device_id: @@ -456,6 +484,122 @@ cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): def __dealloc__(self): self.c_obj.reset() +cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr + ): + self.c_obj.reset( + new statistics_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr() + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr + ): + """ + Memory resource that tracks the current, peak and total + allocations/deallocations performed by an upstream memory resource. + Includes the ability to query these statistics at any time. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The upstream memory resource. + """ + pass + + @property + def allocation_counts(self) -> dict: + """ + Gets the current, peak, and total allocated bytes and number of + allocations. + + Returns: + dict: Dictionary containing allocation counts and bytes. 
+ """ + + counts = (( + self.c_obj.get()))[0].get_allocations_counter() + byte_counts = (( + self.c_obj.get()))[0].get_bytes_counter() + + return { + "current_bytes": byte_counts.value, + "current_count": counts.value, + "peak_bytes": byte_counts.peak, + "peak_count": counts.peak, + "total_bytes": byte_counts.total, + "total_count": counts.total, + } + +cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): + + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + bool capture_stacks=False + ): + self.c_obj.reset( + new tracking_resource_adaptor[device_memory_resource]( + upstream_mr.get_mr(), + capture_stacks + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + bool capture_stacks=False + ): + """ + Memory resource that tracks allocations/deallocations + performed by an upstream memory resource. Includes the ability to + query all outstanding allocations with the stack trace, if desired. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The upstream memory resource. + capture_stacks : bool + Whether or not to capture the stack trace with each allocation. + """ + pass + + def get_allocated_bytes(self) -> size_t: + """ + Query the number of bytes that have been allocated. Note that this can + not be used to know how large of an allocation is possible due to both + possible fragmentation and also internal page sizes and alignment that + is not tracked by this allocator. + """ + return (( + self.c_obj.get()) + )[0].get_allocated_bytes() + + def get_outstanding_allocations_str(self) -> str: + """ + Returns a string containing information about the current outstanding + allocations. For each allocation, the address, size and optional + stack trace are shown. + """ + + return (( + self.c_obj.get()) + )[0].get_outstanding_allocations_str().decode('UTF-8') + + def log_outstanding_allocations(self): + """ + Logs the output of `get_outstanding_allocations_str` to the current + RMM log file if enabled. 
+ """ + + (( + self.c_obj.get()))[0].log_outstanding_allocations() + # Global per-device memory resources; dict of int:DeviceMemoryResource cdef _per_device_mrs = defaultdict(CudaMemoryResource) diff --git a/python/rmm/mr.py b/python/rmm/mr.py index 15cec90b0..607ab12c3 100644 --- a/python/rmm/mr.py +++ b/python/rmm/mr.py @@ -8,6 +8,8 @@ LoggingResourceAdaptor, ManagedMemoryResource, PoolMemoryResource, + StatisticsResourceAdaptor, + TrackingResourceAdaptor, _flush_logs, _initialize, disable_logging, @@ -31,6 +33,8 @@ "LoggingResourceAdaptor", "ManagedMemoryResource", "PoolMemoryResource", + "StatisticsResourceAdaptor", + "TrackingResourceAdaptor", "_flush_logs", "_initialize", "set_per_device_resource", diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 9a89e0df8..3533bd495 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -553,3 +553,109 @@ def test_cuda_async_memory_resource_threshold(nelem, alloc): rmm.mr.set_current_device_resource(mr) array_tester("u1", nelem, alloc) # should not trigger release array_tester("u1", 2 * nelem, alloc) # should trigger release + + +def test_statistics_resource_adaptor(): + + cuda_mr = rmm.mr.CudaMemoryResource() + + mr = rmm.mr.StatisticsResourceAdaptor(cuda_mr) + + rmm.mr.set_current_device_resource(mr) + + buffers = [rmm.DeviceBuffer(size=1000) for _ in range(10)] + + for i in range(9, 0, -2): + del buffers[i] + + assert mr.allocation_counts == { + "current_bytes": 5000, + "current_count": 5, + "peak_bytes": 10000, + "peak_count": 10, + "total_bytes": 10000, + "total_count": 10, + } + + # Push a new Statistics adaptor + mr2 = rmm.mr.StatisticsResourceAdaptor(mr) + rmm.mr.set_current_device_resource(mr2) + + for _ in range(2): + buffers.append(rmm.DeviceBuffer(size=1000)) + + assert mr2.allocation_counts == { + "current_bytes": 2000, + "current_count": 2, + "peak_bytes": 2000, + "peak_count": 2, + "total_bytes": 2000, + "total_count": 2, + } + assert mr.allocation_counts == 
{ + "current_bytes": 7000, + "current_count": 7, + "peak_bytes": 10000, + "peak_count": 10, + "total_bytes": 12000, + "total_count": 12, + } + + del buffers + gc.collect() + + assert mr2.allocation_counts == { + "current_bytes": 0, + "current_count": 0, + "peak_bytes": 2000, + "peak_count": 2, + "total_bytes": 2000, + "total_count": 2, + } + assert mr.allocation_counts == { + "current_bytes": 0, + "current_count": 0, + "peak_bytes": 10000, + "peak_count": 10, + "total_bytes": 12000, + "total_count": 12, + } + + +def test_tracking_resource_adaptor(): + + cuda_mr = rmm.mr.CudaMemoryResource() + + mr = rmm.mr.TrackingResourceAdaptor(cuda_mr, capture_stacks=True) + + rmm.mr.set_current_device_resource(mr) + + buffers = [rmm.DeviceBuffer(size=1000) for _ in range(10)] + + for i in range(9, 0, -2): + del buffers[i] + + assert mr.get_allocated_bytes() == 5000 + + # Push a new Tracking adaptor + mr2 = rmm.mr.TrackingResourceAdaptor(mr, capture_stacks=True) + rmm.mr.set_current_device_resource(mr2) + + for _ in range(2): + buffers.append(rmm.DeviceBuffer(size=1000)) + + assert mr2.get_allocated_bytes() == 2000 + assert mr.get_allocated_bytes() == 7000 + + # Ensure we get back a non-empty string for the allocations + assert len(mr.get_outstanding_allocations_str()) > 0 + + del buffers + gc.collect() + + assert mr2.get_allocated_bytes() == 0 + assert mr.get_allocated_bytes() == 0 + + # make sure the allocations string is now empty + assert len(mr2.get_outstanding_allocations_str()) == 0 + assert len(mr.get_outstanding_allocations_str()) == 0 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d04c88543..f4f993be6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -110,6 +110,12 @@ ConfigureTest(POLYMORPHIC_ALLOCATOR_TEST ConfigureTest(STREAM_ADAPTOR_TEST "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/stream_allocator_adaptor_tests.cpp") +# statistics adaptor tests + +set(STATISTICS_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/statistics_mr_tests.cpp") + 
+ConfigureTest(STATISTICS_TEST "${STATISTICS_TEST_SRC}") + # tracking adaptor tests set(TRACKING_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/tracking_mr_tests.cpp") diff --git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp new file mode 100644 index 000000000..fa7297608 --- /dev/null +++ b/tests/mr/device/statistics_mr_tests.cpp @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include "mr_test.hpp" + +#include + +namespace rmm { +namespace test { +namespace { + +using statistics_adaptor = rmm::mr::statistics_resource_adaptor; + +TEST(StatisticsTest, ThrowOnNullUpstream) +{ + auto construct_nullptr = []() { statistics_adaptor mr{nullptr}; }; + EXPECT_THROW(construct_nullptr(), rmm::logic_error); +} + +TEST(StatisticsTest, Empty) +{ + statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + + EXPECT_EQ(mr.get_bytes_counter().peak, 0); + EXPECT_EQ(mr.get_bytes_counter().total, 0); + EXPECT_EQ(mr.get_bytes_counter().value, 0); + + EXPECT_EQ(mr.get_allocations_counter().peak, 0); + EXPECT_EQ(mr.get_allocations_counter().total, 0); + EXPECT_EQ(mr.get_allocations_counter().value, 0); +} + +TEST(StatisticsTest, AllFreed) +{ + statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + std::vector allocations; + for (int i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + } + for (auto p : allocations) { + mr.deallocate(p, 10_MiB); + } + + // Counter values should be 0 + EXPECT_EQ(mr.get_bytes_counter().value, 0); + EXPECT_EQ(mr.get_allocations_counter().value, 0); +} + +TEST(StatisticsTest, PeakAllocations) +{ + statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + std::vector allocations; + for (std::size_t i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + } + // Delete every other allocation + for (auto &&it = allocations.begin(); it != allocations.end(); ++it) { + mr.deallocate(*it, 10_MiB); + it = allocations.erase(it); + } + + auto current_alloc_counts = mr.get_allocations_counter(); + auto current_alloc_bytes = mr.get_bytes_counter(); + + // Verify current allocations + EXPECT_EQ(current_alloc_bytes.value, 50_MiB); + EXPECT_EQ(current_alloc_counts.value, 5); + + // Verify peak allocations + EXPECT_EQ(current_alloc_bytes.peak, 100_MiB); + EXPECT_EQ(current_alloc_counts.peak, 10); + + // Verify total allocations + 
EXPECT_EQ(current_alloc_bytes.total, 100_MiB); + EXPECT_EQ(current_alloc_counts.total, 10); + + // Add 10 more to increase the peak + for (std::size_t i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + } + + // Deallocate all remaining + for (std::size_t i = 0; i < allocations.size(); ++i) { + mr.deallocate(allocations[i], 10_MiB); + } + allocations.clear(); + + current_alloc_counts = mr.get_allocations_counter(); + current_alloc_bytes = mr.get_bytes_counter(); + + // Verify current allocations + EXPECT_EQ(current_alloc_bytes.value, 0); + EXPECT_EQ(current_alloc_counts.value, 0); + + // Verify peak allocations + EXPECT_EQ(current_alloc_bytes.peak, 150_MiB); + EXPECT_EQ(current_alloc_counts.peak, 15); + + // Verify total allocations + EXPECT_EQ(current_alloc_bytes.total, 200_MiB); + EXPECT_EQ(current_alloc_counts.total, 20); +} + +TEST(StatisticsTest, MultiTracking) +{ + statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + rmm::mr::set_current_device_resource(&mr); + + std::vector> allocations; + for (std::size_t i = 0; i < 10; ++i) { + allocations.emplace_back( + std::make_shared(10_MiB, rmm::cuda_stream_default)); + } + + EXPECT_EQ(mr.get_allocations_counter().value, 10); + + statistics_adaptor inner_mr{rmm::mr::get_current_device_resource()}; + rmm::mr::set_current_device_resource(&inner_mr); + + for (std::size_t i = 0; i < 5; ++i) { + allocations.emplace_back( + std::make_shared(10_MiB, rmm::cuda_stream_default)); + } + + // Check the allocated bytes for both MRs + EXPECT_EQ(mr.get_allocations_counter().value, 15); + EXPECT_EQ(inner_mr.get_allocations_counter().value, 5); + + EXPECT_EQ(mr.get_bytes_counter().value, 150_MiB); + EXPECT_EQ(inner_mr.get_bytes_counter().value, 50_MiB); + + // Clear the allocations, causing all memory to be freed + allocations.clear(); + + // The current allocations for both MRs should be 0 + EXPECT_EQ(mr.get_allocations_counter().value, 0); + EXPECT_EQ(inner_mr.get_allocations_counter().value, 0); + + 
EXPECT_EQ(mr.get_bytes_counter().value, 0); + EXPECT_EQ(inner_mr.get_bytes_counter().value, 0); + + // Finally, verify the peak and total values + EXPECT_EQ(mr.get_bytes_counter().peak, 150_MiB); + EXPECT_EQ(inner_mr.get_bytes_counter().peak, 50_MiB); + + EXPECT_EQ(mr.get_allocations_counter().peak, 15); + EXPECT_EQ(inner_mr.get_allocations_counter().peak, 5); + + // Reset the current device resource + rmm::mr::set_current_device_resource(mr.get_upstream()); +} + +TEST(StatisticsTest, NegativeInnerTracking) +{ + // This tests the unlikely scenario where pointers are deallocated on an inner + // wrapped memory resource. This can happen if the MR is not saved with the + // memory pointer + statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + std::vector allocations; + for (std::size_t i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + } + + EXPECT_EQ(mr.get_allocations_counter().value, 10); + + statistics_adaptor inner_mr{&mr}; + + // Add more allocations + for (std::size_t i = 0; i < 5; ++i) { + allocations.push_back(inner_mr.allocate(10_MiB)); + } + + // Check the outstanding allocations + EXPECT_EQ(mr.get_allocations_counter().value, 15); + EXPECT_EQ(inner_mr.get_allocations_counter().value, 5); + + // Check the current counts + EXPECT_EQ(mr.get_bytes_counter().value, 150_MiB); + EXPECT_EQ(inner_mr.get_bytes_counter().value, 50_MiB); + + EXPECT_EQ(mr.get_allocations_counter().value, 15); + EXPECT_EQ(inner_mr.get_allocations_counter().value, 5); + + // Deallocate all allocations using the inner_mr + for (std::size_t i = 0; i < allocations.size(); ++i) { + inner_mr.deallocate(allocations[i], 10_MiB); + } + allocations.clear(); + + // Check the current counts are 0 for the outer + EXPECT_EQ(mr.get_bytes_counter().value, 0); + EXPECT_EQ(mr.get_allocations_counter().value, 0); + + // The inner_mr will have negative values + EXPECT_EQ(inner_mr.get_bytes_counter().value, -100_MiB); + EXPECT_EQ(inner_mr.get_allocations_counter().value, -10); 
+ + // Verify the peak and total + EXPECT_EQ(mr.get_bytes_counter().peak, 150_MiB); + EXPECT_EQ(inner_mr.get_bytes_counter().peak, 50_MiB); + + EXPECT_EQ(mr.get_allocations_counter().peak, 15); + EXPECT_EQ(inner_mr.get_allocations_counter().peak, 5); + + EXPECT_EQ(mr.get_bytes_counter().total, 150_MiB); + EXPECT_EQ(inner_mr.get_bytes_counter().total, 50_MiB); + + EXPECT_EQ(mr.get_allocations_counter().total, 15); + EXPECT_EQ(inner_mr.get_allocations_counter().total, 5); +} + +} // namespace +} // namespace test +} // namespace rmm diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp index 81e7e64f8..0ad6f7fc5 100644 --- a/tests/mr/device/tracking_mr_tests.cpp +++ b/tests/mr/device/tracking_mr_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -88,6 +89,106 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks) EXPECT_EQ(outstanding_allocations.begin()->second.strace, nullptr); } +TEST(TrackingTest, MultiTracking) +{ + tracking_adaptor mr{rmm::mr::get_current_device_resource(), true}; + rmm::mr::set_current_device_resource(&mr); + + std::vector> allocations; + for (std::size_t i = 0; i < 10; ++i) { + allocations.emplace_back( + std::make_shared(10_MiB, rmm::cuda_stream_default)); + } + + EXPECT_EQ(mr.get_outstanding_allocations().size(), 10); + + tracking_adaptor inner_mr{rmm::mr::get_current_device_resource()}; + rmm::mr::set_current_device_resource(&inner_mr); + + for (std::size_t i = 0; i < 5; ++i) { + allocations.emplace_back( + std::make_shared(10_MiB, rmm::cuda_stream_default)); + } + + // Check the allocated bytes for both MRs + EXPECT_EQ(mr.get_outstanding_allocations().size(), 15); + EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 5); + + EXPECT_EQ(mr.get_allocated_bytes(), 150_MiB); + EXPECT_EQ(inner_mr.get_allocated_bytes(), 50_MiB); + + EXPECT_GT(mr.get_outstanding_allocations_str().size(), 0); + + // Clear the allocations, causing all memory to be freed + 
allocations.clear(); + + // The current allocations for both MRs should be 0 + EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); + EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 0); + + EXPECT_EQ(mr.get_allocated_bytes(), 0); + EXPECT_EQ(inner_mr.get_allocated_bytes(), 0); + + // Reset the current device resource + rmm::mr::set_current_device_resource(mr.get_upstream()); +} + +TEST(TrackingTest, NegativeInnerTracking) +{ + // This tests the unlikely scenario where pointers are deallocated on an inner + // wrapped memory resource. This can happen if the MR is not saved with the + // memory pointer + tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + std::vector allocations; + for (std::size_t i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + } + + EXPECT_EQ(mr.get_outstanding_allocations().size(), 10); + + tracking_adaptor inner_mr{&mr}; + + // Add more allocations + for (std::size_t i = 0; i < 5; ++i) { + allocations.push_back(inner_mr.allocate(10_MiB)); + } + + // Check the outstanding allocations + EXPECT_EQ(mr.get_outstanding_allocations().size(), 15); + EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 5); + + // Deallocate all allocations using the inner_mr + for (std::size_t i = 0; i < allocations.size(); ++i) { + inner_mr.deallocate(allocations[i], 10_MiB); + } + allocations.clear(); + + // Check the outstanding allocations are all 0 + EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); + EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 0); +} + +TEST(TrackingTest, DeallocWrongBytes) +{ + tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + std::vector allocations; + for (std::size_t i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + } + + // When deallocating, pass the wrong bytes to deallocate + for (std::size_t i = 0; i < allocations.size(); ++i) { + mr.deallocate(allocations[i], 5_MiB); + } + allocations.clear(); + + 
EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); + EXPECT_EQ(mr.get_allocated_bytes(), 0); + + // Verify current allocations are correct despite the error + EXPECT_EQ(mr.get_allocated_bytes(), 0); +} + } // namespace } // namespace test } // namespace rmm