-
Notifications
You must be signed in to change notification settings - Fork 201
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Statistics Resource Adaptor and cython bindings to tracking_resource_adaptor
and statistics_resource_adaptor
#626
Changes from 9 commits
c5e91d9
b31bb2f
533fe2a
cffe9bf
e1218fa
25d5da3
976dabb
39d5e22
ddd296a
df0054e
cbf2772
e3e586a
da25869
312ca50
4c677f5
1d64c6d
97753ee
b812bc0
9d74e5d
9e69c67
8bfd27b
7cf3123
b5bbab4
c7395e4
1872ee2
6b20fb8
b281f9e
cab217d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -65,6 +65,47 @@ class tracking_resource_adaptor final : public device_memory_resource { | |||||
allocation_size{size} {}; | ||||||
}; | ||||||
|
||||||
/**
 * @brief Stores the current, peak, and total number of allocations and
 * allocated bytes.
 *
 * The `current_*` and `peak_*` members are signed because `decrement_count`
 * performs no underflow check: more decrements than increments drive the
 * current values below zero.
 */
struct allocation_counts {
  ssize_t current_bytes{0};    // Current outstanding bytes
  ssize_t current_count{0};    // Current outstanding count
  ssize_t peak_bytes{0};       // Max value of current_bytes
  ssize_t peak_count{0};       // Max value of current_count
  std::size_t total_bytes{0};  // Total allocated bytes
  std::size_t total_count{0};  // Total allocated count

  /**
   * @brief Records one allocation of `bytes` bytes: bumps the current and
   * total values and raises the peaks if the new current values exceed them.
   *
   * @param bytes Number of bytes allocated
   */
  void increment_count(std::size_t bytes)
  {
    // Convert explicitly so the signed/unsigned mix is visible at the call site
    current_bytes += static_cast<ssize_t>(bytes);
    ++current_count;

    peak_bytes = std::max(current_bytes, peak_bytes);
    peak_count = std::max(current_count, peak_count);

    total_bytes += bytes;
    ++total_count;
  }

  /**
   * @brief Records one deallocation of `bytes` bytes. Only the current bytes
   * and count are reduced; peak and total remain unchanged.
   *
   * @param bytes Number of bytes deallocated
   */
  void decrement_count(std::size_t bytes)
  {
    current_bytes -= static_cast<ssize_t>(bytes);
    --current_count;
  }
};
|
||||||
/** | ||||||
* @brief Construct a new tracking resource adaptor using `upstream` to satisfy | ||||||
* allocation requests. | ||||||
|
@@ -75,13 +116,16 @@ class tracking_resource_adaptor final : public device_memory_resource { | |||||
* @param capture_stacks If true, capture stacks for allocation calls | ||||||
*/ | ||||||
tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false) | ||||||
: upstream_{upstream}, capture_stacks_{capture_stacks}, allocated_bytes_{0} | ||||||
: capture_stacks_{capture_stacks}, upstream_{upstream} | ||||||
{ | ||||||
RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); | ||||||
|
||||||
// Need to maintain at least one on the stack at all times | ||||||
allocation_count_stack_.push_back(allocation_counts()); | ||||||
} | ||||||
|
||||||
tracking_resource_adaptor() = delete; | ||||||
~tracking_resource_adaptor() = default; | ||||||
virtual ~tracking_resource_adaptor() = default; | ||||||
tracking_resource_adaptor(tracking_resource_adaptor const&) = delete; | ||||||
tracking_resource_adaptor(tracking_resource_adaptor&&) = default; | ||||||
tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete; | ||||||
|
@@ -133,27 +177,153 @@ class tracking_resource_adaptor final : public device_memory_resource { | |||||
* @return std::size_t number of bytes that have been allocated through this | ||||||
* allocator. | ||||||
*/ | ||||||
std::size_t get_allocated_bytes() const noexcept { return allocated_bytes_; } | ||||||
std::size_t get_allocated_bytes() const noexcept | ||||||
{ | ||||||
return get_total_allocation_counts().current_bytes; | ||||||
} | ||||||
|
||||||
/** | ||||||
* @brief Log any outstanding allocations via RMM_LOG_DEBUG | ||||||
* @brief Returns an allocation_counts struct for this adaptor containing the | ||||||
* total current, peak, and total number of bytes and allocation count for | ||||||
* this adaptor regardless of any push/popped allocation_counts. Note: Because | ||||||
* it's possible to change memory resources at any time while maintaining the | ||||||
* same upstream memory resource, it's possible to have a negative allocation | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand the note, or the reason for a stack. You can't change the upstream for a |
||||||
* bytes or count if the number of deallocate() calls is greater than the | ||||||
* number of allocate(). | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||
* | ||||||
* @return allocation_counts struct containing the allocation number of bytes | ||||||
* and count info | ||||||
*/ | ||||||
void log_outstanding_allocations() const | ||||||
allocation_counts get_total_allocation_counts() const noexcept | ||||||
{ | ||||||
#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG | ||||||
read_lock_t lock(mtx); | ||||||
if (not allocations.empty()) { | ||||||
std::ostringstream oss; | ||||||
for (auto const& al : allocations) { | ||||||
read_lock_t lock(mtx_); | ||||||
|
||||||
// Start by copying the head | ||||||
allocation_counts sum_counts = allocation_count_stack_.front(); | ||||||
|
||||||
auto prev_curr_bytes = sum_counts.current_bytes; | ||||||
auto prev_curr_count = sum_counts.current_count; | ||||||
|
||||||
for (size_t i = 1; i < allocation_count_stack_.size(); i++) { | ||||||
// Sum the total and current | ||||||
sum_counts.current_bytes += allocation_count_stack_[i].current_bytes; | ||||||
sum_counts.current_count += allocation_count_stack_[i].current_count; | ||||||
|
||||||
sum_counts.total_bytes += allocation_count_stack_[i].total_bytes; | ||||||
sum_counts.total_count += allocation_count_stack_[i].total_count; | ||||||
|
||||||
// Peak works differently. For each, the max value is the previous stack's | ||||||
// `current_bytes` + the current stack's `peak_bytes` | ||||||
sum_counts.peak_bytes = | ||||||
std::max(sum_counts.peak_bytes, prev_curr_bytes + allocation_count_stack_[i].peak_bytes); | ||||||
sum_counts.peak_count = | ||||||
std::max(sum_counts.peak_count, prev_curr_count + allocation_count_stack_[i].peak_count); | ||||||
|
||||||
prev_curr_bytes = allocation_count_stack_[i].current_bytes; | ||||||
prev_curr_count = allocation_count_stack_[i].current_count; | ||||||
} | ||||||
|
||||||
return sum_counts; | ||||||
} | ||||||
|
||||||
/** | ||||||
* @brief Pushes a new allocation_counts struct onto the stack to allow | ||||||
* tracking memory allocations for a particular section without creating a new | ||||||
* memory resource. Call `pop_allocation_counts` to retrieve the | ||||||
* allocation_counts since this method was called. | ||||||
* | ||||||
* @return allocation_counts Returns the previous allocation_counts at the top | ||||||
* of the stack | ||||||
*/ | ||||||
allocation_counts push_allocation_counts() | ||||||
{ | ||||||
write_lock_t lock(mtx_); | ||||||
|
||||||
// Copy the top of the stack | ||||||
allocation_counts counts = allocation_count_stack_.back(); | ||||||
|
||||||
// Push a new one on the stack | ||||||
allocation_count_stack_.emplace_back(allocation_counts{}); | ||||||
|
||||||
return counts; | ||||||
} | ||||||
|
||||||
/** | ||||||
* @brief Pops the top allocation_counts struct off the stack and returns the | ||||||
* current, peak and total allocations since `push_allocation_counts` was | ||||||
* called. | ||||||
* | ||||||
* @throws rmm::out_of_range exception if `allocation_count_stack_.size() <= | ||||||
* 1` | ||||||
* | ||||||
* @return allocation_counts Returns the allocation_counts struct since | ||||||
* `push_allocation_counts` was called | ||||||
*/ | ||||||
allocation_counts pop_allocation_counts() | ||||||
{ | ||||||
write_lock_t lock(mtx_); | ||||||
|
||||||
RMM_EXPECTS(allocation_count_stack_.size() > 1, | ||||||
rmm::out_of_range, | ||||||
"Attempted to pop only allocation_counts on stack."); | ||||||
|
||||||
// Copy the top of the stack | ||||||
allocation_counts counts = allocation_count_stack_.back(); | ||||||
|
||||||
// Pop the stack | ||||||
allocation_count_stack_.pop_back(); | ||||||
|
||||||
// When popping the stack, the top needs to be rolled into the new top to keep the total values | ||||||
// correct | ||||||
auto& new_top = allocation_count_stack_.back(); | ||||||
|
||||||
// Make sure to do peak first here | ||||||
new_top.peak_bytes = std::max(new_top.peak_bytes, new_top.current_bytes + counts.peak_bytes); | ||||||
new_top.peak_count = std::max(new_top.peak_count, new_top.current_count + counts.peak_count); | ||||||
|
||||||
// Sum the total and current | ||||||
new_top.current_bytes += counts.current_bytes; | ||||||
new_top.current_count += counts.current_count; | ||||||
|
||||||
new_top.total_bytes += counts.total_bytes; | ||||||
new_top.total_count += counts.total_count; | ||||||
|
||||||
return counts; | ||||||
} | ||||||
|
||||||
/** | ||||||
* @brief Gets a string containing the outstanding allocation pointers, their | ||||||
* size, and optionally the stack trace for when each pointer was allocated. | ||||||
mdemoret-nv marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
* | ||||||
* @return std::string Containing the outstanding allocation pointers. | ||||||
*/ | ||||||
std::string get_outstanding_allocations_str() const | ||||||
{ | ||||||
read_lock_t lock(mtx_); | ||||||
|
||||||
std::ostringstream oss; | ||||||
|
||||||
if (!allocations_.empty()) { | ||||||
for (auto const& al : allocations_) { | ||||||
oss << al.first << ": " << al.second.allocation_size << " B"; | ||||||
if (al.second.strace != nullptr) { | ||||||
oss << " : callstack:" << std::endl << *al.second.strace; | ||||||
} | ||||||
oss << std::endl; | ||||||
} | ||||||
RMM_LOG_DEBUG("Outstanding Allocations: {}", oss.str()); | ||||||
} | ||||||
|
||||||
return oss.str(); | ||||||
} | ||||||
|
||||||
/** | ||||||
* @brief Log any outstanding allocations via RMM_LOG_DEBUG | ||||||
* | ||||||
*/ | ||||||
void log_outstanding_allocations() const | ||||||
{ | ||||||
#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG | ||||||
RMM_LOG_DEBUG("Outstanding Allocations: {}", get_outstanding_allocations_str()); | ||||||
#endif // SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG | ||||||
} | ||||||
|
||||||
|
@@ -179,8 +349,10 @@ class tracking_resource_adaptor final : public device_memory_resource { | |||||
{ | ||||||
write_lock_t lock(mtx_); | ||||||
allocations_.emplace(p, allocation_info{bytes, capture_stacks_}); | ||||||
|
||||||
// Increment the allocation_count_ while we have the lock | ||||||
allocation_count_stack_.back().increment_count(bytes); | ||||||
} | ||||||
allocated_bytes_ += bytes; | ||||||
|
||||||
return p; | ||||||
} | ||||||
|
@@ -197,11 +369,40 @@ class tracking_resource_adaptor final : public device_memory_resource { | |||||
void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override | ||||||
{ | ||||||
upstream_->deallocate(p, bytes, stream); | ||||||
|
||||||
{ | ||||||
write_lock_t lock(mtx_); | ||||||
allocations_.erase(p); | ||||||
|
||||||
const auto found = allocations_.find(p); | ||||||
|
||||||
// Ensure the allocation is found and the number of bytes match | ||||||
if (found == allocations_.end()) { | ||||||
// Don't throw but log an error. Throwing in a descructor (or any noexcept) will call | ||||||
// std::terminate | ||||||
RMM_LOG_ERROR( | ||||||
"Deallocating a pointer that was not tracked. Ptr: {:p} [{}B], Current Num. Allocations: " | ||||||
"{}", | ||||||
fmt::ptr(p), | ||||||
bytes, | ||||||
this->allocations_.size()); | ||||||
} else { | ||||||
allocations_.erase(found); | ||||||
|
||||||
auto allocated_bytes = found->second.allocation_size; | ||||||
|
||||||
if (allocated_bytes != bytes) { | ||||||
// Don't throw but log an error. Throwing in a descructor (or any noexcept) will call | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
// std::terminate | ||||||
RMM_LOG_ERROR( | ||||||
"Alloc bytes ({}) and Dealloc bytes ({}) do not match", allocated_bytes, bytes); | ||||||
|
||||||
bytes = allocated_bytes; | ||||||
} | ||||||
} | ||||||
|
||||||
// Decrement the current allocated counts. | ||||||
allocation_count_stack_.back().decrement_count(bytes); | ||||||
} | ||||||
allocated_bytes_ -= bytes; | ||||||
} | ||||||
|
||||||
/** | ||||||
|
@@ -239,8 +440,10 @@ class tracking_resource_adaptor final : public device_memory_resource { | |||||
|
||||||
bool capture_stacks_; // whether or not to capture call stacks | ||||||
std::map<void*, allocation_info> allocations_; // map of active allocations | ||||||
std::atomic<std::size_t> allocated_bytes_; // number of bytes currently allocated | ||||||
std::shared_timed_mutex mutable mtx_; // mutex for thread safe access to allocations_ | ||||||
std::vector<allocation_counts> | ||||||
allocation_count_stack_; // Stack of allocation_counts structs to track memory allocation | ||||||
// between push and pop | ||||||
std::shared_timed_mutex mutable mtx_; // mutex for thread safe access to allocations_ | ||||||
Upstream* upstream_; // the upstream resource used for satisfying allocation requests | ||||||
}; | ||||||
|
||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can make this code more beautiful. Discussed with @codereport. The suggestion is to replace the `for` with a `std::transform` over `st.stack_ptrs()`, and to use an IILE (immediately invoked lambda expression) to clean up the internal `if`, something like:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I implemented the IILE to clean up the string construction, but I couldn't come up with a good way to use `std::transform` here that was cleaner than the current implementation. Since we need both the value and the index, we would need to use the binary_op overload of `std::transform` with one iterator over the values and something similar to `boost::counting_iterator` for the index.
Maybe you are better with these functions in `<algorithm>` than I am, but I couldn't find an elegant way to create the indexing iterator that didn't require creating a second object like a `vector`. It's a shame, because I was hoping to use `std::ostream_iterator` for the output, which would have been nice. If I'm missing something here, let me know.