Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add debugging capabilities to the CachingAllocator [14.0.x] #45342

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 37 additions & 13 deletions HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,50 @@

namespace cms::alpakatools {

namespace config {
struct AllocatorConfig {
// Bin growth factor (bin_growth in cub::CachingDeviceAllocator)
unsigned int binGrowth = 2;

// bin growth factor (bin_growth in cub::CachingDeviceAllocator)
constexpr unsigned int binGrowth = 2;
// Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CachingDeviceAllocator
unsigned int minBin = 8; // 256 bytes

// smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CachingDeviceAllocator
constexpr unsigned int minBin = 8; // 256 bytes
// Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator).
// Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail.
unsigned int maxBin = 30; // 1 GB

// largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator). Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail.
constexpr unsigned int maxBin = 30; // 1 GB
// Total storage for the allocator; 0 means no limit.
size_t maxCachedBytes = 0;

// total storage for the allocator; 0 means no limit.
constexpr size_t maxCachedBytes = 0;
// Fraction of total device memory taken for the allocator; 0 means no limit.
// If both maxCachedBytes and maxCachedFraction are non-zero, the smallest resulting value is used.
double maxCachedFraction = 0.8;

// fraction of total device memory taken for the allocator; 0 means no limit.
constexpr double maxCachedFraction = 0.8;
// Fill all newly allocated or re-used memory blocks with fillAllocationValue.
bool fillAllocations = false;

// if both maxCachedBytes and maxCachedFraction are non-zero, the smallest resulting value is used.
// Fill only the re-used memory blocks with fillReallocationValue.
// If both fillAllocations and fillReallocations are true, fillAllocationValue is used for newly allocated blocks and fillReallocationValue is used for re-allocated blocks.
bool fillReallocations = false;

} // namespace config
// Fill memory blocks with fillDeallocationValue before freeing or caching them for re-use
bool fillDeallocations = false;

// Fill memory blocks with fillCacheValue before caching them for re-use.
// If both fillDeallocations and fillCaches are true, fillDeallocationValue is used for blocks about to be freed and fillCacheValue is used for blocks about to be cached.
bool fillCaches = false;

// Byte value used to fill all newly allocated or re-used memory blocks
uint8_t fillAllocationValue = 0xA5;

// Byte value used to fill all re-used memory blocks
uint8_t fillReallocationValue = 0x69;

// Byte value used to fill all deallocated or cached memory blocks
uint8_t fillDeallocationValue = 0x5A;

// Byte value used to fill all cached memory blocks
uint8_t fillCacheValue = 0x96;
};

} // namespace cms::alpakatools

Expand Down
111 changes: 82 additions & 29 deletions HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <alpaka/alpaka.hpp>

#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
#include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"

// Inspired by cub::CachingDeviceAllocator
Expand All @@ -37,18 +37,20 @@ namespace cms::alpakatools {
return power;
}

// format a memory size in B/kB/MB/GB
// format a memory size in B/KiB/MiB/GiB/TiB
inline std::string as_bytes(size_t value) {
if (value == std::numeric_limits<size_t>::max()) {
return "unlimited";
} else if (value >= (1 << 30) and value % (1 << 30) == 0) {
return std::to_string(value >> 30) + " GB";
} else if (value >= (1 << 20) and value % (1 << 20) == 0) {
return std::to_string(value >> 20) + " MB";
} else if (value >= (1 << 10) and value % (1 << 10) == 0) {
return std::to_string(value >> 10) + " kB";
} else if (value >= (1ul << 40) and value % (1ul << 40) == 0) {
return std::to_string(value >> 40) + " TiB";
} else if (value >= (1ul << 30) and value % (1ul << 30) == 0) {
return std::to_string(value >> 30) + " GiB";
} else if (value >= (1ul << 20) and value % (1ul << 20) == 0) {
return std::to_string(value >> 20) + " MiB";
} else if (value >= (1ul << 10) and value % (1ul << 10) == 0) {
return std::to_string(value >> 10) + " KiB";
} else {
return std::to_string(value) + " B";
return std::to_string(value) + " B";
}
}

Expand Down Expand Up @@ -119,27 +121,27 @@ namespace cms::alpakatools {

explicit CachingAllocator(
Device const& device,
unsigned int binGrowth, // bin growth factor;
unsigned int minBin, // smallest bin, corresponds to binGrowth^minBin bytes;
// smaller allocations are rounded to this value;
unsigned int maxBin, // largest bin, corresponds to binGrowth^maxBin bytes;
// larger allocations will fail;
size_t maxCachedBytes, // total storage for the allocator (0 means no limit);
double maxCachedFraction, // fraction of total device memory taken for the allocator (0 means no limit);
// if both maxCachedBytes and maxCachedFraction are non-zero,
// the smallest resulting value is used.
AllocatorConfig const& config,
bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one;
// this is safe only if all memory operations are scheduled in the same queue
bool debug)
bool debug = false)
: device_(device),
binGrowth_(binGrowth),
minBin_(minBin),
maxBin_(maxBin),
minBinBytes_(detail::power(binGrowth, minBin)),
maxBinBytes_(detail::power(binGrowth, maxBin)),
maxCachedBytes_(cacheSize(maxCachedBytes, maxCachedFraction)),
binGrowth_(config.binGrowth),
minBin_(config.minBin),
maxBin_(config.maxBin),
minBinBytes_(detail::power(binGrowth_, minBin_)),
maxBinBytes_(detail::power(binGrowth_, maxBin_)),
maxCachedBytes_(cacheSize(config.maxCachedBytes, config.maxCachedFraction)),
reuseSameQueueAllocations_(reuseSameQueueAllocations),
debug_(debug) {
debug_(debug),
fillAllocations_(config.fillAllocations),
fillAllocationValue_(config.fillAllocationValue),
fillReallocations_(config.fillReallocations),
fillReallocationValue_(config.fillReallocationValue),
fillDeallocations_(config.fillDeallocations),
fillDeallocationValue_(config.fillDeallocationValue),
fillCaches_(config.fillCaches),
fillCacheValue_(config.fillCacheValue) {
if (debug_) {
std::ostringstream out;
out << "CachingAllocator settings\n"
Expand All @@ -148,7 +150,7 @@ namespace cms::alpakatools {
<< " max bin " << maxBin_ << "\n"
<< " resulting bins:\n";
for (auto bin = minBin_; bin <= maxBin_; ++bin) {
auto binSize = detail::power(binGrowth, bin);
auto binSize = detail::power(binGrowth_, bin);
out << " " << std::right << std::setw(12) << detail::as_bytes(binSize) << '\n';
}
out << " maximum amount of cached memory: " << detail::as_bytes(maxCachedBytes_);
Expand Down Expand Up @@ -182,8 +184,19 @@ namespace cms::alpakatools {
std::tie(block.bin, block.bytes) = findBin(bytes);

// try to re-use a cached block, or allocate a new buffer
if (not tryReuseCachedBlock(block)) {
if (tryReuseCachedBlock(block)) {
// fill the re-used memory block with a pattern
if (fillReallocations_) {
alpaka::memset(*block.queue, *block.buffer, fillReallocationValue_);
} else if (fillAllocations_) {
alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_);
}
} else {
allocateNewBlock(block);
// fill the newly allocated memory block with a pattern
if (fillAllocations_) {
alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_);
}
}

return block.buffer->data();
Expand Down Expand Up @@ -215,11 +228,18 @@ namespace cms::alpakatools {
// because of multiple exceptions it is best to ignore these
// errors.
try {
// fill memory blocks with a pattern before caching them
if (fillCaches_) {
alpaka::memset(*block.queue, *block.buffer, fillCacheValue_);
} else if (fillDeallocations_) {
alpaka::memset(*block.queue, *block.buffer, fillDeallocationValue_);
}
// record in the block a marker associated to the work queue
alpaka::enqueue(*(block.queue), *(block.event));
} catch (std::exception& e) {
if (debug_) {
std::ostringstream out;
out << "CachingAllocator::free() error from alpaka::enqueue(): " << e.what() << "\n";
out << "CachingAllocator::free() caught an alpaka error: " << e.what() << "\n";
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes << " bytes at "
<< ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << ", event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
Expand All @@ -244,6 +264,30 @@ namespace cms::alpakatools {
std::cout << out.str() << std::endl;
}
} else {
// If the memset fails, very likely an error has occurred in the
// asynchronous processing. In that case the error will show up in all
// device API function calls, and the free() will be called by
// destructors during stack unwinding. In order to avoid terminate()
// being called because of multiple exceptions it is best to ignore
// these errors.
try {
// fill memory blocks with a pattern before freeing them
if (fillDeallocations_) {
alpaka::memset(*block.queue, *block.buffer, fillDeallocationValue_);
}
} catch (std::exception& e) {
if (debug_) {
std::ostringstream out;
out << "CachingAllocator::free() caught an alpaka error: " << e.what() << "\n";
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes << " bytes at "
<< ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << ", event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
<< " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}
return;
}
// if the buffer is not recached, it is automatically freed when block goes out of scope
if (debug_) {
std::ostringstream out;
Expand Down Expand Up @@ -452,6 +496,15 @@ namespace cms::alpakatools {

const bool reuseSameQueueAllocations_;
const bool debug_;

const bool fillAllocations_;
const uint8_t fillAllocationValue_;
const bool fillReallocations_;
const uint8_t fillReallocationValue_;
const bool fillDeallocations_;
const uint8_t fillDeallocationValue_;
const bool fillCaches_;
const uint8_t fillCacheValue_;
};

} // namespace cms::alpakatools
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace cms::alpakatools {
template <typename TDev,
typename TQueue,
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
auto allocate_device_allocators() {
auto allocate_device_allocators(AllocatorConfig const& config, bool debug) {
using Allocator = CachingAllocator<TDev, TQueue>;
auto const& devices = cms::alpakatools::devices<alpaka::Platform<TDev>>();
ssize_t const size = devices.size();
Expand All @@ -38,13 +38,9 @@ namespace cms::alpakatools {
#endif
ptr + index,
devices[index],
config::binGrowth,
config::minBin,
config::maxBin,
config::maxCachedBytes,
config::maxCachedFraction,
true, // reuseSameQueueAllocations
false); // debug
config,
true, // reuseSameQueueAllocations
debug);
}
} catch (...) {
--index;
Expand All @@ -60,11 +56,11 @@ namespace cms::alpakatools {
}

// use a custom deleter to destroy all objects and deallocate the memory
auto deleter = [size](Allocator* ptr) {
auto deleter = [size](Allocator* allocators) {
for (size_t i = size; i > 0; --i) {
std::destroy_at(ptr + i - 1);
std::destroy_at(allocators + i - 1);
}
std::allocator<Allocator>().deallocate(ptr, size);
std::allocator<Allocator>().deallocate(allocators, size);
};

return std::unique_ptr<Allocator[], decltype(deleter)>(ptr, deleter);
Expand All @@ -75,9 +71,11 @@ namespace cms::alpakatools {
template <typename TDev,
typename TQueue,
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
inline CachingAllocator<TDev, TQueue>& getDeviceCachingAllocator(TDev const& device) {
inline CachingAllocator<TDev, TQueue>& getDeviceCachingAllocator(TDev const& device,
AllocatorConfig const& config = AllocatorConfig{},
bool debug = false) {
// initialise all allocators, one per device
CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators<TDev, TQueue>();
CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators<TDev, TQueue>(config, debug);

size_t const index = alpaka::getNativeHandle(device);
assert(index < cms::alpakatools::devices<alpaka::Platform<TDev>>().size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,14 @@
namespace cms::alpakatools {

template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
inline CachingAllocator<alpaka_common::DevHost, TQueue>& getHostCachingAllocator() {
inline CachingAllocator<alpaka_common::DevHost, TQueue>& getHostCachingAllocator(
AllocatorConfig const& config = AllocatorConfig{}, bool debug = false) {
// thread safe initialisation of the host allocator
CMS_THREAD_SAFE static CachingAllocator<alpaka_common::DevHost, TQueue> allocator(
host(),
config::binGrowth,
config::minBin,
config::maxBin,
config::maxCachedBytes,
config::maxCachedFraction,
false, // reuseSameQueueAllocations
false); // debug
config,
false, // reuseSameQueueAllocations
debug);

// the public interface is thread safe
return allocator;
Expand Down
Loading