Skip to content

Commit

Permalink
Merge pull request #45342 from fwyzard/Alpaka_CachingAllocator_debug_…
Browse files Browse the repository at this point in the history
…140x

Add debugging capabilities to the CachingAllocator [14.0.x]
  • Loading branch information
cmsbuild authored Jul 2, 2024
2 parents 23c4792 + 3830ca7 commit 683d65b
Show file tree
Hide file tree
Showing 9 changed files with 394 additions and 66 deletions.
50 changes: 37 additions & 13 deletions HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,50 @@

namespace cms::alpakatools {

namespace config {
struct AllocatorConfig {
// Bin growth factor (bin_growth in cub::CachingDeviceAllocator)
unsigned int binGrowth = 2;

// bin growth factor (bin_growth in cub::CachingDeviceAllocator)
constexpr unsigned int binGrowth = 2;
// Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CachingDeviceAllocator
unsigned int minBin = 8; // 256 bytes

// smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CachingDeviceAllocator
constexpr unsigned int minBin = 8; // 256 bytes
// Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator).
// Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail.
unsigned int maxBin = 30; // 1 GB

// largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator). Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail.
constexpr unsigned int maxBin = 30; // 1 GB
// Total storage for the allocator; 0 means no limit.
size_t maxCachedBytes = 0;

// total storage for the allocator; 0 means no limit.
constexpr size_t maxCachedBytes = 0;
// Fraction of total device memory taken for the allocator; 0 means no limit.
// If both maxCachedBytes and maxCachedFraction are non-zero, the smallest resulting value is used.
double maxCachedFraction = 0.8;

// fraction of total device memory taken for the allocator; 0 means no limit.
constexpr double maxCachedFraction = 0.8;
// Fill all newly allocated or re-used memory blocks with fillAllocationValue.
bool fillAllocations = false;

// if both maxCachedBytes and maxCachedFraction are non-zero, the smallest resulting value is used.
// Fill only the re-used memory blocks with fillReallocationValue.
// If both fillAllocations and fillReallocations are true, fillAllocationValue is used for newly allocated blocks and fillReallocationValue is used for re-allocated blocks.
bool fillReallocations = false;

} // namespace config
// Fill memory blocks with fillDeallocationValue before freeing or caching them for re-use
bool fillDeallocations = false;

// Fill memory blocks with fillCacheValue before caching them for re-use.
// If both fillDeallocations and fillCaches are true, fillDeallocationValue is used for blocks about to be freed and fillCacheValue is used for blocks about to be cached.
bool fillCaches = false;

// Byte value used to fill all newly allocated or re-used memory blocks
uint8_t fillAllocationValue = 0xA5;

// Byte value used to fill all re-used memory blocks
uint8_t fillReallocationValue = 0x69;

// Byte value used to fill all deallocated or cached memory blocks
uint8_t fillDeallocationValue = 0x5A;

// Byte value used to fill all cached memory blocks
uint8_t fillCacheValue = 0x96;
};

} // namespace cms::alpakatools

Expand Down
111 changes: 82 additions & 29 deletions HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <alpaka/alpaka.hpp>

#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
#include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"

// Inspired by cub::CachingDeviceAllocator
Expand All @@ -37,18 +37,20 @@ namespace cms::alpakatools {
return power;
}

// format a memory size in B/kB/MB/GB
// format a memory size in B/KiB/MiB/GiB/TiB
inline std::string as_bytes(size_t value) {
if (value == std::numeric_limits<size_t>::max()) {
return "unlimited";
} else if (value >= (1 << 30) and value % (1 << 30) == 0) {
return std::to_string(value >> 30) + " GB";
} else if (value >= (1 << 20) and value % (1 << 20) == 0) {
return std::to_string(value >> 20) + " MB";
} else if (value >= (1 << 10) and value % (1 << 10) == 0) {
return std::to_string(value >> 10) + " kB";
} else if (value >= (1ul << 40) and value % (1ul << 40) == 0) {
return std::to_string(value >> 40) + " TiB";
} else if (value >= (1ul << 30) and value % (1ul << 30) == 0) {
return std::to_string(value >> 30) + " GiB";
} else if (value >= (1ul << 20) and value % (1ul << 20) == 0) {
return std::to_string(value >> 20) + " MiB";
} else if (value >= (1ul << 10) and value % (1ul << 10) == 0) {
return std::to_string(value >> 10) + " KiB";
} else {
return std::to_string(value) + " B";
return std::to_string(value) + " B";
}
}

Expand Down Expand Up @@ -119,27 +121,27 @@ namespace cms::alpakatools {

explicit CachingAllocator(
Device const& device,
unsigned int binGrowth, // bin growth factor;
unsigned int minBin, // smallest bin, corresponds to binGrowth^minBin bytes;
// smaller allocations are rounded to this value;
unsigned int maxBin, // largest bin, corresponds to binGrowth^maxBin bytes;
// larger allocations will fail;
size_t maxCachedBytes, // total storage for the allocator (0 means no limit);
double maxCachedFraction, // fraction of total device memory taken for the allocator (0 means no limit);
// if both maxCachedBytes and maxCachedFraction are non-zero,
// the smallest resulting value is used.
AllocatorConfig const& config,
bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one;
// this is safe only if all memory operations are scheduled in the same queue
bool debug)
bool debug = false)
: device_(device),
binGrowth_(binGrowth),
minBin_(minBin),
maxBin_(maxBin),
minBinBytes_(detail::power(binGrowth, minBin)),
maxBinBytes_(detail::power(binGrowth, maxBin)),
maxCachedBytes_(cacheSize(maxCachedBytes, maxCachedFraction)),
binGrowth_(config.binGrowth),
minBin_(config.minBin),
maxBin_(config.maxBin),
minBinBytes_(detail::power(binGrowth_, minBin_)),
maxBinBytes_(detail::power(binGrowth_, maxBin_)),
maxCachedBytes_(cacheSize(config.maxCachedBytes, config.maxCachedFraction)),
reuseSameQueueAllocations_(reuseSameQueueAllocations),
debug_(debug) {
debug_(debug),
fillAllocations_(config.fillAllocations),
fillAllocationValue_(config.fillAllocationValue),
fillReallocations_(config.fillReallocations),
fillReallocationValue_(config.fillReallocationValue),
fillDeallocations_(config.fillDeallocations),
fillDeallocationValue_(config.fillDeallocationValue),
fillCaches_(config.fillCaches),
fillCacheValue_(config.fillCacheValue) {
if (debug_) {
std::ostringstream out;
out << "CachingAllocator settings\n"
Expand All @@ -148,7 +150,7 @@ namespace cms::alpakatools {
<< " max bin " << maxBin_ << "\n"
<< " resulting bins:\n";
for (auto bin = minBin_; bin <= maxBin_; ++bin) {
auto binSize = detail::power(binGrowth, bin);
auto binSize = detail::power(binGrowth_, bin);
out << " " << std::right << std::setw(12) << detail::as_bytes(binSize) << '\n';
}
out << " maximum amount of cached memory: " << detail::as_bytes(maxCachedBytes_);
Expand Down Expand Up @@ -182,8 +184,19 @@ namespace cms::alpakatools {
std::tie(block.bin, block.bytes) = findBin(bytes);

// try to re-use a cached block, or allocate a new buffer
if (not tryReuseCachedBlock(block)) {
if (tryReuseCachedBlock(block)) {
// fill the re-used memory block with a pattern
if (fillReallocations_) {
alpaka::memset(*block.queue, *block.buffer, fillReallocationValue_);
} else if (fillAllocations_) {
alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_);
}
} else {
allocateNewBlock(block);
// fill the newly allocated memory block with a pattern
if (fillAllocations_) {
alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_);
}
}

return block.buffer->data();
Expand Down Expand Up @@ -215,11 +228,18 @@ namespace cms::alpakatools {
// because of multiple exceptions it is best to ignore these
// errors.
try {
// fill memory blocks with a pattern before caching them
if (fillCaches_) {
alpaka::memset(*block.queue, *block.buffer, fillCacheValue_);
} else if (fillDeallocations_) {
alpaka::memset(*block.queue, *block.buffer, fillDeallocationValue_);
}
// record in the block a marker associated to the work queue
alpaka::enqueue(*(block.queue), *(block.event));
} catch (std::exception& e) {
if (debug_) {
std::ostringstream out;
out << "CachingAllocator::free() error from alpaka::enqueue(): " << e.what() << "\n";
out << "CachingAllocator::free() caught an alpaka error: " << e.what() << "\n";
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes << " bytes at "
<< ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << ", event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
Expand All @@ -244,6 +264,30 @@ namespace cms::alpakatools {
std::cout << out.str() << std::endl;
}
} else {
// If the memset fails, very likely an error has occurred in the
// asynchronous processing. In that case the error will show up in all
// device API function calls, and the free() will be called by
// destructors during stack unwinding. In order to avoid terminate()
// being called because of multiple exceptions it is best to ignore
// these errors.
try {
// fill memory blocks with a pattern before freeing them
if (fillDeallocations_) {
alpaka::memset(*block.queue, *block.buffer, fillDeallocationValue_);
}
} catch (std::exception& e) {
if (debug_) {
std::ostringstream out;
out << "CachingAllocator::free() caught an alpaka error: " << e.what() << "\n";
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes << " bytes at "
<< ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << ", event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
<< " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}
return;
}
// if the buffer is not recached, it is automatically freed when block goes out of scope
if (debug_) {
std::ostringstream out;
Expand Down Expand Up @@ -452,6 +496,15 @@ namespace cms::alpakatools {

const bool reuseSameQueueAllocations_;
const bool debug_;

const bool fillAllocations_;
const uint8_t fillAllocationValue_;
const bool fillReallocations_;
const uint8_t fillReallocationValue_;
const bool fillDeallocations_;
const uint8_t fillDeallocationValue_;
const bool fillCaches_;
const uint8_t fillCacheValue_;
};

} // namespace cms::alpakatools
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace cms::alpakatools {
template <typename TDev,
typename TQueue,
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
auto allocate_device_allocators() {
auto allocate_device_allocators(AllocatorConfig const& config, bool debug) {
using Allocator = CachingAllocator<TDev, TQueue>;
auto const& devices = cms::alpakatools::devices<alpaka::Platform<TDev>>();
ssize_t const size = devices.size();
Expand All @@ -38,13 +38,9 @@ namespace cms::alpakatools {
#endif
ptr + index,
devices[index],
config::binGrowth,
config::minBin,
config::maxBin,
config::maxCachedBytes,
config::maxCachedFraction,
true, // reuseSameQueueAllocations
false); // debug
config,
true, // reuseSameQueueAllocations
debug);
}
} catch (...) {
--index;
Expand All @@ -60,11 +56,11 @@ namespace cms::alpakatools {
}

// use a custom deleter to destroy all objects and deallocate the memory
auto deleter = [size](Allocator* ptr) {
auto deleter = [size](Allocator* allocators) {
for (size_t i = size; i > 0; --i) {
std::destroy_at(ptr + i - 1);
std::destroy_at(allocators + i - 1);
}
std::allocator<Allocator>().deallocate(ptr, size);
std::allocator<Allocator>().deallocate(allocators, size);
};

return std::unique_ptr<Allocator[], decltype(deleter)>(ptr, deleter);
Expand All @@ -75,9 +71,11 @@ namespace cms::alpakatools {
template <typename TDev,
typename TQueue,
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
inline CachingAllocator<TDev, TQueue>& getDeviceCachingAllocator(TDev const& device) {
inline CachingAllocator<TDev, TQueue>& getDeviceCachingAllocator(TDev const& device,
AllocatorConfig const& config = AllocatorConfig{},
bool debug = false) {
// initialise all allocators, one per device
CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators<TDev, TQueue>();
CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators<TDev, TQueue>(config, debug);

size_t const index = alpaka::getNativeHandle(device);
assert(index < cms::alpakatools::devices<alpaka::Platform<TDev>>().size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,14 @@
namespace cms::alpakatools {

template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
inline CachingAllocator<alpaka_common::DevHost, TQueue>& getHostCachingAllocator() {
inline CachingAllocator<alpaka_common::DevHost, TQueue>& getHostCachingAllocator(
AllocatorConfig const& config = AllocatorConfig{}, bool debug = false) {
// thread safe initialisation of the host allocator
CMS_THREAD_SAFE static CachingAllocator<alpaka_common::DevHost, TQueue> allocator(
host(),
config::binGrowth,
config::minBin,
config::maxBin,
config::maxCachedBytes,
config::maxCachedFraction,
false, // reuseSameQueueAllocations
false); // debug
config,
false, // reuseSameQueueAllocations
debug);

// the public interface is thread safe
return allocator;
Expand Down
Loading

0 comments on commit 683d65b

Please sign in to comment.