From 6bfad55351fb25e9ba98ae953e0dc07837ede98f Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 28 Jun 2024 16:34:49 +0200 Subject: [PATCH 1/2] Add debugging capabilities to the CachingAllocator Extend the CachingAllocator to optionally fill with a configurable value all memory blocks that are: allocated, cached for re-use, re-used, or deallocated. Extend the AlpakaService to configure the host and device CachingAllocators. --- .../interface/AllocatorConfig.h | 50 ++++++-- .../interface/CachingAllocator.h | 111 +++++++++++++----- .../interface/getDeviceCachingAllocator.h | 24 ++-- .../interface/getHostCachingAllocator.h | 13 +- .../src/alpaka/AlpakaService.cc | 82 ++++++++++++- 5 files changed, 214 insertions(+), 66 deletions(-) diff --git a/HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h b/HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h index adeb7fa37a03a..f8e1deedc46b6 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h +++ b/HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h @@ -6,26 +6,50 @@ namespace cms::alpakatools { - namespace config { + struct AllocatorConfig { + // Bin growth factor (bin_growth in cub::CachingDeviceAllocator) + unsigned int binGrowth = 2; - // bin growth factor (bin_growth in cub::CachingDeviceAllocator) - constexpr unsigned int binGrowth = 2; + // Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CachingDeviceAllocator) + unsigned int minBin = 8; // 256 bytes - // smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CachingDeviceAllocator - constexpr unsigned int minBin = 8; // 256 bytes + // Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator). + // Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail. + unsigned int maxBin = 30; // 1 GB - // largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator). 
Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail. - constexpr unsigned int maxBin = 30; // 1 GB + // Total storage for the allocator; 0 means no limit. + size_t maxCachedBytes = 0; - // total storage for the allocator; 0 means no limit. - constexpr size_t maxCachedBytes = 0; + // Fraction of total device memory taken for the allocator; 0 means no limit. + // If both maxCachedBytes and maxCachedFraction are non-zero, the smallest resulting value is used. + double maxCachedFraction = 0.8; - // fraction of total device memory taken for the allocator; 0 means no limit. - constexpr double maxCachedFraction = 0.8; + // Fill all newly allocated or re-used memory blocks with fillAllocationValue. + bool fillAllocations = false; - // if both maxCachedBytes and maxCachedFraction are non-zero, the smallest resulting value is used. + // Fill only the re-used memory blocks with fillReallocationValue. + // If both fillAllocations and fillReallocations are true, fillAllocationValue is used for newly allocated blocks and fillReallocationValue is used for re-allocated blocks. + bool fillReallocations = false; - } // namespace config + // Fill memory blocks with fillDeallocationValue before freeing or caching them for re-use + bool fillDeallocations = false; + + // Fill memory blocks with fillCacheValue before caching them for re-use. + // If both fillDeallocations and fillCaches are true, fillDeallocationValue is used for blocks about to be freed and fillCacheValue is used for blocks about to be cached. 
+ bool fillCaches = false; + + // Byte value used to fill all newly allocated or re-used memory blocks + uint8_t fillAllocationValue = 0xA5; + + // Byte value used to fill all re-used memory blocks + uint8_t fillReallocationValue = 0x69; + + // Byte value used to fill all deallocated or cached memory blocks + uint8_t fillDeallocationValue = 0x5A; + + // Byte value used to fill all cached memory blocks + uint8_t fillCacheValue = 0x96; + }; } // namespace cms::alpakatools diff --git a/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h index 2560361e796ef..19b4c5c466d09 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h +++ b/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h @@ -16,7 +16,7 @@ #include #include "HeterogeneousCore/AlpakaInterface/interface/devices.h" -#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h" #include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h" // Inspired by cub::CachingDeviceAllocator @@ -37,18 +37,20 @@ namespace cms::alpakatools { return power; } - // format a memory size in B/kB/MB/GB + // format a memory size in B/KiB/MiB/GiB/TiB inline std::string as_bytes(size_t value) { if (value == std::numeric_limits::max()) { return "unlimited"; - } else if (value >= (1 << 30) and value % (1 << 30) == 0) { - return std::to_string(value >> 30) + " GB"; - } else if (value >= (1 << 20) and value % (1 << 20) == 0) { - return std::to_string(value >> 20) + " MB"; - } else if (value >= (1 << 10) and value % (1 << 10) == 0) { - return std::to_string(value >> 10) + " kB"; + } else if (value >= (1ul << 40) and value % (1ul << 40) == 0) { + return std::to_string(value >> 40) + " TiB"; + } else if (value >= (1ul << 30) and value % (1ul << 30) == 0) { + return std::to_string(value >> 30) + " GiB"; + } else if (value >= (1ul << 20) and value % 
(1ul << 20) == 0) { + return std::to_string(value >> 20) + " MiB"; + } else if (value >= (1ul << 10) and value % (1ul << 10) == 0) { + return std::to_string(value >> 10) + " KiB"; } else { - return std::to_string(value) + " B"; + return std::to_string(value) + " B"; } } @@ -119,27 +121,27 @@ namespace cms::alpakatools { explicit CachingAllocator( Device const& device, - unsigned int binGrowth, // bin growth factor; - unsigned int minBin, // smallest bin, corresponds to binGrowth^minBin bytes; - // smaller allocations are rounded to this value; - unsigned int maxBin, // largest bin, corresponds to binGrowth^maxBin bytes; - // larger allocations will fail; - size_t maxCachedBytes, // total storage for the allocator (0 means no limit); - double maxCachedFraction, // fraction of total device memory taken for the allocator (0 means no limit); - // if both maxCachedBytes and maxCachedFraction are non-zero, - // the smallest resulting value is used. + AllocatorConfig const& config, bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one; // this is safe only if all memory operations are scheduled in the same queue - bool debug) + bool debug = false) : device_(device), - binGrowth_(binGrowth), - minBin_(minBin), - maxBin_(maxBin), - minBinBytes_(detail::power(binGrowth, minBin)), - maxBinBytes_(detail::power(binGrowth, maxBin)), - maxCachedBytes_(cacheSize(maxCachedBytes, maxCachedFraction)), + binGrowth_(config.binGrowth), + minBin_(config.minBin), + maxBin_(config.maxBin), + minBinBytes_(detail::power(binGrowth_, minBin_)), + maxBinBytes_(detail::power(binGrowth_, maxBin_)), + maxCachedBytes_(cacheSize(config.maxCachedBytes, config.maxCachedFraction)), reuseSameQueueAllocations_(reuseSameQueueAllocations), - debug_(debug) { + debug_(debug), + fillAllocations_(config.fillAllocations), + fillAllocationValue_(config.fillAllocationValue), + fillReallocations_(config.fillReallocations), + 
fillReallocationValue_(config.fillReallocationValue), + fillDeallocations_(config.fillDeallocations), + fillDeallocationValue_(config.fillDeallocationValue), + fillCaches_(config.fillCaches), + fillCacheValue_(config.fillCacheValue) { if (debug_) { std::ostringstream out; out << "CachingAllocator settings\n" @@ -148,7 +150,7 @@ namespace cms::alpakatools { << " max bin " << maxBin_ << "\n" << " resulting bins:\n"; for (auto bin = minBin_; bin <= maxBin_; ++bin) { - auto binSize = detail::power(binGrowth, bin); + auto binSize = detail::power(binGrowth_, bin); out << " " << std::right << std::setw(12) << detail::as_bytes(binSize) << '\n'; } out << " maximum amount of cached memory: " << detail::as_bytes(maxCachedBytes_); @@ -182,8 +184,19 @@ namespace cms::alpakatools { std::tie(block.bin, block.bytes) = findBin(bytes); // try to re-use a cached block, or allocate a new buffer - if (not tryReuseCachedBlock(block)) { + if (tryReuseCachedBlock(block)) { + // fill the re-used memory block with a pattern + if (fillReallocations_) { + alpaka::memset(*block.queue, *block.buffer, fillReallocationValue_); + } else if (fillAllocations_) { + alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_); + } + } else { allocateNewBlock(block); + // fill the newly allocated memory block with a pattern + if (fillAllocations_) { + alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_); + } } return block.buffer->data(); @@ -215,11 +228,18 @@ namespace cms::alpakatools { // because of multiple exceptions it is best to ignore these // errors. 
try { + // fill memory blocks with a pattern before caching them + if (fillCaches_) { + alpaka::memset(*block.queue, *block.buffer, fillCacheValue_); + } else if (fillDeallocations_) { + alpaka::memset(*block.queue, *block.buffer, fillDeallocationValue_); + } + // record in the block a marker associated to the work queue alpaka::enqueue(*(block.queue), *(block.event)); } catch (std::exception& e) { if (debug_) { std::ostringstream out; - out << "CachingAllocator::free() error from alpaka::enqueue(): " << e.what() << "\n"; + out << "CachingAllocator::free() caught an alpaka error: " << e.what() << "\n"; out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes << " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size() @@ -244,6 +264,30 @@ namespace cms::alpakatools { std::cout << out.str() << std::endl; } } else { + // If the memset fails, very likely an error has occurred in the + // asynchronous processing. In that case the error will show up in all + // device API function calls, and the free() will be called by + // destructors during stack unwinding. In order to avoid terminate() + // being called because of multiple exceptions it is best to ignore + // these errors. 
+ try { + // fill memory blocks with a pattern before freeing them + if (fillDeallocations_) { + alpaka::memset(*block.queue, *block.buffer, fillDeallocationValue_); + } + } catch (std::exception& e) { + if (debug_) { + std::ostringstream out; + out << "CachingAllocator::free() caught an alpaka error: " << e.what() << "\n"; + out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes << " bytes at " + << ptr << " from associated queue " << block.queue->m_spQueueImpl.get() << ", event " + << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size() + << " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size() + << " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl; + std::cout << out.str() << std::endl; + } + return; + } // if the buffer is not recached, it is automatically freed when block goes out of scope if (debug_) { std::ostringstream out; @@ -452,6 +496,15 @@ namespace cms::alpakatools { const bool reuseSameQueueAllocations_; const bool debug_; + + const bool fillAllocations_; + const uint8_t fillAllocationValue_; + const bool fillReallocations_; + const uint8_t fillReallocationValue_; + const bool fillDeallocations_; + const uint8_t fillDeallocationValue_; + const bool fillCaches_; + const uint8_t fillCacheValue_; }; } // namespace cms::alpakatools diff --git a/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h index fe1e311f46e50..8c21e70634d1f 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h +++ b/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h @@ -19,7 +19,7 @@ namespace cms::alpakatools { template and alpaka::isQueue>> - auto allocate_device_allocators() { + auto allocate_device_allocators(AllocatorConfig const& config, bool debug) { using Allocator = CachingAllocator; auto const& devices = 
cms::alpakatools::devices>(); ssize_t const size = devices.size(); @@ -38,13 +38,9 @@ namespace cms::alpakatools { #endif ptr + index, devices[index], - config::binGrowth, - config::minBin, - config::maxBin, - config::maxCachedBytes, - config::maxCachedFraction, - true, // reuseSameQueueAllocations - false); // debug + config, + true, // reuseSameQueueAllocations + debug); } } catch (...) { --index; @@ -60,11 +56,11 @@ namespace cms::alpakatools { } // use a custom deleter to destroy all objects and deallocate the memory - auto deleter = [size](Allocator* ptr) { + auto deleter = [size](Allocator* allocators) { for (size_t i = size; i > 0; --i) { - std::destroy_at(ptr + i - 1); + std::destroy_at(allocators + i - 1); } - std::allocator().deallocate(ptr, size); + std::allocator().deallocate(allocators, size); }; return std::unique_ptr(ptr, deleter); @@ -75,9 +71,11 @@ namespace cms::alpakatools { template and alpaka::isQueue>> - inline CachingAllocator& getDeviceCachingAllocator(TDev const& device) { + inline CachingAllocator& getDeviceCachingAllocator(TDev const& device, + AllocatorConfig const& config = AllocatorConfig{}, + bool debug = false) { // initialise all allocators, one per device - CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators(); + CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators(config, debug); size_t const index = alpaka::getNativeHandle(device); assert(index < cms::alpakatools::devices>().size()); diff --git a/HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h index 0950906f67737..638d9b4a9865f 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h +++ b/HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h @@ -13,17 +13,14 @@ namespace cms::alpakatools { template >> - inline CachingAllocator& getHostCachingAllocator() { + inline CachingAllocator& 
getHostCachingAllocator( + AllocatorConfig const& config = AllocatorConfig{}, bool debug = false) { // thread safe initialisation of the host allocator CMS_THREAD_SAFE static CachingAllocator allocator( host(), - config::binGrowth, - config::minBin, - config::maxBin, - config::maxCachedBytes, - config::maxCachedFraction, - false, // reuseSameQueueAllocations - false); // debug + config, + false, // reuseSameQueueAllocations + debug); // the public interface is thread safe return allocator; diff --git a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc index fbc0777c03b99..203bea265c1dd 100644 --- a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc +++ b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc @@ -24,6 +24,75 @@ #include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" #endif // ALPAKA_ACC_GPU_HIP_ENABLED +namespace { + + // Note: we cannot use "uint64_t" with the ParameterSet-related functions, because the template specialisations expect "unsigned long long", while "uint64_t" expands to "unsigned long". 
+ + edm::ParameterSetDescription createAllocatorConfig( + cms::alpakatools::AllocatorConfig const& alloc = cms::alpakatools::AllocatorConfig{}) { + edm::ParameterSetDescription desc; + desc.addUntracked("binGrowth", alloc.binGrowth) + ->setComment("Bin growth factor (bin_growth in cub::CachingDeviceAllocator)"); + desc.addUntracked("minBin", alloc.minBin) + ->setComment( + "Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CachingDeviceAllocator).\n8 " + "corresponds to 256 bytes."); + desc.addUntracked("maxBin", alloc.maxBin) + ->setComment( + "Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator).\n30 " + "corresponds to 1 GiB.\nNote that unlike in cub, allocations larger than binGrowth^maxBin are set to " + "fail."); + desc.addUntracked("maxCachedBytes", alloc.maxCachedBytes) + ->setComment("Total storage for the allocator; 0 means no limit."); + desc.addUntracked("maxCachedFraction", alloc.maxCachedFraction) + ->setComment( + "Fraction of total device memory taken for the allocator; 0 means no limit.\nIf both maxCachedBytes and " + "maxCachedFraction are non-zero, the smallest resulting value is used."); + desc.addUntracked("fillAllocations", alloc.fillAllocations) + ->setComment("Fill all newly allocated or re-used memory blocks with fillAllocationValue."); + desc.addUntracked("fillAllocationValue", alloc.fillAllocationValue) + ->setComment("Byte value used to fill all newly allocated or re-used memory blocks"); + desc.addUntracked("fillReallocations", alloc.fillReallocations) + ->setComment( + "Fill only the re-used memory blocks with fillReallocationValue.\nIf both fillAllocations and " + "fillReallocations are true, fillAllocationValue is used for newly allocated blocks and " + "fillReallocationValue is used for re-allocated blocks."); + desc.addUntracked("fillReallocationValue", alloc.fillReallocationValue) + ->setComment("Byte value used to fill all re-used memory blocks"); + 
desc.addUntracked("fillDeallocations", alloc.fillDeallocations) + ->setComment("Fill memory blocks with fillDeallocationValue before freeing or caching them for re-use"); + desc.addUntracked("fillDeallocationValue", alloc.fillDeallocationValue) + ->setComment("Byte value used to fill all deallocated or cached memory blocks"); + desc.addUntracked("fillCaches", alloc.fillCaches) + ->setComment( + "Fill memory blocks with fillCacheValue before caching them for re-use.\nIf both fillDeallocations and " + "fillCaches are true, fillDeallocationValue is used for blocks about to be freed and fillCacheValue is " + "used for blocks about to be cached."); + desc.addUntracked("fillCacheValue", alloc.fillCacheValue) + ->setComment("Byte value used to fill all cached memory blocks"); + return desc; + } + + cms::alpakatools::AllocatorConfig parseAllocatorConfig(edm::ParameterSet const& config) { + cms::alpakatools::AllocatorConfig alloc; + alloc.binGrowth = config.getUntrackedParameter("binGrowth"); + alloc.minBin = config.getUntrackedParameter("minBin"); + alloc.maxBin = config.getUntrackedParameter("maxBin"); + alloc.maxCachedBytes = config.getUntrackedParameter("maxCachedBytes"); + alloc.maxCachedFraction = config.getUntrackedParameter("maxCachedFraction"); + alloc.fillAllocations = config.getUntrackedParameter("fillAllocations"); + alloc.fillAllocationValue = static_cast(config.getUntrackedParameter("fillAllocationValue")); + alloc.fillReallocations = config.getUntrackedParameter("fillReallocations"); + alloc.fillReallocationValue = static_cast(config.getUntrackedParameter("fillReallocationValue")); + alloc.fillDeallocations = config.getUntrackedParameter("fillDeallocations"); + alloc.fillDeallocationValue = static_cast(config.getUntrackedParameter("fillDeallocationValue")); + alloc.fillCaches = config.getUntrackedParameter("fillCaches"); + alloc.fillCacheValue = static_cast(config.getUntrackedParameter("fillCacheValue")); + return alloc; + } + +} // namespace + namespace 
ALPAKA_ACCELERATOR_NAMESPACE { AlpakaService::AlpakaService(edm::ParameterSet const& config, edm::ActivityRegistry&) @@ -39,7 +108,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif // ALPAKA_ACC_GPU_HIP_ENABLED // TODO from Andrea Bocci: - // - handle alpaka caching allocators ? // - extract and print more information about the platform and devices if (not enabled_) { @@ -88,9 +156,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { cms::alpakatools::getEventCache().clear(); // initialise the caching memory allocators - cms::alpakatools::getHostCachingAllocator(); + cms::alpakatools::AllocatorConfig hostAllocatorConfig = + parseAllocatorConfig(config.getUntrackedParameter("hostAllocator")); + cms::alpakatools::getHostCachingAllocator(hostAllocatorConfig, verbose_); + cms::alpakatools::AllocatorConfig deviceAllocatorConfig = + parseAllocatorConfig(config.getUntrackedParameter("deviceAllocator")); for (auto const& device : devices) - cms::alpakatools::getDeviceCachingAllocator(device); + cms::alpakatools::getDeviceCachingAllocator(device, deviceAllocatorConfig, verbose_); } AlpakaService::~AlpakaService() { @@ -108,6 +180,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { edm::ParameterSetDescription desc; desc.addUntracked("enabled", true); desc.addUntracked("verbose", false); + desc.addUntracked("hostAllocator", createAllocatorConfig()) + ->setComment("Configuration for the host's CachingAllocator"); + desc.addUntracked("deviceAllocator", createAllocatorConfig()) + ->setComment("Configuration for the devices' CachingAllocator"); descriptions.add(ALPAKA_TYPE_ALIAS_NAME(AlpakaService), desc); } From 3830ca70e89274b7374835313f6ebe43644dc6bb Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 28 Jun 2024 16:37:19 +0200 Subject: [PATCH 2/2] Add a simple test to load the AlpakaService --- .../AlpakaServices/test/BuildFile.xml | 18 ++++++ .../test/testAlpakaServiceCudaAsync.py | 55 +++++++++++++++++++ .../test/testAlpakaServiceROCmAsync.py | 55 +++++++++++++++++++ 
.../test/testAlpakaServiceSerialSync.py | 52 ++++++++++++++++++ 4 files changed, 180 insertions(+) create mode 100644 HeterogeneousCore/AlpakaServices/test/BuildFile.xml create mode 100644 HeterogeneousCore/AlpakaServices/test/testAlpakaServiceCudaAsync.py create mode 100644 HeterogeneousCore/AlpakaServices/test/testAlpakaServiceROCmAsync.py create mode 100644 HeterogeneousCore/AlpakaServices/test/testAlpakaServiceSerialSync.py diff --git a/HeterogeneousCore/AlpakaServices/test/BuildFile.xml b/HeterogeneousCore/AlpakaServices/test/BuildFile.xml new file mode 100644 index 0000000000000..0e99074d1259a --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/test/BuildFile.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceCudaAsync.py b/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceCudaAsync.py new file mode 100644 index 0000000000000..a0e82995977d3 --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceCudaAsync.py @@ -0,0 +1,55 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process( "TEST" ) + +process.options = cms.untracked.PSet( + numberOfThreads = cms.untracked.uint32( 4 ), + numberOfStreams = cms.untracked.uint32( 0 ), +) + +process.load('FWCore.MessageService.MessageLogger_cfi') +process.MessageLogger.CUDAService = {} +process.MessageLogger.AlpakaService = {} + +process.load('HeterogeneousCore.CUDAServices.CUDAService_cfi') + +from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync as _AlpakaServiceCudaAsync +process.AlpakaServiceCudaAsync = _AlpakaServiceCudaAsync.clone( + verbose = True, + hostAllocator = dict( + binGrowth = 2, + minBin = 8, # 256 bytes + maxBin = 30, # 1 GB + maxCachedBytes = 64*1024*1024*1024, # 64 GB + maxCachedFraction = 0.8, # or 80%, whatever is less + fillAllocations = True, + fillAllocationValue = 0xA5, + fillReallocations = True, + fillReallocationValue = 0x69, + fillDeallocations 
= True, + fillDeallocationValue = 0x5A, + fillCaches = True, + fillCacheValue = 0x96 + ), + deviceAllocator = dict( + binGrowth = 2, + minBin = 8, # 256 bytes + maxBin = 30, # 1 GB + maxCachedBytes = 8*1024*1024*1024, # 8 GB + maxCachedFraction = 0.8, # or 80%, whatever is less + fillAllocations = True, + fillAllocationValue = 0xA5, + fillReallocations = True, + fillReallocationValue = 0x69, + fillDeallocations = True, + fillDeallocationValue = 0x5A, + fillCaches = True, + fillCacheValue = 0x96 + ) +) + +process.source = cms.Source("EmptySource") + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32( 0 ) +) diff --git a/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceROCmAsync.py b/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceROCmAsync.py new file mode 100644 index 0000000000000..2f1eac0141a72 --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceROCmAsync.py @@ -0,0 +1,55 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process( "TEST" ) + +process.options = cms.untracked.PSet( + numberOfThreads = cms.untracked.uint32( 4 ), + numberOfStreams = cms.untracked.uint32( 0 ), +) + +process.load('FWCore.MessageService.MessageLogger_cfi') +process.MessageLogger.ROCmService = {} +process.MessageLogger.AlpakaService = {} + +process.load('HeterogeneousCore.ROCmServices.ROCmService_cfi') + +from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync as _AlpakaServiceROCmAsync +process.AlpakaServiceROCmAsync = _AlpakaServiceROCmAsync.clone( + verbose = True, + hostAllocator = cms.untracked.PSet( + binGrowth = cms.untracked.uint32(2), + minBin = cms.untracked.uint32(8), + maxBin = cms.untracked.uint32(30), + maxCachedBytes = cms.untracked.uint64(0), + maxCachedFraction = cms.untracked.double(0.8), + fillAllocations = cms.untracked.bool(True), + fillAllocationValue = cms.untracked.uint32(165), + fillReallocations = cms.untracked.bool(True), + fillReallocationValue = 
cms.untracked.uint32(90), + fillDeallocations = cms.untracked.bool(True), + fillDeallocationValue = cms.untracked.uint32(105), + fillCaches = cms.untracked.bool(True), + fillCacheValue = cms.untracked.uint32(150) + ), + deviceAllocator = cms.untracked.PSet( + binGrowth = cms.untracked.uint32(2), + minBin = cms.untracked.uint32(8), + maxBin = cms.untracked.uint32(30), + maxCachedBytes = cms.untracked.uint64(0), + maxCachedFraction = cms.untracked.double(0.8), + fillAllocations = cms.untracked.bool(True), + fillAllocationValue = cms.untracked.uint32(165), + fillReallocations = cms.untracked.bool(True), + fillReallocationValue = cms.untracked.uint32(90), + fillDeallocations = cms.untracked.bool(True), + fillDeallocationValue = cms.untracked.uint32(105), + fillCaches = cms.untracked.bool(True), + fillCacheValue = cms.untracked.uint32(150) + ) +) + +process.source = cms.Source("EmptySource") + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32( 0 ) +) diff --git a/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceSerialSync.py b/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceSerialSync.py new file mode 100644 index 0000000000000..51e9d9f5f1e8d --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/test/testAlpakaServiceSerialSync.py @@ -0,0 +1,52 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process( "TEST" ) + +process.options = cms.untracked.PSet( + numberOfThreads = cms.untracked.uint32( 4 ), + numberOfStreams = cms.untracked.uint32( 0 ), +) + +process.load('FWCore.MessageService.MessageLogger_cfi') +process.MessageLogger.AlpakaService = {} + +from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync as _AlpakaServiceSerialSync +process.AlpakaServiceSerialSync = _AlpakaServiceSerialSync.clone( + verbose = True, + hostAllocator = cms.untracked.PSet( + binGrowth = cms.untracked.uint32(2), + minBin = cms.untracked.uint32(8), + maxBin = cms.untracked.uint32(30), + maxCachedBytes = 
cms.untracked.uint64(0), + maxCachedFraction = cms.untracked.double(0.8), + fillAllocations = cms.untracked.bool(True), + fillAllocationValue = cms.untracked.uint32(165), + fillReallocations = cms.untracked.bool(True), + fillReallocationValue = cms.untracked.uint32(90), + fillDeallocations = cms.untracked.bool(True), + fillDeallocationValue = cms.untracked.uint32(105), + fillCaches = cms.untracked.bool(True), + fillCacheValue = cms.untracked.uint32(150) + ), + deviceAllocator = cms.untracked.PSet( + binGrowth = cms.untracked.uint32(2), + minBin = cms.untracked.uint32(8), + maxBin = cms.untracked.uint32(30), + maxCachedBytes = cms.untracked.uint64(0), + maxCachedFraction = cms.untracked.double(0.8), + fillAllocations = cms.untracked.bool(True), + fillAllocationValue = cms.untracked.uint32(165), + fillReallocations = cms.untracked.bool(True), + fillReallocationValue = cms.untracked.uint32(90), + fillDeallocations = cms.untracked.bool(True), + fillDeallocationValue = cms.untracked.uint32(105), + fillCaches = cms.untracked.bool(True), + fillCacheValue = cms.untracked.uint32(150) + ) +) + +process.source = cms.Source("EmptySource") + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32( 0 ) +)