diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc index 1b22159a53b88..749073d1f916f 100644 --- a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc +++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc @@ -34,7 +34,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template >> ALPAKA_FN_ACC void operator()(TAcc const& acc, reco::ZVertexSoAView zvertex_view) const { if (cms::alpakatools::once_per_grid(acc)) { - ALPAKA_ASSERT_OFFLOAD(zvertex_view.nvFinal() == 420); + ALPAKA_ASSERT_ACC(zvertex_view.nvFinal() == 420); } for (int32_t j : elements_with_stride(acc, zvertex_view.nvFinal())) { diff --git a/HeterogeneousCore/AlpakaCore/interface/EventCache.h b/HeterogeneousCore/AlpakaCore/interface/EventCache.h index 08ed26f7813b7..2701a739222f9 100644 --- a/HeterogeneousCore/AlpakaCore/interface/EventCache.h +++ b/HeterogeneousCore/AlpakaCore/interface/EventCache.h @@ -10,6 +10,7 @@ #include "FWCore/Utilities/interface/ReusableObjectHolder.h" #include "FWCore/Utilities/interface/thread_safety_macros.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" #include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h" namespace cms::alpakatools { @@ -31,11 +32,11 @@ namespace cms::alpakatools { #endif using Device = alpaka::Dev; - using Platform = alpaka::Pltf; + using Platform = alpaka::Platform; // EventCache should be constructed by the first call to // getEventCache() only if we have any devices present - EventCache() : cache_(alpaka::getDevCount()) {} + EventCache() : cache_(devices().size()) {} // Gets a (cached) event for the current device. The event // will be returned to the cache by the shared_ptr destructor. The @@ -79,7 +80,7 @@ namespace cms::alpakatools { // EventCache lives through multiple tests (and go through // multiple shutdowns of the framework). cache_.clear(); - cache_.resize(alpaka::getDevCount()); + cache_.resize(devices().size()); } std::vector> cache_; diff --git a/HeterogeneousCore/AlpakaCore/interface/QueueCache.h b/HeterogeneousCore/AlpakaCore/interface/QueueCache.h index e2f93d94da933..8415ec7600574 100644 --- a/HeterogeneousCore/AlpakaCore/interface/QueueCache.h +++ b/HeterogeneousCore/AlpakaCore/interface/QueueCache.h @@ -9,6 +9,7 @@ #include "FWCore/Utilities/interface/ReusableObjectHolder.h" #include "FWCore/Utilities/interface/thread_safety_macros.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" #include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h" namespace cms::alpakatools { @@ -29,12 +30,12 @@ namespace cms::alpakatools { #endif using Device = alpaka::Dev; - using Platform = alpaka::Pltf; + using Platform = alpaka::Platform; public: // QueueCache should be constructed by the first call to // getQueueCache() only if we have any devices present - QueueCache() : cache_(alpaka::getDevCount()) {} + QueueCache() : cache_(devices().size()) {} // Gets a (cached) queue for the current device. The queue // will be returned to the cache by the shared_ptr destructor. @@ -52,7 +53,7 @@ namespace cms::alpakatools { // QueueCache lives through multiple tests (and go through // multiple shutdowns of the framework). cache_.clear(); - cache_.resize(alpaka::getDevCount()); + cache_.resize(devices().size()); } std::vector> cache_; diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProductType.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProductType.h index 295adde387ed7..7c91b5dffaa5b 100644 --- a/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProductType.h +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProductType.h @@ -13,7 +13,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::detail { */ template struct ESDeviceProductType { - using type = std::conditional_t, + using type = std::conditional_t, // host backends can use TProduct directly TProduct, // all device backends need to be wrapped diff --git a/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h b/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h index 05d9bf55b1bdf..de59cc4a6d82c 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h +++ b/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h @@ -104,7 +104,7 @@ namespace cms::alpakatools { auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); }; return alpaka::BufCudaRt( - dev, reinterpret_cast(memPtr), std::move(deleter), pitchBytes, extent); + dev, reinterpret_cast(memPtr), std::move(deleter), extent, pitchBytes); } }; @@ -180,7 +180,7 @@ namespace cms::alpakatools { auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); }; return alpaka::BufHipRt( - dev, reinterpret_cast(memPtr), std::move(deleter), pitchBytes, extent); + dev, reinterpret_cast(memPtr), std::move(deleter), extent, pitchBytes); } }; diff --git a/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h index e42c0df623f1d..017b13a2c1341 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h +++ b/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h @@ -15,6 +15,7 @@ #include +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" #include "HeterogeneousCore/AlpakaInterface/interface/traits.h" #include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h" @@ -337,7 +338,8 @@ namespace cms::alpakatools { return alpaka::allocBuf(device_, bytes); } else if constexpr (std::is_same_v) { // allocate pinned host memory accessible by the queue's platform - return alpaka::allocMappedBuf>, std::byte, size_t>(device_, bytes); + using Platform = alpaka::Platform>; + return alpaka::allocMappedBuf(device_, platform(), bytes); } else { // unsupported combination static_assert(std::is_same_v> or std::is_same_v, diff --git a/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h b/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h index 9535abad90c01..b122ccc5a54ee 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h +++ b/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h @@ -27,10 +27,10 @@ namespace cms::alpakatools { const uint32_t nt = offsets[nh]; for (uint32_t i : uniform_elements(acc, nt)) { auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); - ALPAKA_ASSERT_OFFLOAD((*off) > 0); + ALPAKA_ASSERT_ACC((*off) > 0); int32_t ih = off - offsets - 1; - ALPAKA_ASSERT_OFFLOAD(ih >= 0); - ALPAKA_ASSERT_OFFLOAD(ih < int(nh)); + ALPAKA_ASSERT_ACC(ih >= 0); + ALPAKA_ASSERT_ACC(ih < int(nh)); h->count(acc, v[i], ih); } } @@ -46,10 +46,10 @@ namespace cms::alpakatools { const uint32_t nt = offsets[nh]; for (uint32_t i : uniform_elements(acc, nt)) { auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); - ALPAKA_ASSERT_OFFLOAD((*off) > 0); + ALPAKA_ASSERT_ACC((*off) > 0); int32_t ih = off - offsets - 1; - ALPAKA_ASSERT_OFFLOAD(ih >= 0); - ALPAKA_ASSERT_OFFLOAD(ih < int(nh)); + ALPAKA_ASSERT_ACC(ih >= 0); + ALPAKA_ASSERT_ACC(ih < int(nh)); h->fill(acc, v[i], i, ih); } } @@ -102,7 +102,7 @@ namespace cms::alpakatools { int bs = Hist::bin(value); int be = std::min(int(Hist::nbins() - 1), bs + n); bs = std::max(0, bs - n); - ALPAKA_ASSERT_OFFLOAD(be >= bs); + ALPAKA_ASSERT_ACC(be >= bs); for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) { func(*pj); } @@ -113,7 +113,7 @@ namespace cms::alpakatools { ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) { auto bs = Hist::bin(wmin); auto be = Hist::bin(wmax); - ALPAKA_ASSERT_OFFLOAD(be >= bs); + ALPAKA_ASSERT_ACC(be >= bs); for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) { func(*pj); } @@ -164,36 +164,36 @@ namespace cms::alpakatools { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) { uint32_t b = bin(t); - ALPAKA_ASSERT_OFFLOAD(b < nbins()); + ALPAKA_ASSERT_ACC(b < nbins()); Base::atomicIncrement(acc, this->off[b]); } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) { uint32_t b = bin(t); - ALPAKA_ASSERT_OFFLOAD(b < nbins()); + ALPAKA_ASSERT_ACC(b < nbins()); auto w = Base::atomicDecrement(acc, this->off[b]); - ALPAKA_ASSERT_OFFLOAD(w > 0); + ALPAKA_ASSERT_ACC(w > 0); this->content[w - 1] = j; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) { uint32_t b = bin(t); - ALPAKA_ASSERT_OFFLOAD(b < nbins()); + ALPAKA_ASSERT_ACC(b < nbins()); b += histOff(nh); - ALPAKA_ASSERT_OFFLOAD(b < totbins()); + ALPAKA_ASSERT_ACC(b < totbins()); Base::atomicIncrement(acc, this->off[b]); } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) { uint32_t b = bin(t); - ALPAKA_ASSERT_OFFLOAD(b < nbins()); + ALPAKA_ASSERT_ACC(b < nbins()); b += histOff(nh); - ALPAKA_ASSERT_OFFLOAD(b < totbins()); + ALPAKA_ASSERT_ACC(b < totbins()); auto w = Base::atomicDecrement(acc, this->off[b]); - ALPAKA_ASSERT_OFFLOAD(w > 0); + ALPAKA_ASSERT_ACC(w > 0); this->content[w - 1] = j; } }; diff --git a/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h b/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h index 866564d3f896e..a914f0989dc88 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h +++ b/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h @@ -42,15 +42,15 @@ namespace cms::alpakatools { constexpr auto capacity() const { return content.capacity(); } ALPAKA_FN_HOST_ACC void initStorage(View view) { - ALPAKA_ASSERT_OFFLOAD(view.assoc == this); + ALPAKA_ASSERT_ACC(view.assoc == this); if constexpr (ctCapacity() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.contentStorage); - ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0); + ALPAKA_ASSERT_ACC(view.contentStorage); + ALPAKA_ASSERT_ACC(view.contentSize > 0); content.init(view.contentStorage, view.contentSize); } if constexpr (ctNOnes() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.offStorage); - ALPAKA_ASSERT_OFFLOAD(view.offSize > 0); + ALPAKA_ASSERT_ACC(view.offStorage); + ALPAKA_ASSERT_ACC(view.offSize > 0); off.init(view.offStorage, view.offSize); } } @@ -80,15 +80,15 @@ namespace cms::alpakatools { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, I b) { - ALPAKA_ASSERT_OFFLOAD(b < static_cast(nOnes())); + ALPAKA_ASSERT_ACC(b < static_cast(nOnes())); atomicIncrement(acc, off[b]); } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, I b, index_type j) { - ALPAKA_ASSERT_OFFLOAD(b < static_cast(nOnes())); + ALPAKA_ASSERT_ACC(b < static_cast(nOnes())); auto w = atomicDecrement(acc, off[b]); - ALPAKA_ASSERT_OFFLOAD(w > 0); + ALPAKA_ASSERT_ACC(w > 0); content[w - 1] = j; } @@ -96,8 +96,8 @@ namespace cms::alpakatools { struct zeroAndInit { template ALPAKA_FN_ACC void operator()(const TAcc &acc, View view) const { - ALPAKA_ASSERT_OFFLOAD((1 == alpaka::getWorkDiv(acc)[0])); - ALPAKA_ASSERT_OFFLOAD((0 == alpaka::getIdx(acc)[0])); + ALPAKA_ASSERT_ACC((1 == alpaka::getWorkDiv(acc)[0])); + ALPAKA_ASSERT_ACC((0 == alpaka::getIdx(acc)[0])); auto h = view.assoc; if (cms::alpakatools::once_per_block(acc)) { h->psws = 0; @@ -119,12 +119,12 @@ namespace cms::alpakatools { template ALPAKA_FN_INLINE static void launchZero(View view, TQueue &queue) { if constexpr (ctCapacity() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.contentStorage); - ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0); + ALPAKA_ASSERT_ACC(view.contentStorage); + ALPAKA_ASSERT_ACC(view.contentSize > 0); } if constexpr (ctNOnes() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.offStorage); - ALPAKA_ASSERT_OFFLOAD(view.offSize > 0); + ALPAKA_ASSERT_ACC(view.offStorage); + ALPAKA_ASSERT_ACC(view.offSize > 0); } if constexpr (!requires_single_thread_per_block_v) { auto nthreads = 1024; @@ -133,7 +133,7 @@ namespace cms::alpakatools { alpaka::exec(queue, workDiv, zeroAndInit{}, view); } else { auto h = view.assoc; - ALPAKA_ASSERT_OFFLOAD(h); + ALPAKA_ASSERT_ACC(h); h->initStorage(view); h->zero(); h->psws = 0; @@ -213,9 +213,9 @@ namespace cms::alpakatools { template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize(TAcc &acc, Counter *ws = nullptr) { - ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == 0); + ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == 0); blockPrefixScan(acc, this->off.data(), this->totOnes(), ws); - ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]); + ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]); } ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize() { @@ -234,17 +234,17 @@ namespace cms::alpakatools { ALPAKA_FN_INLINE static void launchFinalize(View view, TQueue &queue) { // View stores a base pointer, we need to upcast back... auto h = static_cast(view.assoc); - ALPAKA_ASSERT_OFFLOAD(h); + ALPAKA_ASSERT_ACC(h); if constexpr (!requires_single_thread_per_block_v) { Counter *poff = (Counter *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, off)); auto nOnes = OneToManyAssocRandomAccess::ctNOnes(); if constexpr (OneToManyAssocRandomAccess::ctNOnes() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.offStorage); - ALPAKA_ASSERT_OFFLOAD(view.offSize > 0); + ALPAKA_ASSERT_ACC(view.offStorage); + ALPAKA_ASSERT_ACC(view.offSize > 0); nOnes = view.offSize; poff = view.offStorage; } - ALPAKA_ASSERT_OFFLOAD(nOnes > 0); + ALPAKA_ASSERT_ACC(nOnes > 0); int32_t *ppsws = (int32_t *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, psws)); auto nthreads = 1024; auto nblocks = (nOnes + nthreads - 1) / nthreads; diff --git a/HeterogeneousCore/AlpakaInterface/interface/config.h b/HeterogeneousCore/AlpakaInterface/interface/config.h index 55bc9c873c775..be37c779110cc 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/config.h +++ b/HeterogeneousCore/AlpakaInterface/interface/config.h @@ -35,7 +35,7 @@ namespace alpaka_common { // host types using DevHost = alpaka::DevCpu; - using PltfHost = alpaka::Pltf; + using PlatformHost = alpaka::Platform; } // namespace alpaka_common @@ -43,7 +43,7 @@ namespace alpaka_common { namespace alpaka_cuda_async { using namespace alpaka_common; - using Platform = alpaka::PltfCudaRt; + using Platform = alpaka::PlatformCudaRt; using Device = alpaka::DevCudaRt; using Queue = alpaka::QueueCudaRtNonBlocking; using Event = alpaka::EventCudaRt; @@ -88,7 +88,7 @@ namespace alpaka { namespace alpaka_rocm_async { using namespace alpaka_common; - using Platform = alpaka::PltfHipRt; + using Platform = alpaka::PlatformHipRt; using Device = alpaka::DevHipRt; using Queue = alpaka::QueueHipRtNonBlocking; using Event = alpaka::EventHipRt; @@ -133,7 +133,7 @@ namespace alpaka { namespace alpaka_serial_sync { using namespace alpaka_common; - using Platform = alpaka::PltfCpu; + using Platform = alpaka::PlatformCpu; using Device = alpaka::DevCpu; using Queue = alpaka::QueueCpuBlocking; using Event = alpaka::EventCpu; @@ -159,7 +159,7 @@ namespace alpaka_serial_sync { namespace alpaka_tbb_async { using namespace alpaka_common; - using Platform = alpaka::PltfCpu; + using Platform = alpaka::PlatformCpu; using Device = alpaka::DevCpu; using Queue = alpaka::QueueCpuNonBlocking; using Event = alpaka::EventCpu; diff --git a/HeterogeneousCore/AlpakaInterface/interface/devices.h b/HeterogeneousCore/AlpakaInterface/interface/devices.h index cfe907a76ac7c..41f572d6ab091 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/devices.h +++ b/HeterogeneousCore/AlpakaInterface/interface/devices.h @@ -1,7 +1,6 @@ #ifndef HeterogeneousCore_AlpakaInterface_interface_devices_h #define HeterogeneousCore_AlpakaInterface_interface_devices_h -#include #include #include @@ -10,30 +9,19 @@ namespace cms::alpakatools { - namespace detail { - - template >> - inline std::vector> enumerate_devices() { - using Platform = TPlatform; - using Device = alpaka::Dev; - - std::vector devices; - uint32_t n = alpaka::getDevCount(); - devices.reserve(n); - for (uint32_t i = 0; i < n; ++i) { - devices.push_back(alpaka::getDevByIdx(i)); - assert(alpaka::getNativeHandle(devices.back()) == static_cast(i)); - } - - return devices; - } - - } // namespace detail + // returns the alpaka accelerator platform + template >> + inline TPlatform const& platform() { + // initialise the platform the first time that this function is called + static const auto platform = TPlatform{}; + return platform; + } // return the alpaka accelerator devices for the given platform template >> inline std::vector> const& devices() { - static const auto devices = detail::enumerate_devices(); + // enumerate all devices the first time that this function is called + static const auto devices = alpaka::getDevs(platform()); return devices; } diff --git a/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h index 06b4453a68502..fe1e311f46e50 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h +++ b/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h @@ -21,7 +21,7 @@ namespace cms::alpakatools { typename = std::enable_if_t and alpaka::isQueue>> auto allocate_device_allocators() { using Allocator = CachingAllocator; - auto const& devices = cms::alpakatools::devices>(); + auto const& devices = cms::alpakatools::devices>(); ssize_t const size = devices.size(); // allocate the storage for the objects @@ -80,7 +80,7 @@ namespace cms::alpakatools { CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators(); size_t const index = alpaka::getNativeHandle(device); - assert(index < cms::alpakatools::devices>().size()); + assert(index < cms::alpakatools::devices>().size()); // the public interface is thread safe return allocators[index]; diff --git a/HeterogeneousCore/AlpakaInterface/interface/host.h b/HeterogeneousCore/AlpakaInterface/interface/host.h index 1b2bb70a105c8..a8064fe578469 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/host.h +++ b/HeterogeneousCore/AlpakaInterface/interface/host.h @@ -1,32 +1,17 @@ #ifndef HeterogeneousCore_AlpakaInterface_interface_host_h #define HeterogeneousCore_AlpakaInterface_interface_host_h -#include - #include -namespace cms::alpakatools { - - namespace detail { +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" - inline alpaka::DevCpu enumerate_host() { - using Platform = alpaka::PltfCpu; - using Host = alpaka::DevCpu; - - assert(alpaka::getDevCount() == 1); - Host host = alpaka::getDevByIdx(0); - assert(alpaka::getNativeHandle(host) == 0); - - return host; - } +namespace cms::alpakatools { - } // namespace detail + // returns the alpaka host platform + inline alpaka::PlatformCpu const& host_platform() { return platform(); } // returns the alpaka host device - inline alpaka::DevCpu const& host() { - static const auto host = detail::enumerate_host(); - return host; - } + inline alpaka::DevCpu const& host() { return devices()[0]; } } // namespace cms::alpakatools diff --git a/HeterogeneousCore/AlpakaInterface/interface/memory.h b/HeterogeneousCore/AlpakaInterface/interface/memory.h index 757f3fa048b98..c0c28de206a11 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/memory.h +++ b/HeterogeneousCore/AlpakaInterface/interface/memory.h @@ -8,6 +8,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h" #include "HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" #include "HeterogeneousCore/AlpakaInterface/interface/traits.h" namespace cms::alpakatools { @@ -82,19 +83,23 @@ namespace cms::alpakatools { template std::enable_if_t, host_buffer> make_host_buffer() { - return alpaka::allocMappedBuf(host(), Scalar{}); + using Platform = TPlatform; + return alpaka::allocMappedBuf(host(), platform(), Scalar{}); } template std::enable_if_t and not std::is_array_v>, host_buffer> make_host_buffer(Extent extent) { - return alpaka::allocMappedBuf, Idx>(host(), Vec1D{extent}); + using Platform = TPlatform; + return alpaka::allocMappedBuf, Idx>(host(), platform(), Vec1D{extent}); } template std::enable_if_t and not std::is_array_v>, host_buffer> make_host_buffer() { - return alpaka::allocMappedBuf, Idx>(host(), Vec1D{std::extent_v}); + using Platform = TPlatform; + return alpaka::allocMappedBuf, Idx>( + host(), platform(), Vec1D{std::extent_v}); } // potentially cached, pinned, scalar and 1-dimensional host buffers, associated to a work queue @@ -106,7 +111,8 @@ namespace cms::alpakatools { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { return allocCachedBuf(host(), queue, Scalar{}); } else { - return alpaka::allocMappedBuf>, T, Idx>(host(), Scalar{}); + using Platform = alpaka::Platform>; + return alpaka::allocMappedBuf(host(), platform(), Scalar{}); } } @@ -118,8 +124,9 @@ namespace cms::alpakatools { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { return allocCachedBuf, Idx>(host(), queue, Vec1D{extent}); } else { - return alpaka::allocMappedBuf>, std::remove_extent_t, Idx>(host(), - Vec1D{extent}); + using Platform = alpaka::Platform>; + return alpaka::allocMappedBuf, Idx>( + host(), platform(), Vec1D{extent}); } } @@ -131,8 +138,9 @@ namespace cms::alpakatools { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { return allocCachedBuf, Idx>(host(), queue, Vec1D{std::extent_v}); } else { - return alpaka::allocMappedBuf>, std::remove_extent_t, Idx>( - host(), Vec1D{std::extent_v}); + using Platform = alpaka::Platform>; + return alpaka::allocMappedBuf, Idx>( + host(), platform(), Vec1D{std::extent_v}); } } diff --git a/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h b/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h index 5af78500f3ca3..afaddcc7f5473 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h +++ b/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h @@ -50,11 +50,11 @@ namespace cms::alpakatools { const auto warpSize = alpaka::warp::getSize(acc); int32_t const blockDimension(alpaka::getWorkDiv(acc)[0u]); int32_t const blockThreadIdx(alpaka::getIdx(acc)[0u]); - ALPAKA_ASSERT_OFFLOAD(ws); - ALPAKA_ASSERT_OFFLOAD(size <= warpSize * warpSize); - ALPAKA_ASSERT_OFFLOAD(0 == blockDimension % warpSize); + ALPAKA_ASSERT_ACC(ws); + ALPAKA_ASSERT_ACC(size <= warpSize * warpSize); + ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize); auto first = blockThreadIdx; - ALPAKA_ASSERT_OFFLOAD(isPowerOf2(warpSize)); + ALPAKA_ASSERT_ACC(isPowerOf2(warpSize)); auto laneId = blockThreadIdx & (warpSize - 1); auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize; @@ -64,7 +64,7 @@ namespace cms::alpakatools { if (i < size) { // Skipped in warp padding threads. auto warpId = i / warpSize; - ALPAKA_ASSERT_OFFLOAD(warpId < warpSize); + ALPAKA_ASSERT_ACC(warpId < warpSize); if ((warpSize - 1) == laneId) ws[warpId] = co[i]; } @@ -97,9 +97,9 @@ namespace cms::alpakatools { const auto warpSize = alpaka::warp::getSize(acc); int32_t const blockDimension(alpaka::getWorkDiv(acc)[0u]); int32_t const blockThreadIdx(alpaka::getIdx(acc)[0u]); - ALPAKA_ASSERT_OFFLOAD(ws); - ALPAKA_ASSERT_OFFLOAD(size <= warpSize * warpSize); - ALPAKA_ASSERT_OFFLOAD(0 == blockDimension % warpSize); + ALPAKA_ASSERT_ACC(ws); + ALPAKA_ASSERT_ACC(size <= warpSize * warpSize); + ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize); auto first = blockThreadIdx; auto laneId = blockThreadIdx & (warpSize - 1); auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize; @@ -110,7 +110,7 @@ namespace cms::alpakatools { if (i < size) { // Skipped in warp padding threads. auto warpId = i / warpSize; - ALPAKA_ASSERT_OFFLOAD(warpId < warpSize); + ALPAKA_ASSERT_ACC(warpId < warpSize); if ((warpSize - 1) == laneId) ws[warpId] = c[i]; } @@ -144,14 +144,14 @@ namespace cms::alpakatools { if constexpr (!requires_single_thread_per_block_v) { ws = alpaka::getDynSharedMem(acc); } - ALPAKA_ASSERT_OFFLOAD(warpSize == static_cast(alpaka::warp::getSize(acc))); + ALPAKA_ASSERT_ACC(warpSize == static_cast(alpaka::warp::getSize(acc))); [[maybe_unused]] const auto elementsPerGrid = alpaka::getWorkDiv(acc)[0u]; const auto elementsPerBlock = alpaka::getWorkDiv(acc)[0u]; const auto threadsPerBlock = alpaka::getWorkDiv(acc)[0u]; const auto blocksPerGrid = alpaka::getWorkDiv(acc)[0u]; const auto blockIdx = alpaka::getIdx(acc)[0u]; const auto threadIdx = alpaka::getIdx(acc)[0u]; - ALPAKA_ASSERT_OFFLOAD(elementsPerGrid >= size); + ALPAKA_ASSERT_ACC(elementsPerGrid >= size); // first each block does a scan [[maybe_unused]] int off = elementsPerBlock * blockIdx; if (size - off > 0) { @@ -172,7 +172,7 @@ namespace cms::alpakatools { if (!isLastBlockDone) return; - ALPAKA_ASSERT_OFFLOAD(int(blocksPerGrid) == *pc); + ALPAKA_ASSERT_ACC(int(blocksPerGrid) == *pc); // good each block has done its work and now we are left in last block diff --git a/HeterogeneousCore/AlpakaInterface/interface/radixSort.h b/HeterogeneousCore/AlpakaInterface/interface/radixSort.h index 0f94ad200efd9..f9b26cf3d17ae 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/radixSort.h +++ b/HeterogeneousCore/AlpakaInterface/interface/radixSort.h @@ -132,9 +132,9 @@ namespace cms::alpakatools { auto& ibs = alpaka::declareSharedVar(acc); auto& currentSortingPass = alpaka::declareSharedVar(acc); - ALPAKA_ASSERT_OFFLOAD(size > 0); + ALPAKA_ASSERT_ACC(size > 0); // TODO: is this a hard requirement? - ALPAKA_ASSERT_OFFLOAD(blockDimension >= binsNumber); + ALPAKA_ASSERT_ACC(blockDimension >= binsNumber); currentSortingPass = initialSortingPass; @@ -283,7 +283,7 @@ namespace cms::alpakatools { */ alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(c[0] == 0); + ALPAKA_ASSERT_ACC(c[0] == 0); // swap (local, ok) auto t = j; @@ -297,8 +297,8 @@ namespace cms::alpakatools { } if ((dataBits != 8) && (0 == (NS & 1))) - ALPAKA_ASSERT_OFFLOAD(j == - ind); // dataBits/binBits is even so ind is correct (the result is in the right location) + ALPAKA_ASSERT_ACC(j == + ind); // dataBits/binBits is even so ind is correct (the result is in the right location) // TODO this copy is (doubly?) redundant with the reorder if (j != ind) // odd number of sorting passes, we need to move the result to the right array (ind[]) diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testHistoContainer.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testHistoContainer.dev.cc index 57b80cc9cf275..026c4bc5866e2 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testHistoContainer.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testHistoContainer.dev.cc @@ -38,12 +38,12 @@ void checkContents(Hist* h, #ifndef NDEBUG [[maybe_unused]] auto bk = h->bin(v[k]); #endif - ALPAKA_ASSERT_OFFLOAD(bk == i); - ALPAKA_ASSERT_OFFLOAD(k < offsets[j + 1]); + ALPAKA_ASSERT_ACC(bk == i); + ALPAKA_ASSERT_ACC(k < offsets[j + 1]); auto kl = h->bin(v[k] - window); auto kh = h->bin(v[k] + window); - ALPAKA_ASSERT_OFFLOAD(kl != i); - ALPAKA_ASSERT_OFFLOAD(kh != i); + ALPAKA_ASSERT_ACC(kl != i); + ALPAKA_ASSERT_ACC(kh != i); // std::cout << kl << ' ' << kh << std::endl; auto me = v[k]; @@ -81,7 +81,7 @@ void checkContents(Hist* h, std::cout << "what? " << j << ' ' << i << ' ' << int(me) << '/' << (int)T(me - window) << '/' << (int)T(me + window) << ": " << kl << '/' << kh << ' ' << khh << ' ' << tot << '/' << nm << std::endl; - ALPAKA_ASSERT_OFFLOAD(!l); + ALPAKA_ASSERT_ACC(!l); } } int status; @@ -133,7 +133,7 @@ int go(const DevHost& host, const Device& device, Queue& queue) { offsets[0] = 0; for (uint32_t j = 1; j < nParts + 1; ++j) { offsets[j] = offsets[j - 1] + partSize - 3 * j; - ALPAKA_ASSERT_OFFLOAD(offsets[j] <= N); + ALPAKA_ASSERT_ACC(offsets[j] <= N); } if (it == 1) { // special cases... @@ -210,14 +210,14 @@ int go(const DevHost& host, const Device& device, Queue& queue) { // std::cout << offsets[i] <<" - "<< h->size() << std::endl; // } - ALPAKA_ASSERT_OFFLOAD(0 == h->off[0]); - ALPAKA_ASSERT_OFFLOAD(offsets[10] == h->size()); - ALPAKA_ASSERT_OFFLOAD(0 == hr->off[0]); - ALPAKA_ASSERT_OFFLOAD(offsets[10] == hr->size()); + ALPAKA_ASSERT_ACC(0 == h->off[0]); + ALPAKA_ASSERT_ACC(offsets[10] == h->size()); + ALPAKA_ASSERT_ACC(0 == hr->off[0]); + ALPAKA_ASSERT_ACC(offsets[10] == hr->size()); auto verify = [&](uint32_t i, uint32_t k, uint32_t t1, uint32_t t2) { - ALPAKA_ASSERT_OFFLOAD(t1 < N); - ALPAKA_ASSERT_OFFLOAD(t2 < N); + ALPAKA_ASSERT_ACC(t1 < N); + ALPAKA_ASSERT_ACC(t2 < N); if (T(v[t1] - v[t2]) <= 0) std::cout << "for " << i << ':' << v[k] << " failed " << v[t1] << ' ' << v[t2] << std::endl; }; diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc index b032939f9870b..20639da606d0d 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc @@ -15,8 +15,8 @@ template struct mykernel { template ALPAKA_FN_ACC void operator()(const TAcc& acc, T const* __restrict__ v, uint32_t N) const { - ALPAKA_ASSERT_OFFLOAD(v); - ALPAKA_ASSERT_OFFLOAD(N == 12000); + ALPAKA_ASSERT_ACC(v); + ALPAKA_ASSERT_ACC(N == 12000); const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); if (threadIdxLocal == 0) { @@ -46,18 +46,18 @@ struct mykernel { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(0 == hist.size()); + ALPAKA_ASSERT_ACC(0 == hist.size()); alpaka::syncBlockThreads(acc); // finalize hist.finalize(acc, ws); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(N == hist.size()); + ALPAKA_ASSERT_ACC(N == hist.size()); // verify for ([[maybe_unused]] auto j : uniform_elements(acc, Hist::nbins())) { - ALPAKA_ASSERT_OFFLOAD(hist.off[j] <= hist.off[j + 1]); + ALPAKA_ASSERT_ACC(hist.off[j] <= hist.off[j + 1]); } alpaka::syncBlockThreads(acc); @@ -72,17 +72,17 @@ struct mykernel { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(0 == hist.off[0]); - ALPAKA_ASSERT_OFFLOAD(N == hist.size()); + ALPAKA_ASSERT_ACC(0 == hist.off[0]); + ALPAKA_ASSERT_ACC(N == hist.size()); // bin #ifndef NDEBUG for (auto j : uniform_elements(acc, hist.size() - 1)) { auto p = hist.begin() + j; - ALPAKA_ASSERT_OFFLOAD((*p) < N); + ALPAKA_ASSERT_ACC((*p) < N); [[maybe_unused]] auto k1 = Hist::bin(v[*p]); [[maybe_unused]] auto k2 = Hist::bin(v[*(p + 1)]); - ALPAKA_ASSERT_OFFLOAD(k2 >= k1); + ALPAKA_ASSERT_ACC(k2 >= k1); } #endif @@ -95,13 +95,13 @@ struct mykernel { #endif [[maybe_unused]] int tot = 0; auto ftest = [&](unsigned int k) { - ALPAKA_ASSERT_OFFLOAD(k < N); + ALPAKA_ASSERT_ACC(k < N); ++tot; }; forEachInWindow(hist, v[j], v[j], ftest); #ifndef NDEBUG [[maybe_unused]] int rtot = hist.size(b0); - ALPAKA_ASSERT_OFFLOAD(tot == rtot); + ALPAKA_ASSERT_ACC(tot == rtot); #endif tot = 0; auto vm = int(v[j]) - DELTA; @@ -111,13 +111,13 @@ struct mykernel { vm = std::min(vm, vmax); vp = std::min(vp, vmax); vp = std::max(vp, 0); - ALPAKA_ASSERT_OFFLOAD(vp >= vm); + ALPAKA_ASSERT_ACC(vp >= vm); forEachInWindow(hist, vm, vp, ftest); #ifndef NDEBUG int bp = Hist::bin(vp); int bm = Hist::bin(vm); rtot = hist.end(bp) - hist.begin(bm); - ALPAKA_ASSERT_OFFLOAD(tot == rtot); + ALPAKA_ASSERT_ACC(tot == rtot); #endif } } diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc index 492911e6b1a57..d50b7830ad8f3 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc @@ -69,7 +69,7 @@ struct verifyMulti { template ALPAKA_FN_ACC void operator()(const TAcc& acc, Multiplicity* __restrict__ m1, Multiplicity* __restrict__ m2) const { for ([[maybe_unused]] auto i : uniform_elements(acc, Multiplicity{}.totOnes())) { - ALPAKA_ASSERT_OFFLOAD(m1->off[i] == m2->off[i]); + ALPAKA_ASSERT_ACC(m1->off[i] == m2->off[i]); } } }; @@ -83,7 +83,7 @@ struct count { for (auto i : uniform_elements(acc, 4 * n)) { auto k = i / 4; auto j = i - 4 * k; - ALPAKA_ASSERT_OFFLOAD(j < 4); + ALPAKA_ASSERT_ACC(j < 4); if (k >= n) { return; } @@ -103,7 +103,7 @@ struct fill { for (auto i : uniform_elements(acc, 4 * n)) { auto k = i / 4; auto j = i - 4 * k; - ALPAKA_ASSERT_OFFLOAD(j < 4); + ALPAKA_ASSERT_ACC(j < 4); if (k >= n) { return; } @@ -117,7 +117,7 @@ struct fill { struct verify { template ALPAKA_FN_ACC void operator()(const TAcc& acc, Assoc* __restrict__ assoc) const { - ALPAKA_ASSERT_OFFLOAD(assoc->size() < Assoc{}.capacity()); + ALPAKA_ASSERT_ACC(assoc->size() < Assoc{}.capacity()); } }; @@ -138,7 +138,7 @@ struct verifyBulk { if (::toSigned(apc->get().first) >= Assoc::ctNOnes()) { printf("Overflow %d %d\n", apc->get().first, Assoc::ctNOnes()); } - ALPAKA_ASSERT_OFFLOAD(toSigned(assoc->size()) < Assoc::ctCapacity()); + ALPAKA_ASSERT_ACC(toSigned(assoc->size()) < Assoc::ctCapacity()); } }; @@ -197,8 +197,8 @@ int main() { } ++z; } - ALPAKA_ASSERT_OFFLOAD(n <= MaxElem); - ALPAKA_ASSERT_OFFLOAD(j <= N); + ALPAKA_ASSERT_ACC(n <= MaxElem); + ALPAKA_ASSERT_ACC(j <= N); } std::cout << "filled with " << n << " elements " << double(ave) / n << ' ' << imax << ' ' << nz << std::endl; @@ -239,7 +239,7 @@ int main() { ave += x; imax = std::max(imax, int(x)); } - ALPAKA_ASSERT_OFFLOAD(0 == ara_h->size(n)); + ALPAKA_ASSERT_ACC(0 == ara_h->size(n)); std::cout << "found with " << n << " elements " << double(ave) / n << ' ' << imax << ' ' << z << std::endl; // now the inverse map (actually this is the direct....) @@ -289,11 +289,11 @@ int main() { if (!(x == 4 || x == 3)) { std::cout << "i=" << i << " x=" << x << std::endl; } - ALPAKA_ASSERT_OFFLOAD(x == 4 || x == 3); + ALPAKA_ASSERT_ACC(x == 4 || x == 3); ave += x; imax = std::max(imax, int(x)); } - ALPAKA_ASSERT_OFFLOAD(0 == as_h->size(N)); + ALPAKA_ASSERT_ACC(0 == as_h->size(N)); std::cout << "found with ave occupancy " << double(ave) / N << ' ' << imax << std::endl; // here verify use of block local counters diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc index 5e8f4ee3b8e9a..d96d5d5b9b403 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc @@ -43,8 +43,8 @@ struct testPrefixScan { blockPrefixScan(acc, c, co, size, ws); blockPrefixScan(acc, c, size, ws); - ALPAKA_ASSERT_OFFLOAD(1 == c[0]); - ALPAKA_ASSERT_OFFLOAD(1 == co[0]); + ALPAKA_ASSERT_ACC(1 == c[0]); + ALPAKA_ASSERT_ACC(1 == co[0]); // TODO: not needed? Not in multi kernel version, not in CUDA version alpaka::syncBlockThreads(acc); @@ -59,9 +59,9 @@ struct testPrefixScan { if (!((c[i] == c[i - 1] + 1) && (c[i] == i + 1) && (c[i] == co[i]))) printf("c[%d]=%f, co[%d]=%f\n", i, c[i], i, co[i]); } - ALPAKA_ASSERT_OFFLOAD(c[i] == c[i - 1] + 1); - ALPAKA_ASSERT_OFFLOAD(c[i] == i + 1); - ALPAKA_ASSERT_OFFLOAD(c[i] == co[i]); + ALPAKA_ASSERT_ACC(c[i] == c[i - 1] + 1); + ALPAKA_ASSERT_ACC(c[i] == i + 1); + ALPAKA_ASSERT_ACC(c[i] == co[i]); } } }; @@ -74,7 +74,7 @@ struct testWarpPrefixScan { template ALPAKA_FN_ACC void operator()(const TAcc& acc, uint32_t size) const { if constexpr (!requires_single_thread_per_block_v) { - ALPAKA_ASSERT_OFFLOAD(size <= 32); + ALPAKA_ASSERT_ACC(size <= 32); auto& c = alpaka::declareSharedVar(acc); auto& co = alpaka::declareSharedVar(acc); @@ -90,18 +90,18 @@ struct testWarpPrefixScan { alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(1 == c[0]); - ALPAKA_ASSERT_OFFLOAD(1 == co[0]); + ALPAKA_ASSERT_ACC(1 == c[0]); + ALPAKA_ASSERT_ACC(1 == co[0]); if (i != 0) { if (c[i] != c[i - 1] + 1) printf(format_traits::failed_msg, size, i, blockDimension, c[i], c[i - 1]); - ALPAKA_ASSERT_OFFLOAD(c[i] == c[i - 1] + 1); - ALPAKA_ASSERT_OFFLOAD(c[i] == static_cast(i + 1)); - ALPAKA_ASSERT_OFFLOAD(c[i] == co[i]); + ALPAKA_ASSERT_ACC(c[i] == c[i - 1] + 1); + ALPAKA_ASSERT_ACC(c[i] == static_cast(i + 1)); + ALPAKA_ASSERT_ACC(c[i] == co[i]); } } else { // We should never be called outsie of the GPU. - ALPAKA_ASSERT_OFFLOAD(false); + ALPAKA_ASSERT_ACC(false); } } }; @@ -122,7 +122,7 @@ struct verify { template ALPAKA_FN_ACC void operator()(const TAcc& acc, uint32_t const* v, uint32_t n) const { for (auto index : uniform_elements(acc, n)) { - ALPAKA_ASSERT_OFFLOAD(v[index] == index + 1); + ALPAKA_ASSERT_ACC(v[index] == index + 1); if (index == 0) printf("verify\n"); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h index d50995cf8d6e5..80a7f4301be42 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h @@ -33,8 +33,8 @@ namespace pixelClustering { constexpr int startBPIX2 = TrackerTraits::layerStart[1]; - ALPAKA_ASSERT_OFFLOAD(TrackerTraits::numberOfModules < maxNumModules); - ALPAKA_ASSERT_OFFLOAD(startBPIX2 < TrackerTraits::numberOfModules); + ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < maxNumModules); + ALPAKA_ASSERT_ACC(startBPIX2 < TrackerTraits::numberOfModules); auto endModule = clus_view[0].moduleStart(); for (auto module : cms::alpakatools::independent_groups(acc, endModule)) { @@ -53,7 +53,7 @@ namespace pixelClustering { // reached the end of the module while skipping the invalid pixels, skip this module continue; } - ALPAKA_ASSERT_OFFLOAD(thisModuleId < TrackerTraits::numberOfModules); + ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules); uint32_t nclus = clus_view[thisModuleId].clusInModule(); if (nclus == 0) @@ -87,7 +87,7 @@ namespace pixelClustering { printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module); #endif - ALPAKA_ASSERT_OFFLOAD(nclus <= maxNumClustersPerModules); + ALPAKA_ASSERT_ACC(nclus <= maxNumClustersPerModules); for (auto i : cms::alpakatools::independent_group_elements(acc, nclus)) { charge[i] = 0; } @@ -136,7 +136,7 @@ namespace pixelClustering { alpaka::syncBlockThreads(acc); } } - ALPAKA_ASSERT_OFFLOAD(nclus >= newclusId[nclus - 1]); + ALPAKA_ASSERT_ACC(nclus >= newclusId[nclus - 1]); clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1]; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h index b2fcca94e1d24..4a2d9e72e9366 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -118,7 +118,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { &clus_view[0].moduleStart(), static_cast(::pixelClustering::maxNumModules), alpaka::hierarchy::Blocks{}); - ALPAKA_ASSERT_OFFLOAD(loc < TrackerTraits::numberOfModules); + ALPAKA_ASSERT_ACC(loc < TrackerTraits::numberOfModules); #ifdef GPU_DEBUG printf("> New module (no. %d) found at digi %d \n", loc, i); #endif @@ -143,7 +143,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { for (uint32_t module : cms::alpakatools::independent_groups(acc, lastModule)) { auto firstPixel = clus_view[1 + module].moduleStart(); uint32_t thisModuleId = digi_view[firstPixel].moduleId(); - ALPAKA_ASSERT_OFFLOAD(thisModuleId < TrackerTraits::numberOfModules); + ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules); #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) @@ -182,8 +182,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD((lastPixel == numElements) or - ((lastPixel < numElements) and (digi_view[lastPixel].moduleId() != thisModuleId))); + ALPAKA_ASSERT_ACC((lastPixel == numElements) or + ((lastPixel < numElements) and (digi_view[lastPixel].moduleId() != thisModuleId))); // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) if (cms::alpakatools::once_per_block(acc)) { if (lastPixel - firstPixel > TrackerTraits::maxPixInModule) { @@ -195,7 +195,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(lastPixel - firstPixel <= TrackerTraits::maxPixInModule); + ALPAKA_ASSERT_ACC(lastPixel - firstPixel <= TrackerTraits::maxPixInModule); #ifdef GPU_DEBUG auto& totGood = alpaka::declareSharedVar(acc); @@ -254,7 +254,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { hist.finalize(acc, ws); alpaka::syncBlockThreads(acc); #ifdef GPU_DEBUG - ALPAKA_ASSERT_OFFLOAD(hist.size() == totGood); + ALPAKA_ASSERT_ACC(hist.size() == totGood); if (thisModuleId % 100 == 1) if (cms::alpakatools::once_per_block(acc)) printf("histo size %d\n", hist.size()); @@ -299,11 +299,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { // with blockDimension = threadPerBlock * elementsPerThread. // Hence, maxIter can be tuned accordingly to the workdiv. constexpr unsigned int maxIterGPU = 16; - ALPAKA_ASSERT_OFFLOAD((hist.size() / blockDimension) < maxIterGPU); + ALPAKA_ASSERT_ACC((hist.size() / blockDimension) < maxIterGPU); // NB: can be tuned. constexpr uint32_t maxElements = cms::alpakatools::requires_single_thread_per_block_v ? 256 : 1; - ALPAKA_ASSERT_OFFLOAD((alpaka::getWorkDiv(acc)[0u] <= maxElements)); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0u] <= maxElements)); constexpr unsigned int maxIter = maxIterGPU * maxElements; @@ -321,23 +321,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { // fill the nearest neighbours uint32_t k = 0; for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { - ALPAKA_ASSERT_OFFLOAD(k < maxIter); + ALPAKA_ASSERT_ACC(k < maxIter); auto p = hist.begin() + j; auto i = *p + firstPixel; - ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId); - ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() == thisModuleId); // same module + ALPAKA_ASSERT_ACC(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId); + ALPAKA_ASSERT_ACC(digi_view[i].moduleId() == thisModuleId); // same module auto bin = Hist::bin(digi_view[i].yy() + 1); auto end = hist.end(bin); ++p; - ALPAKA_ASSERT_OFFLOAD(0 == nnn[k]); + ALPAKA_ASSERT_ACC(0 == nnn[k]); for (; p < end; ++p) { auto m = *p + firstPixel; - ALPAKA_ASSERT_OFFLOAD(m != i); - ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0); - ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1); + ALPAKA_ASSERT_ACC(m != i); + ALPAKA_ASSERT_ACC(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0); + ALPAKA_ASSERT_ACC(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1); if (std::abs(int(digi_view[m].xx()) - int(digi_view[i].xx())) <= 1) { auto l = nnn[k]++; - ALPAKA_ASSERT_OFFLOAD(l < maxNeighbours); + ALPAKA_ASSERT_ACC(l < maxNeighbours); nn[k][l] = *p; } } @@ -360,13 +360,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { more = false; uint32_t k = 0; for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { - ALPAKA_ASSERT_OFFLOAD(k < maxIter); + ALPAKA_ASSERT_ACC(k < maxIter); auto p = hist.begin() + j; auto i = *p + firstPixel; for (int kk = 0; kk < nnn[k]; ++kk) { auto l = nn[k][kk]; auto m = l + firstPixel; - ALPAKA_ASSERT_OFFLOAD(m != i); + ALPAKA_ASSERT_ACC(m != i); // FIXME ::Threads ? auto old = alpaka::atomicMin(acc, &digi_view[m].clus(), digi_view[i].clus(), alpaka::hierarchy::Blocks{}); if (old != digi_view[i].clus()) { @@ -404,7 +404,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { if (cms::alpakatools::once_per_block(acc)) n0 = nloops; alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(alpaka::syncBlockThreadsPredicate(acc, nloops == n0)); + ALPAKA_ASSERT_ACC(alpaka::syncBlockThreadsPredicate(acc, nloops == n0)); if (thisModuleId % 100 == 1) if (cms::alpakatools::once_per_block(acc)) printf("# loops %d\n", nloops); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index 6a28f0cd0504a..13b971753bd75 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -432,16 +432,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { struct FillHitsModuleStart { template ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const { - ALPAKA_ASSERT_OFFLOAD(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024 + ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024 constexpr int numberOfModules = TrackerTraits::numberOfModules; constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule; #ifndef NDEBUG [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx(acc)[0u]); - ALPAKA_ASSERT_OFFLOAD(0 == blockIdxLocal); + ALPAKA_ASSERT_ACC(0 == blockIdxLocal); [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); - ALPAKA_ASSERT_OFFLOAD(1 == gridDimension); + ALPAKA_ASSERT_ACC(1 == gridDimension); #endif // limit to maxHitsInModule; @@ -488,16 +488,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::syncBlockThreads(acc); } #ifdef GPU_DEBUG - ALPAKA_ASSERT_OFFLOAD(0 == clus_view[0].moduleStart()); + ALPAKA_ASSERT_ACC(0 == clus_view[0].moduleStart()); auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart()); - ALPAKA_ASSERT_OFFLOAD(c0 == clus_view[1].moduleStart()); - ALPAKA_ASSERT_OFFLOAD(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart()); - ALPAKA_ASSERT_OFFLOAD(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart()); - ALPAKA_ASSERT_OFFLOAD(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart()); + ALPAKA_ASSERT_ACC(c0 == clus_view[1].moduleStart()); + ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart()); + ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart()); + ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart()); for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules + 1)) { if (0 != i) - ALPAKA_ASSERT_OFFLOAD(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart()); + ALPAKA_ASSERT_ACC(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart()); // Check BPX2 (1), FP1 (4) constexpr auto bpix2 = TrackerTraits::layerStart[1]; constexpr auto fpix1 = TrackerTraits::layerStart[4]; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h index d90f38c11c984..aacdeb79a2749 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h @@ -38,7 +38,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t nonEmptyModules, SiPixelClustersSoAConstView clusters, TrackingRecHitSoAView hits) const { - ALPAKA_ASSERT_OFFLOAD(cpeParams); + ALPAKA_ASSERT_ACC(cpeParams); // outer loop: one block per module for (uint32_t module : cms::alpakatools::independent_groups(acc, nonEmptyModules)) { @@ -80,7 +80,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto k = clusters[1 + module].moduleStart(); while (digis[k].moduleId() == invalidModuleId) ++k; - ALPAKA_ASSERT_OFFLOAD(digis[k].moduleId() == me); + ALPAKA_ASSERT_ACC(digis[k].moduleId() == me); } if (me % 100 == 1) @@ -129,8 +129,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (cl < startClus || cl >= lastClus) continue; cl -= startClus; - ALPAKA_ASSERT_OFFLOAD(cl >= 0); - ALPAKA_ASSERT_OFFLOAD(cl < maxHitsInIter); + ALPAKA_ASSERT_ACC(cl >= 0); + ALPAKA_ASSERT_ACC(cl < maxHitsInIter); auto x = digis[i].xx(); auto y = digis[i].yy(); alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{}); @@ -152,8 +152,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (cl < startClus || cl >= lastClus) continue; cl -= startClus; - ALPAKA_ASSERT_OFFLOAD(cl >= 0); - ALPAKA_ASSERT_OFFLOAD(cl < maxHitsInIter); + ALPAKA_ASSERT_ACC(cl >= 0); + ALPAKA_ASSERT_ACC(cl < maxHitsInIter); auto x = digis[i].xx(); auto y = digis[i].yy(); auto ch = digis[i].adc(); diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc index ae6739cfb72df..aaf83bbc2e097 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc @@ -42,17 +42,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { constexpr uint32_t hitsInFit = N; constexpr auto invalidTkId = std::numeric_limits::max(); - ALPAKA_ASSERT_OFFLOAD(hitsInFit <= nHitsL); - ALPAKA_ASSERT_OFFLOAD(nHitsL <= nHitsH); - ALPAKA_ASSERT_OFFLOAD(phits); - ALPAKA_ASSERT_OFFLOAD(pfast_fit); - ALPAKA_ASSERT_OFFLOAD(foundNtuplets); - ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity); + ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL); + ALPAKA_ASSERT_ACC(nHitsL <= nHitsH); + ALPAKA_ASSERT_ACC(phits); + ALPAKA_ASSERT_ACC(pfast_fit); + ALPAKA_ASSERT_ACC(foundNtuplets); + ALPAKA_ASSERT_ACC(tupleMultiplicity); // look in bin for this hit multiplicity int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL); - ALPAKA_ASSERT_OFFLOAD(totTK <= int(tupleMultiplicity->size())); - ALPAKA_ASSERT_OFFLOAD(totTK >= 0); + ALPAKA_ASSERT_ACC(totTK <= int(tupleMultiplicity->size())); + ALPAKA_ASSERT_ACC(totTK >= 0); #ifdef BROKENLINE_DEBUG const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); @@ -70,14 +70,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // get it from the ntuple container (one to one to helix) auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx); - ALPAKA_ASSERT_OFFLOAD(static_cast(tkid) < foundNtuplets->nOnes()); + ALPAKA_ASSERT_ACC(static_cast(tkid) < foundNtuplets->nOnes()); ptkids[local_idx] = tkid; auto nHits = foundNtuplets->size(tkid); - ALPAKA_ASSERT_OFFLOAD(nHits >= nHitsL); - ALPAKA_ASSERT_OFFLOAD(nHits <= nHitsH); + ALPAKA_ASSERT_ACC(nHits >= nHitsL); + ALPAKA_ASSERT_ACC(nHits <= nHitsH); riemannFit::Map3xNd hits(phits + local_idx); riemannFit::Map4d fast_fit(pfast_fit + local_idx); @@ -109,7 +109,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { int j = int(n + 0.5f); // round if (hitsInFit - 1 == i) j = nHits - 1; // force last hit to ensure max lever arm. - ALPAKA_ASSERT_OFFLOAD(j < int(nHits)); + ALPAKA_ASSERT_ACC(j < int(nHits)); n += incr; auto hit = hitId[j]; float ge[6]; @@ -118,7 +118,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto const &dp = cpeParams->detParams(hh.detectorIndex(hit)); auto status = hh[hit].chargeAndStatus().status; int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin; - ALPAKA_ASSERT_OFFLOAD(qbin >= 0 && qbin < 5); + ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5); bool nok = (status.isBigY | status.isOneY); // compute cotanbeta and use it to recompute error dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz); @@ -161,10 +161,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { brokenline::fastFit(acc, hits, fast_fit); // no NaN here.... - ALPAKA_ASSERT_OFFLOAD(fast_fit(0) == fast_fit(0)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(1) == fast_fit(1)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(2) == fast_fit(2)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(3) == fast_fit(3)); + ALPAKA_ASSERT_ACC(fast_fit(0) == fast_fit(0)); + ALPAKA_ASSERT_ACC(fast_fit(1) == fast_fit(1)); + ALPAKA_ASSERT_ACC(fast_fit(2) == fast_fit(2)); + ALPAKA_ASSERT_ACC(fast_fit(3) == fast_fit(3)); } } }; @@ -181,10 +181,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { double *__restrict__ phits, float *__restrict__ phits_ge, double *__restrict__ pfast_fit) const { - ALPAKA_ASSERT_OFFLOAD(results_view.pt()); - ALPAKA_ASSERT_OFFLOAD(results_view.eta()); - ALPAKA_ASSERT_OFFLOAD(results_view.chi2()); - ALPAKA_ASSERT_OFFLOAD(pfast_fit); + ALPAKA_ASSERT_ACC(results_view.pt()); + ALPAKA_ASSERT_ACC(results_view.eta()); + ALPAKA_ASSERT_ACC(results_view.chi2()); + ALPAKA_ASSERT_ACC(pfast_fit); constexpr auto invalidTkId = std::numeric_limits::max(); // same as above... @@ -195,7 +195,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { break; auto tkid = ptkids[local_idx]; - ALPAKA_ASSERT_OFFLOAD(tkid < TrackerTraits::maxNumberOfTuples); + ALPAKA_ASSERT_ACC(tkid < TrackerTraits::maxNumberOfTuples); riemannFit::Map3xNd hits(phits + local_idx); riemannFit::Map4d fast_fit(pfast_fit + local_idx); @@ -247,7 +247,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t hitsInFit, uint32_t maxNumberOfTuples, Queue &queue) { - ALPAKA_ASSERT_OFFLOAD(tuples_); + ALPAKA_ASSERT_ACC(tuples_); uint32_t blockSize = 64; uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize); diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h index 4c83eef84fdfe..d8af548109d29 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h @@ -301,11 +301,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if constexpr (DEPTH <= 0) { printf("ERROR: CACellT::find_ntuplets reached full depth!\n"); - ALPAKA_ASSERT_OFFLOAD(false); + ALPAKA_ASSERT_ACC(false); } else { auto doubletId = this - cells; tmpNtuplet.push_back_unsafe(doubletId); - ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3)); + ALPAKA_ASSERT_ACC(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3)); bool last = true; for (unsigned int otherCell : outerNeighbors()) { diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc index 56bae962fbe06..0cc24f81254aa 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc @@ -280,7 +280,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif // in principle we can use "nhits" to heuristically dimension the workspace... - ALPAKA_ASSERT_OFFLOAD(this->device_isOuterHitOfCell_.data()); + ALPAKA_ASSERT_ACC(this->device_isOuterHitOfCell_.data()); alpaka::exec( queue, diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h index 7b296324ba3eb..e7ff7a2c5a01a 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -108,17 +108,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { apc->get().second, nHits); if (apc->get().first < TrackerTraits::maxNumberOfQuadruplets) { - ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().size(apc->get().first) == 0); - ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().size() == apc->get().second); + ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size(apc->get().first) == 0); + ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size() == apc->get().second); } } for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx)); - ALPAKA_ASSERT_OFFLOAD(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack); + ALPAKA_ASSERT_ACC(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack); for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih) - ALPAKA_ASSERT_OFFLOAD(int(*ih) < nHits); + ALPAKA_ASSERT_ACC(int(*ih) < nHits); } #endif @@ -198,7 +198,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { bool dupPassThrough) const { // quality to mark rejected constexpr auto reject = Quality::edup; /// cannot be loose - ALPAKA_ASSERT_OFFLOAD(nCells); + ALPAKA_ASSERT_ACC(nCells); for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) { auto const &thisCell = cells[idx]; @@ -239,7 +239,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto const reject = dupPassThrough ? Quality::loose : Quality::dup; constexpr auto loose = Quality::loose; - ALPAKA_ASSERT_OFFLOAD(nCells); + ALPAKA_ASSERT_ACC(nCells); const auto ntNCells = (*nCells); for (auto idx : cms::alpakatools::uniform_elements(acc, ntNCells)) { @@ -431,7 +431,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { stack, params.minHitsPerNtuplet_, bpix1Start); - ALPAKA_ASSERT_OFFLOAD(stack.empty()); + ALPAKA_ASSERT_ACC(stack.empty()); } } } @@ -466,10 +466,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; if (tracks_view[it].quality() == Quality::edup) continue; - ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad); if (nhits > TrackerTraits::maxHitsOnTrack) // current limit printf("wrong mult %d %d\n", it, nhits); - ALPAKA_ASSERT_OFFLOAD(nhits <= TrackerTraits::maxHitsOnTrack); + ALPAKA_ASSERT_ACC(nhits <= TrackerTraits::maxHitsOnTrack); tupleMultiplicity->count(acc, nhits); } } @@ -488,10 +488,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; if (tracks_view[it].quality() == Quality::edup) continue; - ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad); if (nhits > TrackerTraits::maxHitsOnTrack) printf("wrong mult %d %d\n", it, nhits); - ALPAKA_ASSERT_OFFLOAD(nhits <= TrackerTraits::maxHitsOnTrack); + ALPAKA_ASSERT_ACC(nhits <= TrackerTraits::maxHitsOnTrack); tupleMultiplicity->fill(acc, nhits, it); } } @@ -513,7 +513,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if (tracks_view[it].quality() == Quality::edup) continue; - ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad); // mark doublets as bad if (nhits < 3) @@ -607,7 +607,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { } // fill hit indices for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().size())) { - ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().content[idx] < (uint32_t)hh.metadata().size()); + ALPAKA_ASSERT_ACC(tracks_view.hitIndices().content[idx] < (uint32_t)hh.metadata().size()); tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex(); } } @@ -626,7 +626,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if (cms::alpakatools::once_per_grid(acc)) tracks_view.nTracks() = ntracks; for (auto idx : cms::alpakatools::uniform_elements(acc, ntracks)) { - ALPAKA_ASSERT_OFFLOAD(TracksUtilities::nHits(tracks_view, idx) >= 3); + ALPAKA_ASSERT_ACC(TracksUtilities::nHits(tracks_view, idx) >= 3); tracks_view[idx].nLayers() = TracksUtilities::computeNumberOfLayers(tracks_view, idx); } } diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h index 580198772034d..e3116eb5b7f43 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h @@ -27,7 +27,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { CellNeighbors* cellNeighborsContainer, CellTracksVector* cellTracks, CellTracks* cellTracksContainer) const { - ALPAKA_ASSERT_OFFLOAD((*isOuterHitOfCell).container); + ALPAKA_ASSERT_ACC((*isOuterHitOfCell).container); for (auto i : cms::alpakatools::uniform_elements(acc, nHits - isOuterHitOfCell->offset)) (*isOuterHitOfCell).container[i].reset(); @@ -36,10 +36,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { cellNeighbors->construct(TrackerTraits::maxNumOfActiveDoublets, cellNeighborsContainer); cellTracks->construct(TrackerTraits::maxNumOfActiveDoublets, cellTracksContainer); [[maybe_unused]] auto i = cellNeighbors->extend(acc); - ALPAKA_ASSERT_OFFLOAD(0 == i); + ALPAKA_ASSERT_ACC(0 == i); (*cellNeighbors)[0].reset(); i = cellTracks->extend(acc); - ALPAKA_ASSERT_OFFLOAD(0 == i); + ALPAKA_ASSERT_ACC(0 == i); (*cellTracks)[0].reset(); } } diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h index 048aaf2058d27..97d9acdd8739a 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h @@ -155,7 +155,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto const& __restrict__ phiBinner = hh.phiBinner(); uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data(); - ALPAKA_ASSERT_OFFLOAD(offsets); + ALPAKA_ASSERT_ACC(offsets); auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; }; @@ -189,20 +189,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { ; --pairLayerId; - ALPAKA_ASSERT_OFFLOAD(pairLayerId < nPairs); - ALPAKA_ASSERT_OFFLOAD(j < innerLayerCumulativeSize[pairLayerId]); - ALPAKA_ASSERT_OFFLOAD(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]); + ALPAKA_ASSERT_ACC(pairLayerId < nPairs); + ALPAKA_ASSERT_ACC(j < innerLayerCumulativeSize[pairLayerId]); + ALPAKA_ASSERT_ACC(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]); uint8_t inner = TrackerTraits::layerPairs[2 * pairLayerId]; uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1]; - ALPAKA_ASSERT_OFFLOAD(outer > inner); + ALPAKA_ASSERT_ACC(outer > inner); auto hoff = PhiBinner::histOff(outer); auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1]; i += offsets[inner]; - ALPAKA_ASSERT_OFFLOAD(i >= offsets[inner]); - ALPAKA_ASSERT_OFFLOAD(i < offsets[inner + 1]); + ALPAKA_ASSERT_ACC(i >= offsets[inner]); + ALPAKA_ASSERT_ACC(i < offsets[inner + 1]); // found hit corresponding to our worker thread, now do the job if (hh[i].detectorIndex() > pixelClustering::maxNumModules) @@ -267,8 +267,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { for (uint32_t pIndex : cms::alpakatools::independent_group_elements_x(acc, maxpIndex)) { // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough? auto oi = p[pIndex]; - ALPAKA_ASSERT_OFFLOAD(oi >= offsets[outer]); - ALPAKA_ASSERT_OFFLOAD(oi < offsets[outer + 1]); + ALPAKA_ASSERT_ACC(oi >= offsets[outer]); + ALPAKA_ASSERT_ACC(oi < offsets[outer + 1]); auto mo = hh[oi].detectorIndex(); // invalid diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc index 078cbe8de45a4..d0fe19233b225 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc @@ -8,8 +8,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tupleMultiplicity_ = tupleMultiplicity; outputSoa_ = helix_fit_results; - ALPAKA_ASSERT_OFFLOAD(tuples_); - ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity_); + ALPAKA_ASSERT_ACC(tuples_); + ALPAKA_ASSERT_ACC(tupleMultiplicity_); } template diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc index 9ab7d1fdf1e78..a822bbd8a8252 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc @@ -40,11 +40,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t offset) const { constexpr uint32_t hitsInFit = N; - ALPAKA_ASSERT_OFFLOAD(hitsInFit <= nHits); + ALPAKA_ASSERT_ACC(hitsInFit <= nHits); - ALPAKA_ASSERT_OFFLOAD(pfast_fit); - ALPAKA_ASSERT_OFFLOAD(foundNtuplets); - ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity); + ALPAKA_ASSERT_ACC(pfast_fit); + ALPAKA_ASSERT_ACC(foundNtuplets); + ALPAKA_ASSERT_ACC(tupleMultiplicity); // look in bin for this hit multiplicity @@ -62,9 +62,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // get it from the ntuple container (one to one to helix) auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); - ALPAKA_ASSERT_OFFLOAD(static_cast(tkid) < foundNtuplets->nOnes()); + ALPAKA_ASSERT_ACC(static_cast(tkid) < foundNtuplets->nOnes()); - ALPAKA_ASSERT_OFFLOAD(foundNtuplets->size(tkid) == nHits); + ALPAKA_ASSERT_ACC(foundNtuplets->size(tkid) == nHits); riemannFit::Map3xNd hits(phits + local_idx); riemannFit::Map4d fast_fit(pfast_fit + local_idx); @@ -83,10 +83,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { riemannFit::fastFit(acc, hits, fast_fit); // no NaN here.... - ALPAKA_ASSERT_OFFLOAD(fast_fit(0) == fast_fit(0)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(1) == fast_fit(1)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(2) == fast_fit(2)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(3) == fast_fit(3)); + ALPAKA_ASSERT_ACC(fast_fit(0) == fast_fit(0)); + ALPAKA_ASSERT_ACC(fast_fit(1) == fast_fit(1)); + ALPAKA_ASSERT_ACC(fast_fit(2) == fast_fit(2)); + ALPAKA_ASSERT_ACC(fast_fit(3) == fast_fit(3)); } } }; @@ -104,8 +104,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { double *__restrict__ pfast_fit_input, riemannFit::CircleFit *circle_fit, uint32_t offset) const { - ALPAKA_ASSERT_OFFLOAD(circle_fit); - ALPAKA_ASSERT_OFFLOAD(N <= nHits); + ALPAKA_ASSERT_ACC(circle_fit); + ALPAKA_ASSERT_ACC(N <= nHits); // same as above... @@ -151,8 +151,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { double *__restrict__ pfast_fit_input, riemannFit::CircleFit *__restrict__ circle_fit, uint32_t offset) const { - ALPAKA_ASSERT_OFFLOAD(circle_fit); - ALPAKA_ASSERT_OFFLOAD(N <= nHits); + ALPAKA_ASSERT_ACC(circle_fit); + ALPAKA_ASSERT_ACC(N <= nHits); // same as above... diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h index cb772a7e653b4..122457a7d05d2 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h @@ -57,11 +57,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { int32_t* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(zt); - ALPAKA_ASSERT_OFFLOAD(ezt2); - ALPAKA_ASSERT_OFFLOAD(izt); - ALPAKA_ASSERT_OFFLOAD(nn); - ALPAKA_ASSERT_OFFLOAD(iv); + ALPAKA_ASSERT_ACC(zt); + ALPAKA_ASSERT_ACC(ezt2); + ALPAKA_ASSERT_ACC(izt); + ALPAKA_ASSERT_ACC(nn); + ALPAKA_ASSERT_ACC(iv); using Hist = cms::alpakatools::HistoContainer; auto& hist = alpaka::declareSharedVar(acc); @@ -76,17 +76,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { if (cms::alpakatools::once_per_block(acc)) printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt); } - ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); + ALPAKA_ASSERT_ACC(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { - ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 // iz = std::clamp(iz, INT8_MIN, INT8_MAX); // sorry c++17 only iz = std::min(std::max(iz, INT8_MIN), INT8_MAX); izt[i] = iz - INT8_MIN; - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0); + ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256); hist.count(acc, izt[i]); iv[i] = i; nn[i] = 0; @@ -97,7 +97,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); hist.finalize(acc, hws); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + ALPAKA_ASSERT_ACC(hist.size() == nt); for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { hist.fill(acc, izt[i], uint16_t(i)); } @@ -145,7 +145,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // mini verification for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) - ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i)); } alpaka::syncBlockThreads(acc); #endif @@ -163,7 +163,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // mini verification for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) - ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i)); } #endif @@ -187,8 +187,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { }; cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); // should belong to the same cluster... - ALPAKA_ASSERT_OFFLOAD(iv[i] == iv[minJ]); - ALPAKA_ASSERT_OFFLOAD(nn[i] <= nn[iv[i]]); + ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]); + ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]); } alpaka::syncBlockThreads(acc); #endif @@ -211,7 +211,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h index 38cfb0bec2289..7090599dcfdb0 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h @@ -53,10 +53,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { int32_t* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(zt); - ALPAKA_ASSERT_OFFLOAD(iv); - ALPAKA_ASSERT_OFFLOAD(nn); - ALPAKA_ASSERT_OFFLOAD(ezt2); + ALPAKA_ASSERT_ACC(zt); + ALPAKA_ASSERT_ACC(iv); + ALPAKA_ASSERT_ACC(nn); + ALPAKA_ASSERT_ACC(ezt2); using Hist = cms::alpakatools::HistoContainer; auto& hist = alpaka::declareSharedVar(acc); @@ -72,16 +72,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); } - ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); + ALPAKA_ASSERT_ACC(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { - ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 iz = std::clamp(iz, INT8_MIN, INT8_MAX); izt[i] = iz - INT8_MIN; - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0); + ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256); hist.count(acc, izt[i]); iv[i] = i; nn[i] = 0; @@ -92,7 +92,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); hist.finalize(acc, hws); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + ALPAKA_ASSERT_ACC(hist.size() == nt); for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { hist.fill(acc, izt[i], uint32_t(i)); } @@ -143,7 +143,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // mini verification for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) - ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i)); } alpaka::syncBlockThreads(acc); #endif @@ -162,7 +162,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // mini verification for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) - ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i)); } alpaka::syncBlockThreads(acc); #endif @@ -172,7 +172,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (nn[i] < minT) continue; // DBSCAN core rule - ALPAKA_ASSERT_OFFLOAD(zt[iv[i]] <= zt[i]); + ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]); auto loop = [&](uint32_t j) { if (nn[j] < minT) return; // DBSCAN core rule @@ -186,7 +186,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]); ; } - ALPAKA_ASSERT_OFFLOAD(iv[i] == iv[j]); + ALPAKA_ASSERT_ACC(iv[i] == iv[j]); }; cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); } @@ -231,7 +231,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h index 100b4b6d42d84..38e8429c0d28f 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h @@ -52,10 +52,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { int32_t* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(zt); - ALPAKA_ASSERT_OFFLOAD(nn); - ALPAKA_ASSERT_OFFLOAD(iv); - ALPAKA_ASSERT_OFFLOAD(ezt2); + ALPAKA_ASSERT_ACC(zt); + ALPAKA_ASSERT_ACC(nn); + ALPAKA_ASSERT_ACC(iv); + ALPAKA_ASSERT_ACC(ezt2); using Hist = cms::alpakatools::HistoContainer; auto& hist = alpaka::declareSharedVar(acc); @@ -71,16 +71,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); } - ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); + ALPAKA_ASSERT_ACC(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { - ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 iz = std::clamp(iz, INT8_MIN, INT8_MAX); izt[i] = iz - INT8_MIN; - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0); + ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256); hist.count(acc, izt[i]); iv[i] = i; nn[i] = 0; @@ -94,7 +94,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { hist.finalize(acc, hws); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + ALPAKA_ASSERT_ACC(hist.size() == nt); for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { hist.fill(acc, izt[i], uint16_t(i)); } @@ -142,7 +142,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (nn[i] < minT) continue; // DBSCAN core rule auto loop = [&](uint32_t j) { - ALPAKA_ASSERT_OFFLOAD(i != j); + ALPAKA_ASSERT_ACC(i != j); if (nn[j] < minT) return; // DBSCAN core rule auto dist = std::abs(zt[i] - zt[j]); @@ -204,7 +204,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h index caba60c826823..a8c428e2f5a00 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h @@ -37,7 +37,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { int32_t* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(nvFinal <= nvIntermediate); + ALPAKA_ASSERT_ACC(nvFinal <= nvIntermediate); nvFinal = nvIntermediate; auto foundClusters = nvFinal; @@ -64,8 +64,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::atomicAdd(acc, &noise, 1, alpaka::hierarchy::Threads{}); continue; } - ALPAKA_ASSERT_OFFLOAD(iv[i] >= 0); - ALPAKA_ASSERT_OFFLOAD(iv[i] < int(foundClusters)); + ALPAKA_ASSERT_ACC(iv[i] >= 0); + ALPAKA_ASSERT_ACC(iv[i] < int(foundClusters)); auto w = 1.f / ezt2[i]; alpaka::atomicAdd(acc, &zv[iv[i]], zt[i] * w, alpaka::hierarchy::Threads{}); alpaka::atomicAdd(acc, &wv[iv[i]], w, alpaka::hierarchy::Threads{}); @@ -74,7 +74,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // reuse nn for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) { - ALPAKA_ASSERT_OFFLOAD(wv[i] > 0.f); + ALPAKA_ASSERT_ACC(wv[i] > 0.f); zv[i] /= wv[i]; nn[i] = -1; // ndof } diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h index 7ba0f905e260b..e2ba0b46b8be4 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h @@ -38,10 +38,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { int32_t const* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(zt); - ALPAKA_ASSERT_OFFLOAD(wv); - ALPAKA_ASSERT_OFFLOAD(chi2); - ALPAKA_ASSERT_OFFLOAD(nn); + ALPAKA_ASSERT_ACC(zt); + ALPAKA_ASSERT_ACC(wv); + ALPAKA_ASSERT_ACC(chi2); + ALPAKA_ASSERT_ACC(nn); constexpr uint32_t MAXTK = 512; @@ -61,7 +61,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { if (chi2[kv] < maxChi2 * float(nn[kv])) continue; - ALPAKA_ASSERT_OFFLOAD(nn[kv] < int32_t(MAXTK)); + ALPAKA_ASSERT_ACC(nn[kv] < int32_t(MAXTK)); if ((uint32_t)nn[kv] >= MAXTK) continue; // too bad FIXME @@ -85,7 +85,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { auto& wnew = alpaka::declareSharedVar(acc); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(int(nq) == nn[kv] + 1); + ALPAKA_ASSERT_ACC(int(nq) == nn[kv] + 1); int maxiter = 20; // kt-min.... diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc index 2d33fee32752c..b41e07aff56d5 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc @@ -41,7 +41,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.nTracks())) { [[maybe_unused]] auto nHits = helper::nHits(tracks_view, idx); - ALPAKA_ASSERT_OFFLOAD(nHits >= 3); + ALPAKA_ASSERT_ACC(nHits >= 3); // initialize soa... soa[idx].idv() = -1;