Merge pull request #43943 from fwyzard/cmssw_alpaka_v1.1.0

Update Alpaka to version 1.1.0 [14.0.x]
cms-sw committed on Feb 14, 2024 · commit c02f349
2 parents: efc4d46 + 856ec61

Showing 37 changed files with 312 additions and 327 deletions.
diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc
@@ -34,7 +34,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
       ALPAKA_FN_ACC void operator()(TAcc const& acc, reco::ZVertexSoAView zvertex_view) const {
         if (cms::alpakatools::once_per_grid(acc)) {
-          ALPAKA_ASSERT_OFFLOAD(zvertex_view.nvFinal() == 420);
+          ALPAKA_ASSERT_ACC(zvertex_view.nvFinal() == 420);
         }
 
         for (int32_t j : elements_with_stride(acc, zvertex_view.nvFinal())) {

diff --git a/HeterogeneousCore/AlpakaCore/interface/EventCache.h b/HeterogeneousCore/AlpakaCore/interface/EventCache.h
@@ -10,6 +10,7 @@
 #include "FWCore/Utilities/interface/ReusableObjectHolder.h"
 #include "FWCore/Utilities/interface/thread_safety_macros.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"
 
 namespace cms::alpakatools {
@@ -31,11 +32,11 @@ namespace cms::alpakatools {
 #endif
 
     using Device = alpaka::Dev<Event>;
-    using Platform = alpaka::Pltf<Device>;
+    using Platform = alpaka::Platform<Device>;
 
     // EventCache should be constructed by the first call to
     // getEventCache() only if we have any devices present
-    EventCache() : cache_(alpaka::getDevCount<Platform>()) {}
+    EventCache() : cache_(devices<Platform>().size()) {}
 
     // Gets a (cached) event for the current device. The event
     // will be returned to the cache by the shared_ptr destructor. The
@@ -79,7 +80,7 @@ namespace cms::alpakatools {
       // EventCache lives through multiple tests (and goes through
       // multiple shutdowns of the framework).
       cache_.clear();
-      cache_.resize(alpaka::getDevCount<Platform>());
+      cache_.resize(devices<Platform>().size());
     }
 
     std::vector<edm::ReusableObjectHolder<Event>> cache_;

diff --git a/HeterogeneousCore/AlpakaCore/interface/QueueCache.h b/HeterogeneousCore/AlpakaCore/interface/QueueCache.h
@@ -9,6 +9,7 @@
 #include "FWCore/Utilities/interface/ReusableObjectHolder.h"
 #include "FWCore/Utilities/interface/thread_safety_macros.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"
 
 namespace cms::alpakatools {
@@ -29,12 +30,12 @@ namespace cms::alpakatools {
 #endif
 
     using Device = alpaka::Dev<Queue>;
-    using Platform = alpaka::Pltf<Device>;
+    using Platform = alpaka::Platform<Device>;
 
   public:
     // QueueCache should be constructed by the first call to
     // getQueueCache() only if we have any devices present
-    QueueCache() : cache_(alpaka::getDevCount<Platform>()) {}
+    QueueCache() : cache_(devices<Platform>().size()) {}
 
     // Gets a (cached) queue for the current device. The queue
     // will be returned to the cache by the shared_ptr destructor.
@@ -52,7 +53,7 @@ namespace cms::alpakatools {
       // QueueCache lives through multiple tests (and goes through
       // multiple shutdowns of the framework).
       cache_.clear();
-      cache_.resize(alpaka::getDevCount<Platform>());
+      cache_.resize(devices<Platform>().size());
     }
 
     std::vector<edm::ReusableObjectHolder<Queue>> cache_;

diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProductType.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProductType.h
@@ -13,7 +13,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::detail {
    */
   template <typename TProduct>
   struct ESDeviceProductType {
-    using type = std::conditional_t<std::is_same_v<Platform, alpaka::PltfCpu>,
+    using type = std::conditional_t<std::is_same_v<Platform, alpaka::PlatformCpu>,
                                     // host backends can use TProduct directly
                                     TProduct,
                                     // all device backends need to be wrapped

diff --git a/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h b/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h
@@ -104,7 +104,7 @@ namespace cms::alpakatools {
         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
 
         return alpaka::BufCudaRt<TElem, TDim, TIdx>(
-            dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), pitchBytes, extent);
+            dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
       }
     };
 
@@ -180,7 +180,7 @@ namespace cms::alpakatools {
         auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
 
         return alpaka::BufHipRt<TElem, TDim, TIdx>(
-            dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), pitchBytes, extent);
+            dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
       }
     };
 

diff --git a/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h
@@ -15,6 +15,7 @@
 
 #include <alpaka/alpaka.hpp>
 
+#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"
 
@@ -337,7 +338,8 @@ namespace cms::alpakatools {
         return alpaka::allocBuf<std::byte, size_t>(device_, bytes);
       } else if constexpr (std::is_same_v<Device, alpaka::DevCpu>) {
         // allocate pinned host memory accessible by the queue's platform
-        return alpaka::allocMappedBuf<alpaka::Pltf<alpaka::Dev<Queue>>, std::byte, size_t>(device_, bytes);
+        using Platform = alpaka::Platform<alpaka::Dev<Queue>>;
+        return alpaka::allocMappedBuf<Platform, std::byte, size_t>(device_, platform<Platform>(), bytes);
       } else {
         // unsupported combination
         static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,

diff --git a/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h b/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h
@@ -27,10 +27,10 @@ namespace cms::alpakatools {
       const uint32_t nt = offsets[nh];
       for (uint32_t i : uniform_elements(acc, nt)) {
         auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
-        ALPAKA_ASSERT_OFFLOAD((*off) > 0);
+        ALPAKA_ASSERT_ACC((*off) > 0);
         int32_t ih = off - offsets - 1;
-        ALPAKA_ASSERT_OFFLOAD(ih >= 0);
-        ALPAKA_ASSERT_OFFLOAD(ih < int(nh));
+        ALPAKA_ASSERT_ACC(ih >= 0);
+        ALPAKA_ASSERT_ACC(ih < int(nh));
         h->count(acc, v[i], ih);
       }
     }
@@ -46,10 +46,10 @@ namespace cms::alpakatools {
       const uint32_t nt = offsets[nh];
       for (uint32_t i : uniform_elements(acc, nt)) {
         auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
-        ALPAKA_ASSERT_OFFLOAD((*off) > 0);
+        ALPAKA_ASSERT_ACC((*off) > 0);
         int32_t ih = off - offsets - 1;
-        ALPAKA_ASSERT_OFFLOAD(ih >= 0);
-        ALPAKA_ASSERT_OFFLOAD(ih < int(nh));
+        ALPAKA_ASSERT_ACC(ih >= 0);
+        ALPAKA_ASSERT_ACC(ih < int(nh));
         h->fill(acc, v[i], i, ih);
       }
     }
@@ -102,7 +102,7 @@ namespace cms::alpakatools {
     int bs = Hist::bin(value);
     int be = std::min(int(Hist::nbins() - 1), bs + n);
     bs = std::max(0, bs - n);
-    ALPAKA_ASSERT_OFFLOAD(be >= bs);
+    ALPAKA_ASSERT_ACC(be >= bs);
     for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
       func(*pj);
     }
@@ -113,7 +113,7 @@ namespace cms::alpakatools {
   ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
     auto bs = Hist::bin(wmin);
     auto be = Hist::bin(wmax);
-    ALPAKA_ASSERT_OFFLOAD(be >= bs);
+    ALPAKA_ASSERT_ACC(be >= bs);
     for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
       func(*pj);
     }
@@ -164,36 +164,36 @@ namespace cms::alpakatools {
     template <typename TAcc>
     ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) {
       uint32_t b = bin(t);
-      ALPAKA_ASSERT_OFFLOAD(b < nbins());
+      ALPAKA_ASSERT_ACC(b < nbins());
       Base::atomicIncrement(acc, this->off[b]);
     }
 
     template <typename TAcc>
     ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) {
       uint32_t b = bin(t);
-      ALPAKA_ASSERT_OFFLOAD(b < nbins());
+      ALPAKA_ASSERT_ACC(b < nbins());
       auto w = Base::atomicDecrement(acc, this->off[b]);
-      ALPAKA_ASSERT_OFFLOAD(w > 0);
+      ALPAKA_ASSERT_ACC(w > 0);
       this->content[w - 1] = j;
     }
 
     template <typename TAcc>
     ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) {
       uint32_t b = bin(t);
-      ALPAKA_ASSERT_OFFLOAD(b < nbins());
+      ALPAKA_ASSERT_ACC(b < nbins());
       b += histOff(nh);
-      ALPAKA_ASSERT_OFFLOAD(b < totbins());
+      ALPAKA_ASSERT_ACC(b < totbins());
       Base::atomicIncrement(acc, this->off[b]);
     }
 
     template <typename TAcc>
     ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) {
       uint32_t b = bin(t);
-      ALPAKA_ASSERT_OFFLOAD(b < nbins());
+      ALPAKA_ASSERT_ACC(b < nbins());
       b += histOff(nh);
-      ALPAKA_ASSERT_OFFLOAD(b < totbins());
+      ALPAKA_ASSERT_ACC(b < totbins());
       auto w = Base::atomicDecrement(acc, this->off[b]);
-      ALPAKA_ASSERT_OFFLOAD(w > 0);
+      ALPAKA_ASSERT_ACC(w > 0);
       this->content[w - 1] = j;
     }
   };

diff --git a/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h b/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h
@@ -42,15 +42,15 @@ namespace cms::alpakatools {
     constexpr auto capacity() const { return content.capacity(); }
 
     ALPAKA_FN_HOST_ACC void initStorage(View view) {
-      ALPAKA_ASSERT_OFFLOAD(view.assoc == this);
+      ALPAKA_ASSERT_ACC(view.assoc == this);
       if constexpr (ctCapacity() < 0) {
-        ALPAKA_ASSERT_OFFLOAD(view.contentStorage);
-        ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0);
+        ALPAKA_ASSERT_ACC(view.contentStorage);
+        ALPAKA_ASSERT_ACC(view.contentSize > 0);
         content.init(view.contentStorage, view.contentSize);
       }
       if constexpr (ctNOnes() < 0) {
-        ALPAKA_ASSERT_OFFLOAD(view.offStorage);
-        ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
+        ALPAKA_ASSERT_ACC(view.offStorage);
+        ALPAKA_ASSERT_ACC(view.offSize > 0);
         off.init(view.offStorage, view.offSize);
       }
     }
@@ -80,24 +80,24 @@ namespace cms::alpakatools {
 
     template <typename TAcc>
     ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, I b) {
-      ALPAKA_ASSERT_OFFLOAD(b < static_cast<uint32_t>(nOnes()));
+      ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
       atomicIncrement(acc, off[b]);
     }
 
     template <typename TAcc>
     ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, I b, index_type j) {
-      ALPAKA_ASSERT_OFFLOAD(b < static_cast<uint32_t>(nOnes()));
+      ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
       auto w = atomicDecrement(acc, off[b]);
-      ALPAKA_ASSERT_OFFLOAD(w > 0);
+      ALPAKA_ASSERT_ACC(w > 0);
       content[w - 1] = j;
     }
 
     // this MUST BE DONE in a single block (or in two kernels!)
     struct zeroAndInit {
       template <typename TAcc>
       ALPAKA_FN_ACC void operator()(const TAcc &acc, View view) const {
-        ALPAKA_ASSERT_OFFLOAD((1 == alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0]));
-        ALPAKA_ASSERT_OFFLOAD((0 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]));
+        ALPAKA_ASSERT_ACC((1 == alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0]));
+        ALPAKA_ASSERT_ACC((0 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]));
         auto h = view.assoc;
         if (cms::alpakatools::once_per_block(acc)) {
           h->psws = 0;
@@ -119,12 +119,12 @@ namespace cms::alpakatools {
     template <typename TAcc, typename TQueue>
     ALPAKA_FN_INLINE static void launchZero(View view, TQueue &queue) {
       if constexpr (ctCapacity() < 0) {
-        ALPAKA_ASSERT_OFFLOAD(view.contentStorage);
-        ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0);
+        ALPAKA_ASSERT_ACC(view.contentStorage);
+        ALPAKA_ASSERT_ACC(view.contentSize > 0);
       }
       if constexpr (ctNOnes() < 0) {
-        ALPAKA_ASSERT_OFFLOAD(view.offStorage);
-        ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
+        ALPAKA_ASSERT_ACC(view.offStorage);
+        ALPAKA_ASSERT_ACC(view.offSize > 0);
       }
       if constexpr (!requires_single_thread_per_block_v<TAcc>) {
         auto nthreads = 1024;
@@ -133,7 +133,7 @@ namespace cms::alpakatools {
         alpaka::exec<TAcc>(queue, workDiv, zeroAndInit{}, view);
       } else {
         auto h = view.assoc;
-        ALPAKA_ASSERT_OFFLOAD(h);
+        ALPAKA_ASSERT_ACC(h);
         h->initStorage(view);
         h->zero();
         h->psws = 0;
@@ -213,9 +213,9 @@ namespace cms::alpakatools {
 
     template <typename TAcc>
     ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize(TAcc &acc, Counter *ws = nullptr) {
-      ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == 0);
+      ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == 0);
       blockPrefixScan(acc, this->off.data(), this->totOnes(), ws);
-      ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]);
+      ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]);
     }
 
     ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize() {
@@ -234,17 +234,17 @@ namespace cms::alpakatools {
     ALPAKA_FN_INLINE static void launchFinalize(View view, TQueue &queue) {
       // View stores a base pointer, we need to upcast back...
       auto h = static_cast<OneToManyAssocRandomAccess *>(view.assoc);
-      ALPAKA_ASSERT_OFFLOAD(h);
+      ALPAKA_ASSERT_ACC(h);
       if constexpr (!requires_single_thread_per_block_v<TAcc>) {
         Counter *poff = (Counter *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, off));
         auto nOnes = OneToManyAssocRandomAccess::ctNOnes();
         if constexpr (OneToManyAssocRandomAccess::ctNOnes() < 0) {
-          ALPAKA_ASSERT_OFFLOAD(view.offStorage);
-          ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
+          ALPAKA_ASSERT_ACC(view.offStorage);
+          ALPAKA_ASSERT_ACC(view.offSize > 0);
           nOnes = view.offSize;
           poff = view.offStorage;
         }
-        ALPAKA_ASSERT_OFFLOAD(nOnes > 0);
+        ALPAKA_ASSERT_ACC(nOnes > 0);
         int32_t *ppsws = (int32_t *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, psws));
         auto nthreads = 1024;
         auto nblocks = (nOnes + nthreads - 1) / nthreads;

diff --git a/HeterogeneousCore/AlpakaInterface/interface/config.h b/HeterogeneousCore/AlpakaInterface/interface/config.h
@@ -35,15 +35,15 @@ namespace alpaka_common {
 
   // host types
   using DevHost = alpaka::DevCpu;
-  using PltfHost = alpaka::Pltf<DevHost>;
+  using PlatformHost = alpaka::Platform<DevHost>;
 
 }  // namespace alpaka_common
 
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
 namespace alpaka_cuda_async {
   using namespace alpaka_common;
 
-  using Platform = alpaka::PltfCudaRt;
+  using Platform = alpaka::PlatformCudaRt;
   using Device = alpaka::DevCudaRt;
   using Queue = alpaka::QueueCudaRtNonBlocking;
   using Event = alpaka::EventCudaRt;
@@ -88,7 +88,7 @@ namespace alpaka {
 namespace alpaka_rocm_async {
   using namespace alpaka_common;
 
-  using Platform = alpaka::PltfHipRt;
+  using Platform = alpaka::PlatformHipRt;
   using Device = alpaka::DevHipRt;
   using Queue = alpaka::QueueHipRtNonBlocking;
   using Event = alpaka::EventHipRt;
@@ -133,7 +133,7 @@ namespace alpaka {
 namespace alpaka_serial_sync {
   using namespace alpaka_common;
 
-  using Platform = alpaka::PltfCpu;
+  using Platform = alpaka::PlatformCpu;
   using Device = alpaka::DevCpu;
   using Queue = alpaka::QueueCpuBlocking;
   using Event = alpaka::EventCpu;
@@ -159,7 +159,7 @@ namespace alpaka_serial_sync {
 namespace alpaka_tbb_async {
   using namespace alpaka_common;
 
-  using Platform = alpaka::PltfCpu;
+  using Platform = alpaka::PlatformCpu;
   using Device = alpaka::DevCpu;
   using Queue = alpaka::QueueCpuNonBlocking;
   using Event = alpaka::EventCpu;