Skip to content

Commit

Permalink
Merge pull request #43943 from fwyzard/cmssw_alpaka_v1.1.0
Browse files Browse the repository at this point in the history
Update Alpaka to version 1.1.0 [14.0.x]
  • Loading branch information
cmsbuild authored Feb 14, 2024
2 parents efc4d46 + 856ec61 commit c02f349
Show file tree
Hide file tree
Showing 37 changed files with 312 additions and 327 deletions.
2 changes: 1 addition & 1 deletion DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC void operator()(TAcc const& acc, reco::ZVertexSoAView zvertex_view) const {
if (cms::alpakatools::once_per_grid(acc)) {
ALPAKA_ASSERT_OFFLOAD(zvertex_view.nvFinal() == 420);
ALPAKA_ASSERT_ACC(zvertex_view.nvFinal() == 420);
}

for (int32_t j : elements_with_stride(acc, zvertex_view.nvFinal())) {
Expand Down
7 changes: 4 additions & 3 deletions HeterogeneousCore/AlpakaCore/interface/EventCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "FWCore/Utilities/interface/ReusableObjectHolder.h"
#include "FWCore/Utilities/interface/thread_safety_macros.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"

namespace cms::alpakatools {
Expand All @@ -31,11 +32,11 @@ namespace cms::alpakatools {
#endif

using Device = alpaka::Dev<Event>;
using Platform = alpaka::Pltf<Device>;
using Platform = alpaka::Platform<Device>;

// EventCache should be constructed by the first call to
// getEventCache() only if we have any devices present
EventCache() : cache_(alpaka::getDevCount<Platform>()) {}
EventCache() : cache_(devices<Platform>().size()) {}

// Gets a (cached) event for the current device. The event
// will be returned to the cache by the shared_ptr destructor. The
Expand Down Expand Up @@ -79,7 +80,7 @@ namespace cms::alpakatools {
// EventCache lives through multiple tests (and goes through
// multiple shutdowns of the framework).
cache_.clear();
cache_.resize(alpaka::getDevCount<Platform>());
cache_.resize(devices<Platform>().size());
}

std::vector<edm::ReusableObjectHolder<Event>> cache_;
Expand Down
7 changes: 4 additions & 3 deletions HeterogeneousCore/AlpakaCore/interface/QueueCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "FWCore/Utilities/interface/ReusableObjectHolder.h"
#include "FWCore/Utilities/interface/thread_safety_macros.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"

namespace cms::alpakatools {
Expand All @@ -29,12 +30,12 @@ namespace cms::alpakatools {
#endif

using Device = alpaka::Dev<Queue>;
using Platform = alpaka::Pltf<Device>;
using Platform = alpaka::Platform<Device>;

public:
// QueueCache should be constructed by the first call to
// getQueueCache() only if we have any devices present
QueueCache() : cache_(alpaka::getDevCount<Platform>()) {}
QueueCache() : cache_(devices<Platform>().size()) {}

// Gets a (cached) queue for the current device. The queue
// will be returned to the cache by the shared_ptr destructor.
Expand All @@ -52,7 +53,7 @@ namespace cms::alpakatools {
// QueueCache lives through multiple tests (and goes through
// multiple shutdowns of the framework).
cache_.clear();
cache_.resize(alpaka::getDevCount<Platform>());
cache_.resize(devices<Platform>().size());
}

std::vector<edm::ReusableObjectHolder<Queue>> cache_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::detail {
*/
template <typename TProduct>
struct ESDeviceProductType {
using type = std::conditional_t<std::is_same_v<Platform, alpaka::PltfCpu>,
using type = std::conditional_t<std::is_same_v<Platform, alpaka::PlatformCpu>,
// host backends can use TProduct directly
TProduct,
// all device backends need to be wrapped
Expand Down
4 changes: 2 additions & 2 deletions HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ namespace cms::alpakatools {
auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };

return alpaka::BufCudaRt<TElem, TDim, TIdx>(
dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), pitchBytes, extent);
dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
}
};

Expand Down Expand Up @@ -180,7 +180,7 @@ namespace cms::alpakatools {
auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };

return alpaka::BufHipRt<TElem, TDim, TIdx>(
dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), pitchBytes, extent);
dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include <alpaka/alpaka.hpp>

#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
#include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"

Expand Down Expand Up @@ -337,7 +338,8 @@ namespace cms::alpakatools {
return alpaka::allocBuf<std::byte, size_t>(device_, bytes);
} else if constexpr (std::is_same_v<Device, alpaka::DevCpu>) {
// allocate pinned host memory accessible by the queue's platform
return alpaka::allocMappedBuf<alpaka::Pltf<alpaka::Dev<Queue>>, std::byte, size_t>(device_, bytes);
using Platform = alpaka::Platform<alpaka::Dev<Queue>>;
return alpaka::allocMappedBuf<Platform, std::byte, size_t>(device_, platform<Platform>(), bytes);
} else {
// unsupported combination
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
Expand Down
32 changes: 16 additions & 16 deletions HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ namespace cms::alpakatools {
const uint32_t nt = offsets[nh];
for (uint32_t i : uniform_elements(acc, nt)) {
auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
ALPAKA_ASSERT_OFFLOAD((*off) > 0);
ALPAKA_ASSERT_ACC((*off) > 0);
int32_t ih = off - offsets - 1;
ALPAKA_ASSERT_OFFLOAD(ih >= 0);
ALPAKA_ASSERT_OFFLOAD(ih < int(nh));
ALPAKA_ASSERT_ACC(ih >= 0);
ALPAKA_ASSERT_ACC(ih < int(nh));
h->count(acc, v[i], ih);
}
}
Expand All @@ -46,10 +46,10 @@ namespace cms::alpakatools {
const uint32_t nt = offsets[nh];
for (uint32_t i : uniform_elements(acc, nt)) {
auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
ALPAKA_ASSERT_OFFLOAD((*off) > 0);
ALPAKA_ASSERT_ACC((*off) > 0);
int32_t ih = off - offsets - 1;
ALPAKA_ASSERT_OFFLOAD(ih >= 0);
ALPAKA_ASSERT_OFFLOAD(ih < int(nh));
ALPAKA_ASSERT_ACC(ih >= 0);
ALPAKA_ASSERT_ACC(ih < int(nh));
h->fill(acc, v[i], i, ih);
}
}
Expand Down Expand Up @@ -102,7 +102,7 @@ namespace cms::alpakatools {
int bs = Hist::bin(value);
int be = std::min(int(Hist::nbins() - 1), bs + n);
bs = std::max(0, bs - n);
ALPAKA_ASSERT_OFFLOAD(be >= bs);
ALPAKA_ASSERT_ACC(be >= bs);
for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
func(*pj);
}
Expand All @@ -113,7 +113,7 @@ namespace cms::alpakatools {
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
auto bs = Hist::bin(wmin);
auto be = Hist::bin(wmax);
ALPAKA_ASSERT_OFFLOAD(be >= bs);
ALPAKA_ASSERT_ACC(be >= bs);
for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
func(*pj);
}
Expand Down Expand Up @@ -164,36 +164,36 @@ namespace cms::alpakatools {
template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_ACC(b < nbins());
Base::atomicIncrement(acc, this->off[b]);
}

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_ACC(b < nbins());
auto w = Base::atomicDecrement(acc, this->off[b]);
ALPAKA_ASSERT_OFFLOAD(w > 0);
ALPAKA_ASSERT_ACC(w > 0);
this->content[w - 1] = j;
}

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_ACC(b < nbins());
b += histOff(nh);
ALPAKA_ASSERT_OFFLOAD(b < totbins());
ALPAKA_ASSERT_ACC(b < totbins());
Base::atomicIncrement(acc, this->off[b]);
}

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_ACC(b < nbins());
b += histOff(nh);
ALPAKA_ASSERT_OFFLOAD(b < totbins());
ALPAKA_ASSERT_ACC(b < totbins());
auto w = Base::atomicDecrement(acc, this->off[b]);
ALPAKA_ASSERT_OFFLOAD(w > 0);
ALPAKA_ASSERT_ACC(w > 0);
this->content[w - 1] = j;
}
};
Expand Down
42 changes: 21 additions & 21 deletions HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ namespace cms::alpakatools {
constexpr auto capacity() const { return content.capacity(); }

ALPAKA_FN_HOST_ACC void initStorage(View view) {
ALPAKA_ASSERT_OFFLOAD(view.assoc == this);
ALPAKA_ASSERT_ACC(view.assoc == this);
if constexpr (ctCapacity() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.contentStorage);
ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0);
ALPAKA_ASSERT_ACC(view.contentStorage);
ALPAKA_ASSERT_ACC(view.contentSize > 0);
content.init(view.contentStorage, view.contentSize);
}
if constexpr (ctNOnes() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.offStorage);
ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
ALPAKA_ASSERT_ACC(view.offStorage);
ALPAKA_ASSERT_ACC(view.offSize > 0);
off.init(view.offStorage, view.offSize);
}
}
Expand Down Expand Up @@ -80,24 +80,24 @@ namespace cms::alpakatools {

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, I b) {
ALPAKA_ASSERT_OFFLOAD(b < static_cast<uint32_t>(nOnes()));
ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
atomicIncrement(acc, off[b]);
}

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, I b, index_type j) {
ALPAKA_ASSERT_OFFLOAD(b < static_cast<uint32_t>(nOnes()));
ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
auto w = atomicDecrement(acc, off[b]);
ALPAKA_ASSERT_OFFLOAD(w > 0);
ALPAKA_ASSERT_ACC(w > 0);
content[w - 1] = j;
}

// this MUST BE DONE in a single block (or in two kernels!)
struct zeroAndInit {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(const TAcc &acc, View view) const {
ALPAKA_ASSERT_OFFLOAD((1 == alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0]));
ALPAKA_ASSERT_OFFLOAD((0 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]));
ALPAKA_ASSERT_ACC((1 == alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0]));
ALPAKA_ASSERT_ACC((0 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]));
auto h = view.assoc;
if (cms::alpakatools::once_per_block(acc)) {
h->psws = 0;
Expand All @@ -119,12 +119,12 @@ namespace cms::alpakatools {
template <typename TAcc, typename TQueue>
ALPAKA_FN_INLINE static void launchZero(View view, TQueue &queue) {
if constexpr (ctCapacity() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.contentStorage);
ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0);
ALPAKA_ASSERT_ACC(view.contentStorage);
ALPAKA_ASSERT_ACC(view.contentSize > 0);
}
if constexpr (ctNOnes() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.offStorage);
ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
ALPAKA_ASSERT_ACC(view.offStorage);
ALPAKA_ASSERT_ACC(view.offSize > 0);
}
if constexpr (!requires_single_thread_per_block_v<TAcc>) {
auto nthreads = 1024;
Expand All @@ -133,7 +133,7 @@ namespace cms::alpakatools {
alpaka::exec<TAcc>(queue, workDiv, zeroAndInit{}, view);
} else {
auto h = view.assoc;
ALPAKA_ASSERT_OFFLOAD(h);
ALPAKA_ASSERT_ACC(h);
h->initStorage(view);
h->zero();
h->psws = 0;
Expand Down Expand Up @@ -213,9 +213,9 @@ namespace cms::alpakatools {

template <typename TAcc>
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize(TAcc &acc, Counter *ws = nullptr) {
ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == 0);
ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == 0);
blockPrefixScan(acc, this->off.data(), this->totOnes(), ws);
ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]);
ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]);
}

ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize() {
Expand All @@ -234,17 +234,17 @@ namespace cms::alpakatools {
ALPAKA_FN_INLINE static void launchFinalize(View view, TQueue &queue) {
// View stores a base pointer, we need to upcast back...
auto h = static_cast<OneToManyAssocRandomAccess *>(view.assoc);
ALPAKA_ASSERT_OFFLOAD(h);
ALPAKA_ASSERT_ACC(h);
if constexpr (!requires_single_thread_per_block_v<TAcc>) {
Counter *poff = (Counter *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, off));
auto nOnes = OneToManyAssocRandomAccess::ctNOnes();
if constexpr (OneToManyAssocRandomAccess::ctNOnes() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.offStorage);
ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
ALPAKA_ASSERT_ACC(view.offStorage);
ALPAKA_ASSERT_ACC(view.offSize > 0);
nOnes = view.offSize;
poff = view.offStorage;
}
ALPAKA_ASSERT_OFFLOAD(nOnes > 0);
ALPAKA_ASSERT_ACC(nOnes > 0);
int32_t *ppsws = (int32_t *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, psws));
auto nthreads = 1024;
auto nblocks = (nOnes + nthreads - 1) / nthreads;
Expand Down
10 changes: 5 additions & 5 deletions HeterogeneousCore/AlpakaInterface/interface/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ namespace alpaka_common {

// host types
using DevHost = alpaka::DevCpu;
using PltfHost = alpaka::Pltf<DevHost>;
using PlatformHost = alpaka::Platform<DevHost>;

} // namespace alpaka_common

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
namespace alpaka_cuda_async {
using namespace alpaka_common;

using Platform = alpaka::PltfCudaRt;
using Platform = alpaka::PlatformCudaRt;
using Device = alpaka::DevCudaRt;
using Queue = alpaka::QueueCudaRtNonBlocking;
using Event = alpaka::EventCudaRt;
Expand Down Expand Up @@ -88,7 +88,7 @@ namespace alpaka {
namespace alpaka_rocm_async {
using namespace alpaka_common;

using Platform = alpaka::PltfHipRt;
using Platform = alpaka::PlatformHipRt;
using Device = alpaka::DevHipRt;
using Queue = alpaka::QueueHipRtNonBlocking;
using Event = alpaka::EventHipRt;
Expand Down Expand Up @@ -133,7 +133,7 @@ namespace alpaka {
namespace alpaka_serial_sync {
using namespace alpaka_common;

using Platform = alpaka::PltfCpu;
using Platform = alpaka::PlatformCpu;
using Device = alpaka::DevCpu;
using Queue = alpaka::QueueCpuBlocking;
using Event = alpaka::EventCpu;
Expand All @@ -159,7 +159,7 @@ namespace alpaka_serial_sync {
namespace alpaka_tbb_async {
using namespace alpaka_common;

using Platform = alpaka::PltfCpu;
using Platform = alpaka::PlatformCpu;
using Device = alpaka::DevCpu;
using Queue = alpaka::QueueCpuNonBlocking;
using Event = alpaka::EventCpu;
Expand Down
Loading

0 comments on commit c02f349

Please sign in to comment.