From b4ed3a8602557608014e80fceb761661c4db86d2 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 24 Nov 2022 11:52:24 +0100 Subject: [PATCH] Add support to multi layout portable collections (up to 5) The design is recursive, but ROOT seems not to serialize variadic templates, so we have a 5-element design. This number is arbitrary and can be changed with relatively little work. The order of the PortableDeviceCollection template parameters is changed from device at the end to device at the beginning to accommodate the pseudo-variadic templating. A test validates the collection with two layouts and three layouts. The layout and view can be accessed by index or type, on the condition the type is present only once in the collection. The default index is set to 0 to minimize the need for changes in the single layout case. Empty template parameter angle brackets ("<>") are needed in some cases. Convenience scripts are added to generate the XML needed for dictionary generation, but their output has to be edited if there are commonalities between multiple collections. 
--- DataFormats/Portable/README.md | 30 +++ .../Portable/interface/PortableCollection.h | 27 ++ .../interface/PortableCollectionCommon.h | 103 ++++++++ .../interface/PortableDeviceCollection.h | 214 +++++++++++++++- .../interface/PortableHostCollection.h | 233 ++++++++++++++++++ .../PortableHostCollectionReadRules.h | 61 +++++ .../Portable/interface/PortableObject.h | 2 +- .../interface/alpaka/PortableCollection.h | 54 +++- .../interface/alpaka/PortableObject.h | 2 +- .../scripts/portableDeviceCollectionHints | 27 ++ .../scripts/portableHostCollectionHints | 33 +++ .../interface/TestHostCollection.h | 4 + .../PortableTestObjects/interface/TestSoA.h | 28 +++ .../interface/alpaka/TestDeviceCollection.h | 4 + .../src/alpaka/classes_cuda_def.xml | 8 + .../src/alpaka/classes_rocm_def.xml | 8 + .../PortableTestObjects/src/classes.cc | 2 + .../PortableTestObjects/src/classes_def.xml | 43 +++- .../AlpakaTest/plugins/TestAlpakaAnalyzer.cc | 161 ++++++++++++ .../AlpakaTest/plugins/alpaka/TestAlgo.dev.cc | 231 ++++++++++++++++- .../AlpakaTest/plugins/alpaka/TestAlgo.h | 9 + .../alpaka/TestAlpakaGlobalProducer.cc | 16 ++ .../alpaka/TestAlpakaGlobalProducerE.cc | 16 +- .../plugins/alpaka/TestAlpakaProducer.cc | 27 +- .../alpaka/TestAlpakaStreamProducer.cc | 18 ++ .../TestAlpakaStreamSynchronizingProducer.cc | 6 + .../plugins/alpaka/TestHelperClass.cc | 8 + .../plugins/alpaka/TestHelperClass.h | 6 + HeterogeneousCore/AlpakaTest/test/writer.py | 6 +- 29 files changed, 1372 insertions(+), 15 deletions(-) create mode 100644 DataFormats/Portable/interface/PortableCollectionCommon.h create mode 100755 DataFormats/Portable/scripts/portableDeviceCollectionHints create mode 100755 DataFormats/Portable/scripts/portableHostCollectionHints diff --git a/DataFormats/Portable/README.md b/DataFormats/Portable/README.md index 1724c3123d9fc..7a9b80bea684d 100644 --- a/DataFormats/Portable/README.md +++ b/DataFormats/Portable/README.md @@ -126,3 +126,33 @@ should explicitly use the 
`PortableHostObject` and `PortableHostCollection` types. Modules that implement portable interfaces (_e.g._ producers) should use the generic types based on `ALPAKA_ACCELERATOR_NAMESPACE::PortableObject` or `PortableObject`, and `ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection` or `PortableCollection`. + +## Multi layout collections + +Some use cases require multiple sets of columns of different sizes. This can be achieved in a single +`PortableCollection` using `PortableCollection2`, `PortableCollection3` and so on up to +`PortableCollection5<...>`. The numbered, fixed-size wrappers are needed in order to be added to the ROOT dictionary. +Behind the scenes, the recursive `PortableHostMultiCollection` and +`ALPAKA_ACCELERATOR_NAMESPACE::PortableDeviceMultiCollection` (note the reversed parameter order) provide +the actual class definitions. + +## ROOT dictionary declaration helper scripts + +In order to be serialized by ROOT, the products need to be added to its dictionary. This happens during `scram build` +as instructed in `/src/classes_def.xml` and `/src/alpaka/classes_cuda_def.xml` and +`/src/alpaka/classes_rocm_def.xml`. Two scripts generate the code to be added to the xml files. +Both scripts expect the collections to be aliased as in: +``` +using TestDeviceMultiCollection3 = PortableCollection3; +``` + +For the host xml, SoA layouts have to be listed, and duplicates should be removed manually if multiple +collections share the same layout. The scripts are called as follows: +``` +./DataFormats/Portable/scripts/portableHostCollectionHints portabletest::TestHostMultiCollection3 \ + portabletest::TestSoALayout portabletest::TestSoALayout2 portabletest::TestSoALayout3 + +./DataFormats/Portable/scripts/portableDeviceCollectionHints portabletest::TestHostMultiCollection3 +``` +The layouts should not be added as parameters for the device collection. These scripts can be used equally with the +single layout collections or multi layout collections. 
\ No newline at end of file diff --git a/DataFormats/Portable/interface/PortableCollection.h b/DataFormats/Portable/interface/PortableCollection.h index abc64b99cb0d3..3f69ffdd95491 100644 --- a/DataFormats/Portable/interface/PortableCollection.h +++ b/DataFormats/Portable/interface/PortableCollection.h @@ -22,12 +22,18 @@ namespace traits { using CollectionType = PortableHostCollection; }; + template + class PortableMultiCollectionTrait; } // namespace traits // type alias for a generic SoA-based product template >> using PortableCollection = typename traits::PortableCollectionTrait::CollectionType; +// type alias for a generic SoA-based product +template +using PortableMultiCollection = typename traits::PortableMultiCollectionTrait::CollectionType; + // define how to copy PortableCollection between host and device namespace cms::alpakatools { template @@ -40,6 +46,16 @@ namespace cms::alpakatools { } }; + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, PortableDeviceMultiCollection const& srcData) { + PortableHostMultiCollection dstData(srcData.sizes(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; + template struct CopyToDevice> { template @@ -50,6 +66,17 @@ namespace cms::alpakatools { return dstData; } }; + + template + struct CopyToDevice> { + template + static auto copyAsync(TQueue& queue, PortableHostMultiCollection const& srcData) { + using TDevice = typename alpaka::trait::DevType::type; + PortableDeviceMultiCollection dstData(srcData.sizes(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; } // namespace cms::alpakatools #endif // DataFormats_Portable_interface_PortableCollection_h diff --git a/DataFormats/Portable/interface/PortableCollectionCommon.h b/DataFormats/Portable/interface/PortableCollectionCommon.h new file mode 100644 index 0000000000000..c1f98abc8f9f3 --- /dev/null +++ 
b/DataFormats/Portable/interface/PortableCollectionCommon.h @@ -0,0 +1,103 @@ +#ifndef DataFormats_Portable_interface_PortableCollectionCommon_h +#define DataFormats_Portable_interface_PortableCollectionCommon_h + +#include +#include +#include + +namespace portablecollection { + + // Note: if there are other uses for this, it could be moved to a central place + template + constexpr void constexpr_for(F&& f) { + if constexpr (Start < End) { + f(std::integral_constant()); + constexpr_for(std::forward(f)); + } + } + + template + struct CollectionLeaf { + CollectionLeaf() = default; + CollectionLeaf(std::byte* buffer, int32_t elements) : layout_(buffer, elements), view_(layout_) {} + template + CollectionLeaf(std::byte* buffer, std::array const& sizes) + : layout_(buffer, sizes[Idx]), view_(layout_) { + static_assert(N >= Idx); + } + using Layout = T; + using View = typename Layout::View; + using ConstView = typename Layout::ConstView; + Layout layout_; // + View view_; //! + // Make sure types are not void. + static_assert(not std::is_same::value); + }; + + template + struct CollectionImpl : public CollectionLeaf, public CollectionImpl { + CollectionImpl() = default; + CollectionImpl(std::byte* buffer, int32_t elements) : CollectionLeaf(buffer, elements) {} + + template + CollectionImpl(std::byte* buffer, std::array const& sizes) + : CollectionLeaf(buffer, sizes), + CollectionImpl(CollectionLeaf::layout_.metadata().nextByte(), sizes) {} + }; + + template + struct CollectionImpl : public CollectionLeaf { + CollectionImpl() = default; + CollectionImpl(std::byte* buffer, int32_t elements) : CollectionLeaf(buffer, elements) {} + + template + CollectionImpl(std::byte* buffer, std::array const& sizes) : CollectionLeaf(buffer, sizes) { + static_assert(N == Idx + 1); + } + }; + + template + struct Collections : public CollectionImpl<0, Args...> {}; + + // return the type at the Idx position in Args... 
+ template + using TypeResolver = typename std::tuple_element>::type; + + // count how many times the type T occurs in Args... + template + inline constexpr std::size_t typeCount = ((std::is_same::value ? 1 : 0) + ... + 0); + + // count the non-void elements of Args... + template + inline constexpr std::size_t membersCount = sizeof...(Args); + + // if the type T occurs in Tuple, TupleTypeIndex has a static member value with the corresponding index; + // otherwise there is no such data member. + template + struct TupleTypeIndex {}; + + template + struct TupleTypeIndex> { + static_assert(typeCount == 0, "the requested type appears more than once among the arguments"); + static constexpr std::size_t value = 0; + }; + + template + struct TupleTypeIndex> { + static_assert(not std::is_same_v); + static_assert(typeCount == 1, "the requested type does not appear among the arguments"); + static constexpr std::size_t value = 1 + TupleTypeIndex>::value; + }; + + // if the type T occurs in Args..., TypeIndex has a static member value with the corresponding index; + // otherwise there is no such data member. + template + using TypeIndex = TupleTypeIndex>; + + // return the index where the type T occurs in Args... 
+ template + inline constexpr std::size_t typeIndex = TypeIndex::value; + +} // namespace portablecollection + +#endif // DataFormats_Portable_interface_PortableCollectionCommon_h \ No newline at end of file diff --git a/DataFormats/Portable/interface/PortableDeviceCollection.h b/DataFormats/Portable/interface/PortableDeviceCollection.h index 84ed057e82f8c..568e15e2ca6df 100644 --- a/DataFormats/Portable/interface/PortableDeviceCollection.h +++ b/DataFormats/Portable/interface/PortableDeviceCollection.h @@ -9,6 +9,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "DataFormats/Portable/interface/PortableCollectionCommon.h" // generic SoA-based product in device memory template >> @@ -75,4 +76,215 @@ class PortableDeviceCollection { View view_; //! }; -#endif // DataFormats_Portable_interface_PortableDeviceCollection_h +// generic SoA-based product in device memory +template +class PortableDeviceMultiCollection { + //static_assert(alpaka::isDevice); + static_assert(not std::is_same_v, + "Use PortableHostCollection instead of PortableDeviceCollection"); + + template + static constexpr std::size_t count_t_ = portablecollection::typeCount; + + template + static constexpr std::size_t index_t_ = portablecollection::typeIndex; + + static constexpr std::size_t members_ = sizeof...(Args) + 1; + +public: + using Buffer = cms::alpakatools::device_buffer; + using ConstBuffer = cms::alpakatools::const_device_buffer; + using Implementation = portablecollection::CollectionImpl<0, T0, Args...>; + + using SizesArray = std::array; + + template + using Layout = portablecollection::TypeResolver; + + //template + //using View = typename Layout::View; + // Workaround for flaky expansion of tempaltes by nvcc (expanding with "Args" instead of "Args... 
+ template + using View = typename std::tuple_element>::type::View; + + //template + //using ConstView = typename Layout::ConstView; + // Workaround for flaky expansion of tempaltes by nvcc (expanding with "Args" instead of "Args..." + template + using ConstView = typename std::tuple_element>::type::ConstView; + +private: + template + using Leaf = portablecollection::CollectionLeaf>; + + template + Leaf& get() { + return static_cast&>(impl_); + } + + template + Leaf const& get() const { + return static_cast const&>(impl_); + } + + template + Leaf>& get() { + return static_cast>&>(impl_); + } + + template + Leaf> const& get() const { + return static_cast> const&>(impl_); + } + +public: + PortableDeviceMultiCollection() = default; + + PortableDeviceMultiCollection(int32_t elements, TDev const& device) + : buffer_{cms::alpakatools::make_device_buffer(device, Layout<>::computeDataSize(elements))}, + impl_{buffer_->data(), elements} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); + static_assert(members_ == 1); + } + + template >> + PortableDeviceMultiCollection(int32_t elements, TQueue const& queue) + : buffer_{cms::alpakatools::make_device_buffer(queue, Layout<>::computeDataSize(elements))}, + impl_{buffer_->data(), elements} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); + static_assert(members_ == 1); + } + + static int32_t computeDataSize(const SizesArray& sizes) { + int32_t ret = 0; + portablecollection::constexpr_for<0, members_>( + [&sizes, &ret](auto i) { ret += Layout::computeDataSize(sizes[i]); }); + return ret; + } + + PortableDeviceMultiCollection(const SizesArray& sizes, TDev const& device) + // allocate device memory + : buffer_{cms::alpakatools::make_device_buffer(device, computeDataSize(sizes))}, + 
impl_{buffer_->data(), sizes} { + portablecollection::constexpr_for<0, members_>( + [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); + constexpr auto alignment = Layout<0>::alignment; + portablecollection::constexpr_for<1, members_>( + [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); + } + + template >> + PortableDeviceMultiCollection(const SizesArray& sizes, TQueue const& queue) + // allocate device memory asynchronously on the given work queue + : buffer_{cms::alpakatools::make_device_buffer(queue, computeDataSize(sizes))}, + impl_{buffer_->data(), sizes} { + portablecollection::constexpr_for<0, members_>( + [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); + constexpr auto alignment = Layout<0>::alignment; + portablecollection::constexpr_for<1, members_>( + [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); + } + + // non-copyable + PortableDeviceMultiCollection(PortableDeviceMultiCollection const&) = delete; + PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection const&) = delete; + + // movable + PortableDeviceMultiCollection(PortableDeviceMultiCollection&&) = default; + PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection&&) = default; + + // default destructor + ~PortableDeviceMultiCollection() = default; + + // access the View by index + template Idx)>> + View& view() { + return get().view_; + } + + template Idx)>> + ConstView const& view() const { + return get().view_; + } + + template Idx)>> + ConstView const& const_view() const { + return get().view_; + } + + template Idx)>> + View& operator*() { + return get().view_; + } + + template Idx)>> + ConstView const& operator*() const { + return get().view_; + } + + template Idx)>> + View* operator->() { + return &get().view_; + } + + template Idx)>> + ConstView const* operator->() const { + return &get().view_; + } + + // access the View by type + template 
+ typename T::View& view() { + return get().view_; + } + + template + typename T::ConstView const& view() const { + return get().view_; + } + + template + typename T::ConstView const& const_view() const { + return get().view_; + } + + template + typename T::View& operator*() { + return get().view_; + } + + template + typename T::ConstView const& operator*() const { + return get().view_; + } + + template + typename T::View* operator->() { + return &get().view_; + } + + template + typename T::ConstView const* operator->() const { + return &get().view_; + } + + // access the Buffer + Buffer buffer() { return *buffer_; } + ConstBuffer buffer() const { return *buffer_; } + ConstBuffer const_buffer() const { return *buffer_; } + + // Extract the sizes array + SizesArray sizes() const { + SizesArray ret; + portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get().layout_.metadata().size(); }); + return ret; + } + +private: + std::optional buffer_; //! + Implementation impl_; // (serialized: this is where the layouts live) +}; + +#endif // DataFormats_Portable_interface_PortableDeviceCollection_h \ No newline at end of file diff --git a/DataFormats/Portable/interface/PortableHostCollection.h b/DataFormats/Portable/interface/PortableHostCollection.h index 8b098688455e8..61dde4c58f425 100644 --- a/DataFormats/Portable/interface/PortableHostCollection.h +++ b/DataFormats/Portable/interface/PortableHostCollection.h @@ -9,6 +9,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/host.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "DataFormats/Portable/interface/PortableCollectionCommon.h" // generic SoA-based product in host memory template @@ -86,4 +87,236 @@ class PortableHostCollection { View view_; //! 
}; +// generic SoA-based product in host memory +template +class PortableHostMultiCollection { + template + static constexpr std::size_t count_t_ = portablecollection::typeCount; + + template + static constexpr std::size_t index_t_ = portablecollection::typeIndex; + + static constexpr std::size_t members_ = portablecollection::membersCount; + +public: + using Buffer = cms::alpakatools::host_buffer; + using ConstBuffer = cms::alpakatools::const_host_buffer; + using Implementation = portablecollection::CollectionImpl<0, T0, Args...>; + + using SizesArray = std::array; + + template Idx)>> + using Layout = portablecollection::TypeResolver; + template Idx)>> + using View = typename Layout::View; + template Idx)>> + using ConstView = typename Layout::ConstView; + +private: + template + using Leaf = portablecollection::CollectionLeaf>; + + template + Leaf& get() { + return static_cast&>(impl_); + } + + template + Leaf const& get() const { + return static_cast const&>(impl_); + } + + template + portablecollection::CollectionLeaf, T>& get() { + return static_cast, T>&>(impl_); + } + + template + const portablecollection::CollectionLeaf, T>& get() const { + return static_cast, T>&>(impl_); + } + + static int32_t computeDataSize(const std::array& sizes) { + int32_t ret = 0; + portablecollection::constexpr_for<0, members_>( + [&sizes, &ret](auto i) { ret += Layout::computeDataSize(sizes[i]); }); + return ret; + } + +public: + PortableHostMultiCollection() = default; + + PortableHostMultiCollection(int32_t elements, alpaka_common::DevHost const& host) + // allocate pageable host memory + : buffer_{cms::alpakatools::make_host_buffer(Layout<>::computeDataSize(elements))}, + impl_{buffer_->data(), elements} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); + static_assert(members_ == 1); + } + + template >> + PortableHostMultiCollection(int32_t elements, 
TQueue const& queue) + // allocate pinned host memory associated to the given work queue, accessible by the queue's device + : buffer_{cms::alpakatools::make_host_buffer(queue, Layout<>::computeDataSize(elements))}, + impl_{buffer_->data(), elements} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); + static_assert(members_ == 1); + } + + PortableHostMultiCollection(const std::array& sizes, alpaka_common::DevHost const& host) + // allocate pinned host memory associated to the given work queue, accessible by the queue's device + : buffer_{cms::alpakatools::make_host_buffer(computeDataSize(sizes))}, + impl_{buffer_->data(), sizes} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + portablecollection::constexpr_for<0, members_>( + [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); + constexpr auto alignment = Layout<0>::alignment; + portablecollection::constexpr_for<1, members_>( + [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); + } + + template >> + PortableHostMultiCollection(const std::array& sizes, TQueue const& queue) + // allocate pinned host memory associated to the given work queue, accessible by the queue's device + : buffer_{cms::alpakatools::make_host_buffer(queue, computeDataSize(sizes))}, + impl_{buffer_->data(), sizes} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + portablecollection::constexpr_for<0, members_>( + [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); + constexpr auto alignment = Layout<0>::alignment; + portablecollection::constexpr_for<1, members_>( + [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); + } + + // non-copyable + PortableHostMultiCollection(PortableHostMultiCollection 
const&) = delete; + PortableHostMultiCollection& operator=(PortableHostMultiCollection const&) = delete; + + // movable + PortableHostMultiCollection(PortableHostMultiCollection&&) = default; + PortableHostMultiCollection& operator=(PortableHostMultiCollection&&) = default; + + // default destructor + ~PortableHostMultiCollection() = default; + + // access the View by index + template Idx)>> + View& view() { + return get().view_; + } + + template Idx)>> + ConstView const& view() const { + return get().view_; + } + + template Idx)>> + ConstView const& const_view() const { + return get().view_; + } + + template Idx)>> + View& operator*() { + return get().view_; + } + + template Idx)>> + ConstView const& operator*() const { + return get().view_; + } + + template Idx)>> + View* operator->() { + return &get().view_; + } + + template Idx)>> + ConstView const* operator->() const { + return &get().view_; + } + + // access the View by type + template + typename T::View& view() { + return get().view_; + } + + template + typename T::ConstView const& view() const { + return get().view_; + } + + template + typename T::ConstView const& const_view() const { + return get().view_; + } + + template + typename T::View& operator*() { + return get().view_; + } + + template + typename T::ConstView const& operator*() const { + return get().view_; + } + + template + typename T::View* operator->() { + return &get().view_; + } + + template + typename T::ConstView const* operator->() const { + return &get().view_; + } + + // access the Buffer + Buffer buffer() { return *buffer_; } + ConstBuffer buffer() const { return *buffer_; } + ConstBuffer const_buffer() const { return *buffer_; } + + // Extract the sizes array + SizesArray sizes() const { + SizesArray ret; + portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get().layout_.metadata().size(); }); + return ret; + } + // part of the ROOT read streamer + static void ROOTReadStreamer(PortableHostMultiCollection* newObj, 
Implementation& onfileImpl) { + newObj->~PortableHostMultiCollection(); + // use the global "host" object returned by cms::alpakatools::host() + std::array sizes; + portablecollection::constexpr_for<0, members_>([&sizes, &onfileImpl](auto i) { + sizes[i] = static_cast const&>(onfileImpl).layout_.metadata().size(); + }); + new (newObj) PortableHostMultiCollection(sizes, cms::alpakatools::host()); + portablecollection::constexpr_for<0, members_>([&newObj, &onfileImpl](auto i) { + static_cast&>(newObj->impl_).layout_.ROOTReadStreamer(static_cast const&>(onfileImpl).layout_); + static_cast&>(onfileImpl).layout_.ROOTStreamerCleaner(); + }); + } + +private: + std::optional buffer_; //! + Implementation impl_; // (serialized: this is where the layouts live) +}; + +// Singleton case does not need to be aliased. A special template covers it. + +// This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail. +template +using PortableHostCollection2 = ::PortableHostMultiCollection; + +template +using PortableHostCollection3 = ::PortableHostMultiCollection; + +template +using PortableHostCollection4 = ::PortableHostMultiCollection; + +template +using PortableHostCollection5 = ::PortableHostMultiCollection; + #endif // DataFormats_Portable_interface_PortableHostCollection_h diff --git a/DataFormats/Portable/interface/PortableHostCollectionReadRules.h b/DataFormats/Portable/interface/PortableHostCollectionReadRules.h index e207665b757e2..bc6a6e730e238 100644 --- a/DataFormats/Portable/interface/PortableHostCollectionReadRules.h +++ b/DataFormats/Portable/interface/PortableHostCollectionReadRules.h @@ -37,6 +37,35 @@ static void readPortableHostCollection_v1(char *target, TVirtualObject *from_buf Collection::ROOTReadStreamer(newObj, onfile.layout_); } +// read function for PortableHostCollection, called for every event +template +static void readPortableHostMultiCollection_v1(char *target, TVirtualObject *from_buffer) { + // extract 
the actual types + using Collection = T; + using Implementation = typename Collection::Implementation; + + // valid only for PortableHostCollection + //static_assert(std::is_same_v>); + + // proxy for the object being read from file + struct OnFile { + Implementation &impl_; + }; + + // address in memory of the buffer containing the object being read from file + char *address = static_cast(from_buffer->GetObject()); + // offset of the "layout_" data member + static ptrdiff_t impl_offset = from_buffer->GetClass()->GetDataMemberOffset("impl_"); + // reference to the Layout object being read from file + OnFile onfile = {*(Implementation *)(address + impl_offset)}; + + // pointer to the Collection object being constructed in memory + Collection *newObj = (Collection *)target; + + // move the data from the on-file layout to the newly constructed object + Collection::ROOTReadStreamer(newObj, onfile.impl_); +} + // put set_PortableHostCollection_read_rules in the ROOT namespace to let it forward declare GenerateInitInstance namespace ROOT { @@ -67,10 +96,42 @@ namespace ROOT { return true; } + + // set the read rules for PortableHostMultiCollection; + // this is called only once, when the dictionary is loaded. 
+ template + static bool set_PortableHostMultiCollection_read_rules(std::string const &type) { + // forward declaration + TGenericClassInfo *GenerateInitInstance(T const *); + + // build the read rules + std::vector readrules(1); + ROOT::Internal::TSchemaHelper &rule = readrules[0]; + rule.fTarget = "buffer_,impl_"; + rule.fSourceClass = type; + rule.fSource = type + "::Implementation impl_;"; + rule.fCode = type + "::ROOTReadStreamer(newObj, onfile.impl_)"; + rule.fVersion = "[1-]"; + rule.fChecksum = ""; + rule.fInclude = ""; + rule.fEmbed = false; + rule.fFunctionPtr = reinterpret_cast(::readPortableHostMultiCollection_v1); + rule.fAttributes = ""; + + // set the read rules + TGenericClassInfo *instance = GenerateInitInstance((T const *)nullptr); + instance->SetReadRules(readrules); + + return true; + } } // namespace ROOT #define SET_PORTABLEHOSTCOLLECTION_READ_RULES(COLLECTION) \ static bool EDM_CONCATENATE(set_PortableHostCollection_read_rules_done_at_, __LINE__) [[maybe_unused]] = \ ROOT::set_PortableHostCollection_read_rules(EDM_STRINGIZE(COLLECTION)) +#define SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(COLLECTION) \ + static bool EDM_CONCATENATE(set_PortableHostMultiCollection_read_rules_done_at_, __LINE__) [[maybe_unused]] = \ + ROOT::set_PortableHostMultiCollection_read_rules(EDM_STRINGIZE(COLLECTION)) + #endif // DataFormats_Portable_interface_PortableHostCollectionReadRules_h diff --git a/DataFormats/Portable/interface/PortableObject.h b/DataFormats/Portable/interface/PortableObject.h index c9aadb160bb05..9df5ce0bcc96f 100644 --- a/DataFormats/Portable/interface/PortableObject.h +++ b/DataFormats/Portable/interface/PortableObject.h @@ -54,4 +54,4 @@ namespace cms::alpakatools { }; } // namespace cms::alpakatools -#endif // DataFormats_Portable_interface_PortableObject_h +#endif // DataFormats_Portable_interface_PortableObject_h \ No newline at end of file diff --git a/DataFormats/Portable/interface/alpaka/PortableCollection.h 
b/DataFormats/Portable/interface/alpaka/PortableCollection.h index 1f9fa22e49cd8..e7bd78e4c0023 100644 --- a/DataFormats/Portable/interface/alpaka/PortableCollection.h +++ b/DataFormats/Portable/interface/alpaka/PortableCollection.h @@ -18,4 +18,56 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // namespace ALPAKA_ACCELERATOR_NAMESPACE -#endif // DataFormats_Portable_interface_alpaka_PortableCollection_h +namespace ALPAKA_ACCELERATOR_NAMESPACE { + +#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + // Singleton case does not need to be aliased. A special template covers it. + + // This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail. + template + using PortableCollection2 = ::PortableHostMultiCollection; + + template + using PortableCollection3 = ::PortableHostMultiCollection; + + template + using PortableCollection4 = ::PortableHostMultiCollection; + + template + using PortableCollection5 = ::PortableHostMultiCollection; +#else + // Singleton case does not need to be aliased. A special template covers it. + + // This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail. 
+ template + using PortableCollection2 = ::PortableDeviceMultiCollection; + + template + using PortableCollection3 = ::PortableDeviceMultiCollection; + + template + using PortableCollection4 = ::PortableDeviceMultiCollection; + + template + using PortableCollection5 = ::PortableDeviceMultiCollection; +#endif // ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace traits { +// specialise the trait for the device provided by the ALPAKA_ACCELERATOR_NAMESPACE +#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + template + class PortableMultiCollectionTrait { + using CollectionType = ::PortableHostMultiCollection; + }; +#else + template + class PortableMultiCollectionTrait { + using CollectionType = ::PortableDeviceMultiCollection; + }; +#endif + +} // namespace traits + +#endif // DataFormats_Portable_interface_alpaka_PortableCollection_h \ No newline at end of file diff --git a/DataFormats/Portable/interface/alpaka/PortableObject.h b/DataFormats/Portable/interface/alpaka/PortableObject.h index 417173176b203..05a5e2d7b64cd 100644 --- a/DataFormats/Portable/interface/alpaka/PortableObject.h +++ b/DataFormats/Portable/interface/alpaka/PortableObject.h @@ -18,4 +18,4 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // namespace ALPAKA_ACCELERATOR_NAMESPACE -#endif // DataFormats_Portable_interface_alpaka_PortableObject_h +#endif // DataFormats_Portable_interface_alpaka_PortableObject_h \ No newline at end of file diff --git a/DataFormats/Portable/scripts/portableDeviceCollectionHints b/DataFormats/Portable/scripts/portableDeviceCollectionHints new file mode 100755 index 0000000000000..56bc9e90350e0 --- /dev/null +++ b/DataFormats/Portable/scripts/portableDeviceCollectionHints @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +import sys + +# Get collectionand SoAs names +if len(sys.argv) != 2: + raise RuntimeError("Expecting one portable collection name.") + +collectionName = sys.argv[1] + +print("In /src/alpaka/classes_cuda_def.xml (with 
necessary includes in /src/alpaka/classes_cuda.h):\n") +print("") +print(" ") +print(" " % collectionName) +print(" \" persistent=\"false\"/>" % collectionName) +print(" >\" persistent=\"false\"/>" % collectionName) +print("\n") + +print("In /src/alpaka/classes_rocm_def.xml (with necessary includes in /src/alpaka/classes_rocm.h):\n") +print("") +print(" ") +print(" " % collectionName) +print(" \" persistent=\"false\"/>" % collectionName) +print(" >\" persistent=\"false\"/>" % collectionName) +print("\n") \ No newline at end of file diff --git a/DataFormats/Portable/scripts/portableHostCollectionHints b/DataFormats/Portable/scripts/portableHostCollectionHints new file mode 100755 index 0000000000000..d92e9cb2f132e --- /dev/null +++ b/DataFormats/Portable/scripts/portableHostCollectionHints @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 + +import sys + +# Get collection and SoA names +if len(sys.argv) < 3: + raise RuntimeError("Expecting at least one portable collection name and one layout name.") + +collectionName = sys.argv[1] +layouts = sys.argv[2:] +for i in range(len(layouts)): + layouts[i] += "<128, false>" + +print("In /src/classes_def.xml (with necessary includes in /src/classes.h):\n") +print("") +for l in layouts: + print(" "% l) +print() +if len(layouts) > 1: + print(" ") + for i in range(0, len(layouts)): + print(" \"/>") + print("\n ") + for i in range(0, len(layouts)): + print(" \"/>" % (i, layouts[i])) + print("") +print(" ") +print(" "% collectionName) +print(" \" splitLevel=\"0\"/>"% collectionName) +print("") diff --git a/DataFormats/PortableTestObjects/interface/TestHostCollection.h b/DataFormats/PortableTestObjects/interface/TestHostCollection.h index f7f4ffd64b7d8..65150341ad1d2 100644 --- a/DataFormats/PortableTestObjects/interface/TestHostCollection.h +++ b/DataFormats/PortableTestObjects/interface/TestHostCollection.h @@ -9,6 +9,10 @@ namespace portabletest { // SoA with x, y, z, id fields in host memory using TestHostCollection = 
PortableHostCollection; + using TestHostMultiCollection2 = PortableHostCollection2; + + using TestHostMultiCollection3 = PortableHostCollection3; + } // namespace portabletest #endif // DataFormats_PortableTestObjects_interface_TestHostCollection_h diff --git a/DataFormats/PortableTestObjects/interface/TestSoA.h b/DataFormats/PortableTestObjects/interface/TestSoA.h index 0fd5c6d956269..acb4f6ed83308 100644 --- a/DataFormats/PortableTestObjects/interface/TestSoA.h +++ b/DataFormats/PortableTestObjects/interface/TestSoA.h @@ -34,6 +34,34 @@ namespace portabletest { using TestSoA = TestSoALayout<>; + GENERATE_SOA_LAYOUT(TestSoALayout2, + // columns: one value per element + SOA_COLUMN(double, x2), + SOA_COLUMN(double, y2), + SOA_COLUMN(double, z2), + SOA_COLUMN(int32_t, id2), + // scalars: one value for the whole structure + SOA_SCALAR(double, r2), + // Eigen columns + // the typedef is needed because commas confuse macros + SOA_EIGEN_COLUMN(Matrix, m2)) + + using TestSoA2 = TestSoALayout2<>; + + GENERATE_SOA_LAYOUT(TestSoALayout3, + // columns: one value per element + SOA_COLUMN(double, x3), + SOA_COLUMN(double, y3), + SOA_COLUMN(double, z3), + SOA_COLUMN(int32_t, id3), + // scalars: one value for the whole structure + SOA_SCALAR(double, r3), + // Eigen columns + // the typedef is needed because commas confuse macros + SOA_EIGEN_COLUMN(Matrix, m3)) + + using TestSoA3 = TestSoALayout3<>; + } // namespace portabletest #endif // DataFormats_PortableTestObjects_interface_TestSoA_h diff --git a/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h index cde180c95b607..1facc29e19fd3 100644 --- a/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h +++ b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h @@ -20,6 +20,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // SoA with x, y, z, id fields in device global memory using TestDeviceCollection = 
PortableCollection; + using TestDeviceMultiCollection2 = PortableCollection2; + + using TestDeviceMultiCollection3 = PortableCollection3; + } // namespace portabletest } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml index fc3678362dc71..9e9cc36321fc7 100644 --- a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml @@ -6,4 +6,12 @@ + + + + + + + + diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml index 65ce1c888572b..8bc27b95ccb63 100644 --- a/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml @@ -6,4 +6,12 @@ + + + + + + + + diff --git a/DataFormats/PortableTestObjects/src/classes.cc b/DataFormats/PortableTestObjects/src/classes.cc index e11de1a3d7d36..88e2dea528d49 100644 --- a/DataFormats/PortableTestObjects/src/classes.cc +++ b/DataFormats/PortableTestObjects/src/classes.cc @@ -4,4 +4,6 @@ #include "DataFormats/PortableTestObjects/interface/TestHostObject.h" SET_PORTABLEHOSTCOLLECTION_READ_RULES(portabletest::TestHostCollection); +SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(portabletest::TestHostMultiCollection2); +SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(portabletest::TestHostMultiCollection3); SET_PORTABLEHOSTOBJECT_READ_RULES(portabletest::TestHostObject); diff --git a/DataFormats/PortableTestObjects/src/classes_def.xml b/DataFormats/PortableTestObjects/src/classes_def.xml index 54e9bc24249c2..b1162f5440329 100644 --- a/DataFormats/PortableTestObjects/src/classes_def.xml +++ b/DataFormats/PortableTestObjects/src/classes_def.xml @@ -7,4 +7,45 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc b/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc index e1834ff95a31f..cbeae5e4fef81 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc @@ -78,6 +78,50 @@ namespace { assert(view.metadata().addressOf_m() == &view[0].m().coeffRef(0, 0)); } + template + void checkViewAddresses2(T const& view) { + assert(view.metadata().addressOf_x2() == view.x2()); + assert(view.metadata().addressOf_x2() == &view.x2(0)); + assert(view.metadata().addressOf_x2() == &view[0].x2()); + assert(view.metadata().addressOf_y2() == view.y2()); + assert(view.metadata().addressOf_y2() == &view.y2(0)); + assert(view.metadata().addressOf_y2() == &view[0].y2()); + assert(view.metadata().addressOf_z2() == view.z2()); + assert(view.metadata().addressOf_z2() == &view.z2(0)); + assert(view.metadata().addressOf_z2() == &view[0].z2()); + assert(view.metadata().addressOf_id2() == view.id2()); + assert(view.metadata().addressOf_id2() == &view.id2(0)); + assert(view.metadata().addressOf_id2() == &view[0].id2()); + assert(view.metadata().addressOf_m2() == view.m2()); + assert(view.metadata().addressOf_m2() == &view.m2(0).coeffRef(0, 0)); + assert(view.metadata().addressOf_m2() == &view[0].m2().coeffRef(0, 0)); + assert(view.metadata().addressOf_r2() == &view.r2()); + //assert(view.metadata().addressOf_r2() == &view.r2(0)); // cannot access a scalar with an index + //assert(view.metadata().addressOf_r2() == &view[0].r2()); // cannot access a scalar via a SoA row-like accessor + } + + template + void checkViewAddresses3(T const& view) { + assert(view.metadata().addressOf_x3() == view.x3()); + assert(view.metadata().addressOf_x3() == &view.x3(0)); + assert(view.metadata().addressOf_x3() == &view[0].x3()); + assert(view.metadata().addressOf_y3() == view.y3()); + assert(view.metadata().addressOf_y3() == &view.y3(0)); + assert(view.metadata().addressOf_y3() == 
&view[0].y3()); + assert(view.metadata().addressOf_z3() == view.z3()); + assert(view.metadata().addressOf_z3() == &view.z3(0)); + assert(view.metadata().addressOf_z3() == &view[0].z3()); + assert(view.metadata().addressOf_id3() == view.id3()); + assert(view.metadata().addressOf_id3() == &view.id3(0)); + assert(view.metadata().addressOf_id3() == &view[0].id3()); + assert(view.metadata().addressOf_m3() == view.m3()); + assert(view.metadata().addressOf_m3() == &view.m3(0).coeffRef(0, 0)); + assert(view.metadata().addressOf_m3() == &view[0].m3().coeffRef(0, 0)); + assert(view.metadata().addressOf_r3() == &view.r3()); + //assert(view.metadata().addressOf_r3() == &view.r3(0)); // cannot access a scalar with an index + //assert(view.metadata().addressOf_r3() == &view[0].r3()); // cannot access a scalar via a SoA row-like accessor + } + } // namespace class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { @@ -85,6 +129,9 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { TestAlpakaAnalyzer(edm::ParameterSet const& config) : source_{config.getParameter("source")}, token_{consumes(source_)}, + //tokenMulti_{consumes(source_)}, + tokenMulti2_{consumes(source_)}, + tokenMulti3_{consumes(source_)}, expectSize_{config.getParameter("expectSize")}, expectXvalues_{config.getParameter>("expectXvalues")} { if (std::string const& eb = config.getParameter("expectBackend"); not eb.empty()) { @@ -165,6 +212,117 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { << ", got " << cms::alpakatools::toString(backend); } } + + // portabletest::TestHostMultiCollection const& productMulti = event.get(tokenMulti_); + // auto const& viewMulti0 = productMulti.const_view<0>(); + // auto& mviewMulti0 = productMulti.view<0>(); + // auto const& cmviewMulti0 = productMulti.view<0>(); + // auto const& viewMulti1 = productMulti.const_view<1>(); + // auto& mviewMulti1 = productMulti.view<1>(); + // auto const& cmviewMulti1 = productMulti.view<1>(); + + 
portabletest::TestHostMultiCollection2 const& productMulti2 = event.get(tokenMulti2_); + auto const& viewMulti2_0 = productMulti2.const_view<0>(); + auto& mviewMulti2_0 = productMulti2.view<0>(); + auto const& cmviewMulti2_0 = productMulti2.view<0>(); + auto const& viewMulti2_1 = productMulti2.const_view<1>(); + auto& mviewMulti2_1 = productMulti2.view<1>(); + auto const& cmviewMulti2_1 = productMulti2.view<1>(); + + checkViewAddresses(viewMulti2_0); + checkViewAddresses(mviewMulti2_0); + checkViewAddresses(cmviewMulti2_0); + checkViewAddresses2(viewMulti2_1); + checkViewAddresses2(mviewMulti2_1); + checkViewAddresses2(cmviewMulti2_1); + + assert(viewMulti2_0.r() == 1.); + for (int32_t i = 0; i < viewMulti2_0.metadata().size(); ++i) { + auto vi = viewMulti2_0[i]; + // std::stringstream s; + // s << "i=" << i << " x=" << vi.x() << " y=" << vi.y() << " z=" << vi.z() << " id=" << vi.id() << "'\nm=" << vi.m(); + // std::cout << s.str() << std::endl; + if (not expectXvalues_.empty() and vi.x() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x(); + } + //assert(vi.x() == 0.); + assert(vi.y() == 0.); + assert(vi.z() == 0.); + assert(vi.id() == i); + assert(vi.m() == matrix * i); + } + assert(viewMulti2_1.r2() == 2.); + for (int32_t i = 0; i < viewMulti2_1.metadata().size(); ++i) { + auto vi = viewMulti2_1[i]; + if (not expectXvalues_.empty() and vi.x2() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x2(); + } + assert(vi.y2() == 0.); + assert(vi.z2() == 0.); + assert(vi.id2() == i); + assert(vi.m2() == matrix * i); + } + + portabletest::TestHostMultiCollection3 const& productMulti3 = event.get(tokenMulti3_); + auto const& viewMulti3_0 = productMulti3.const_view<0>(); + auto& mviewMulti3_0 = 
productMulti3.view<0>(); + auto const& cmviewMulti3_0 = productMulti3.view<0>(); + auto const& viewMulti3_1 = productMulti3.const_view<1>(); + auto& mviewMulti3_1 = productMulti3.view<1>(); + auto const& cmviewMulti3_1 = productMulti3.view<1>(); + auto const& viewMulti3_2 = productMulti3.const_view<2>(); + auto& mviewMulti3_2 = productMulti3.view<2>(); + auto const& cmviewMulti3_2 = productMulti3.view<2>(); + + checkViewAddresses(viewMulti3_0); + checkViewAddresses(mviewMulti3_0); + checkViewAddresses(cmviewMulti3_0); + checkViewAddresses2(viewMulti3_1); + checkViewAddresses2(mviewMulti3_1); + checkViewAddresses2(cmviewMulti3_1); + checkViewAddresses3(viewMulti3_2); + checkViewAddresses3(mviewMulti3_2); + checkViewAddresses3(cmviewMulti3_2); + + assert(viewMulti3_0.r() == 1.); + for (int32_t i = 0; i < viewMulti3_0.metadata().size(); ++i) { + auto vi = viewMulti3_0[i]; + if (not expectXvalues_.empty() and vi.x() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x(); + } + assert(vi.y() == 0.); + assert(vi.z() == 0.); + assert(vi.id() == i); + assert(vi.m() == matrix * i); + } + assert(viewMulti3_1.r2() == 2.); + for (int32_t i = 0; i < viewMulti3_1.metadata().size(); ++i) { + auto vi = viewMulti3_1[i]; + if (not expectXvalues_.empty() and vi.x2() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x2(); + } + assert(vi.y2() == 0.); + assert(vi.z2() == 0.); + assert(vi.id2() == i); + assert(vi.m2() == matrix * i); + } + + assert(viewMulti3_2.r3() == 3.); + for (int32_t i = 0; i < viewMulti3_2.metadata().size(); ++i) { + auto vi = viewMulti3_2[i]; + if (not expectXvalues_.empty() and vi.x3() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected 
value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x3(); + } + assert(vi.y3() == 0.); + assert(vi.z3() == 0.); + assert(vi.id3() == i); + assert(vi.m3() == matrix * i); + } } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -189,6 +347,9 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { const edm::EDGetTokenT token_; edm::EDGetTokenT backendToken_; std::optional expectBackend_; + //const edm::EDGetTokenT tokenMulti_; + const edm::EDGetTokenT tokenMulti2_; + const edm::EDGetTokenT tokenMulti3_; const int expectSize_; const std::vector expectXvalues_; }; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc index e574da64ef84e..c56ecc3cf1234 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc @@ -19,10 +19,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestAlgoKernel { public: template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, - portabletest::TestDeviceCollection::View view, - int32_t size, - double xvalue) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const { + // global index of the thread within the grid const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; const portabletest::Array flags = {{6, 4, 2, 0}}; @@ -32,12 +30,56 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // make a strided loop over the kernel grid, covering up to "size" elements - for (int32_t i : elements_with_stride(acc, size)) { + for (int32_t i : elements_with_stride(acc, view.metadata().size())) { view[i] = {xvalue, 0., 0., i, flags, matrix * i}; } } }; + class TestAlgoMultiKernel2 { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + portabletest::TestDeviceMultiCollection2::View<1> view, + double xvalue) 
const { + // global index of the thread within the grid + const int32_t thread = alpaka::getIdx(acc)[0u]; + const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; + + // set this only once in the whole kernel grid + if (thread == 0) { + view.r2() = 2.; + } + + // make a strided loop over the kernel grid, covering up to "size" elements + for (int32_t i : elements_with_stride(acc, view.metadata().size())) { + view[i] = {xvalue, 0., 0., i, matrix * i}; + } + } + }; + + class TestAlgoMultiKernel3 { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + portabletest::TestDeviceMultiCollection3::View<2> view, + double xvalue) const { + // global index of the thread within the grid + const int32_t thread = alpaka::getIdx(acc)[0u]; + const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; + + // set this only once in the whole kernel grid + if (thread == 0) { + view.r3() = 3.; + } + + // make a strided loop over the kernel grid, covering up to "size" elements + for (int32_t i : elements_with_stride(acc, view.metadata().size())) { + view[i] = {xvalue, 0., 0., i, matrix * i}; + } + } + }; + void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection, double xvalue) const { // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; @@ -50,7 +92,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // - elements within a single thread on a CPU backend auto workDiv = make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), collection->metadata().size(), xvalue); + alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue); + } + + void TestAlgo::fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue) const { + // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) + uint32_t items 
= 64; + + // use as many groups as needed to cover the whole problem + uint32_t groups = divide_up_by(collection->metadata().size(), items); + uint32_t groups2 = divide_up_by(collection.view<1>().metadata().size(), items); + + // map items to + // - threads with a single element per thread on a GPU backend + // - elements within a single thread on a CPU backend + auto workDiv = make_workdiv(groups, items); + auto workDiv2 = make_workdiv(groups2, items); + + alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue); + alpaka::exec(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view(), xvalue); } class TestAlgoStructKernel { @@ -80,6 +140,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id); } + void TestAlgo::fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue) const { + // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) + uint32_t items = 64; + + // use as many groups as needed to cover the whole problem + uint32_t groups = divide_up_by(collection.view().metadata().size(), items); + uint32_t groups2 = divide_up_by(collection.view().metadata().size(), items); + uint32_t groups3 = divide_up_by(collection.view().metadata().size(), items); + + // map items to + // - threads with a single element per thread on a GPU backend + // - elements within a single thread on a CPU backend + auto workDiv = make_workdiv(groups, items); + auto workDiv2 = make_workdiv(groups2, items); + auto workDiv3 = make_workdiv(groups3, items); + + alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue); + alpaka::exec(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view(), xvalue); + alpaka::exec(queue, workDiv3, TestAlgoMultiKernel3{}, collection.view(), xvalue); + } + class TestAlgoKernelUpdate { public: template >> @@ -103,6 +184,84 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } }; + class 
TestAlgoKernelUpdateMulti2 { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + portabletest::TestSoA::ConstView input, + portabletest::TestSoA2::ConstView input2, + AlpakaESTestDataEDevice::ConstView esData, + portabletest::TestSoA::View output, + portabletest::TestSoA2::View output2) const { + // set this only once in the whole kernel grid + if (once_per_grid(acc)) { + output.r() = input.r(); + output2.r2() = input2.r2(); + } + + // make a strided loop over the kernel grid, covering up to "size" elements + for (int32_t i : elements_with_stride(acc, output.metadata().size())) { + double x = input[i].x(); + if (i < esData.size()) { + x += esData.val(i) + esData.val2(i); + } + output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()}; + } + for (int32_t i : elements_with_stride(acc, output2.metadata().size())) { + double x2 = input2[i].x2(); + if (i < esData.size()) { + x2 += esData.val(i) + esData.val2(i); + } + output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()}; + } + } + }; + + class TestAlgoKernelUpdateMulti3 { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + portabletest::TestSoA::ConstView input, + portabletest::TestSoA2::ConstView input2, + portabletest::TestSoA3::ConstView input3, + AlpakaESTestDataEDevice::ConstView esData, + portabletest::TestSoA::View output, + portabletest::TestSoA2::View output2, + portabletest::TestSoA3::View output3) const { + // set this only once in the whole kernel grid + if (once_per_grid(acc)) { + output.r() = input.r(); + output2.r2() = input2.r2(); + output3.r3() = input3.r3(); + } + + // make a strided loop over the kernel grid, covering up to "size" elements + for (int32_t i : elements_with_stride(acc, output.metadata().size())) { + double x = input[i].x(); + if (i < esData.size()) { + x += esData.val(i) + esData.val2(i); + if (0 == i) + printf("Setting x[0] to %f\n", x); + } + output[i] = {x, input[i].y(), 
input[i].z(), input[i].id(), input[i].flags(), input[i].m()}; + } + for (int32_t i : elements_with_stride(acc, output2.metadata().size())) { + double x2 = input2[i].x2(); + if (i < esData.size()) { + x2 += esData.val(i) + esData.val2(i); + } + output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()}; + } + for (int32_t i : elements_with_stride(acc, output3.metadata().size())) { + double x3 = input3[i].x3(); + if (i < esData.size()) { + x3 += esData.val(i) + esData.val2(i); + } + output3[i] = {x3, input3[i].y3(), input3[i].z3(), input3[i].id3(), input3[i].m3()}; + } + } + }; + portabletest::TestDeviceCollection TestAlgo::update(Queue& queue, portabletest::TestDeviceCollection const& input, AlpakaESTestDataEDevice const& esData) const { @@ -124,4 +283,64 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { return collection; } + portabletest::TestDeviceMultiCollection2 TestAlgo::updateMulti2(Queue& queue, + portabletest::TestDeviceMultiCollection2 const& input, + AlpakaESTestDataEDevice const& esData) const { + portabletest::TestDeviceMultiCollection2 collection{input.sizes(), queue}; + + // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) + uint32_t items = 64; + + // use as many groups as needed to cover the whole problem + auto sizes = collection.sizes(); + uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items); + + // map items to + // - threads with a single element per thread on a GPU backend + // - elements within a single thread on a CPU backend + auto workDiv = make_workdiv(groups, items); + + alpaka::exec(queue, + workDiv, + TestAlgoKernelUpdateMulti2{}, + input.view(), + input.view(), + esData.view(), + collection.view(), + collection.view()); + + return collection; + } + + portabletest::TestDeviceMultiCollection3 TestAlgo::updateMulti3(Queue& queue, + portabletest::TestDeviceMultiCollection3 const& input, + AlpakaESTestDataEDevice const& esData) const { + 
portabletest::TestDeviceMultiCollection3 collection{input.sizes(), queue}; + + // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) + uint32_t items = 64; + + // use as many groups as needed to cover the whole problem + auto sizes = collection.sizes(); + uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items); + + // map items to + // - threads with a single element per thread on a GPU backend + // - elements within a single thread on a CPU backend + auto workDiv = make_workdiv(groups, items); + + alpaka::exec(queue, + workDiv, + TestAlgoKernelUpdateMulti3{}, + input.view(), + input.view(), + input.view(), + esData.view(), + collection.view(), + collection.view(), + collection.view()); + + return collection; + } + } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h index e9eca3f364b54..9d620984ed186 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h @@ -17,6 +17,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { portabletest::TestDeviceCollection update(Queue& queue, portabletest::TestDeviceCollection const& input, AlpakaESTestDataEDevice const& esData) const; + portabletest::TestDeviceMultiCollection2 updateMulti2(Queue& queue, + portabletest::TestDeviceMultiCollection2 const& input, + AlpakaESTestDataEDevice const& esData) const; + portabletest::TestDeviceMultiCollection3 updateMulti3(Queue& queue, + portabletest::TestDeviceMultiCollection3 const& input, + AlpakaESTestDataEDevice const& esData) const; + + void fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue = 0.) const; + void fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue = 0.) 
const; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc index 78054eb48827e..499ce4b522e5f 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc @@ -23,18 +23,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestAlpakaGlobalProducer(edm::ParameterSet const& config) : esToken_(esConsumes(config.getParameter("eventSetupSource"))), deviceToken_{produces()}, + deviceTokenMulti2_{produces()}, + deviceTokenMulti3_{produces()}, size_{config.getParameter("size").getParameter( + EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))}, + size2_{config.getParameter("size").getParameter( + EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))}, + size3_{config.getParameter("size").getParameter( EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))} {} void produce(edm::StreamID, device::Event& iEvent, device::EventSetup const& iSetup) const override { [[maybe_unused]] auto const& esData = iSetup.getData(esToken_); portabletest::TestDeviceCollection deviceProduct{size_, iEvent.queue()}; + portabletest::TestDeviceMultiCollection2 deviceProductMulti2{{{size_, size2_}}, iEvent.queue()}; + portabletest::TestDeviceMultiCollection3 deviceProductMulti3{{{size_, size2_, size3_}}, iEvent.queue()}; // run the algorithm, potentially asynchronously algo_.fill(iEvent.queue(), deviceProduct); + algo_.fillMulti2(iEvent.queue(), deviceProductMulti2); + algo_.fillMulti3(iEvent.queue(), deviceProductMulti3); iEvent.emplace(deviceToken_, std::move(deviceProduct)); + iEvent.emplace(deviceTokenMulti2_, std::move(deviceProductMulti2)); + iEvent.emplace(deviceTokenMulti3_, std::move(deviceProductMulti3)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -53,7 +65,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const 
device::ESGetToken esToken_; const device::EDPutToken deviceToken_; + const device::EDPutToken deviceTokenMulti2_; + const device::EDPutToken deviceTokenMulti3_; const int32_t size_; + const int32_t size2_; + const int32_t size3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc index 95d1423fdf2bc..253b8dcad8988 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc @@ -24,16 +24,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestAlpakaGlobalProducerE(edm::ParameterSet const& config) : esToken_(esConsumes(config.getParameter("eventSetupSource"))), getToken_(consumes(config.getParameter("source"))), - putToken_{produces()} {} + getTokenMulti2_(consumes(config.getParameter("source"))), + getTokenMulti3_(consumes(config.getParameter("source"))), + putToken_{produces()}, + putTokenMulti2_{produces()}, + putTokenMulti3_{produces()} {} void produce(edm::StreamID, device::Event& iEvent, device::EventSetup const& iSetup) const override { auto const& esData = iSetup.getData(esToken_); auto const& input = iEvent.get(getToken_); + auto const& inputMulti2 = iEvent.get(getTokenMulti2_); + auto const& inputMulti3 = iEvent.get(getTokenMulti3_); // run the algorithm, potentially asynchronously auto deviceProduct = algo_.update(iEvent.queue(), input, esData); + auto deviceProductMulti2 = algo_.updateMulti2(iEvent.queue(), inputMulti2, esData); + auto deviceProductMulti3 = algo_.updateMulti3(iEvent.queue(), inputMulti3, esData); iEvent.emplace(putToken_, std::move(deviceProduct)); + iEvent.emplace(putTokenMulti2_, std::move(deviceProductMulti2)); + iEvent.emplace(putTokenMulti3_, std::move(deviceProductMulti3)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -47,7 +57,11 @@ 
namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::ESGetToken esToken_; const device::EDGetToken getToken_; + const device::EDGetToken getTokenMulti2_; + const device::EDGetToken getTokenMulti3_; const device::EDPutToken putToken_; + const device::EDPutToken putTokenMulti2_; + const device::EDPutToken putTokenMulti3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc index d65850985d8fd..3d4e4692a6961 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc @@ -19,7 +19,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestAlpakaProducer : public global::EDProducer<> { public: TestAlpakaProducer(edm::ParameterSet const& config) - : objectToken_{produces()}, collectionToken_{produces()}, size_{config.getParameter("size")} {} + : objectToken_{produces()}, + collectionToken_{produces()}, + deviceTokenMulti2_{produces()}, + deviceTokenMulti3_{produces()}, + size_{config.getParameter("size")}, + size2_{config.getParameter("size2")}, + size3_{config.getParameter("size3")} {} void produce(edm::StreamID sid, device::Event& event, device::EventSetup const&) const override { // run the algorithm, potentially asynchronously @@ -29,21 +35,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { portabletest::TestDeviceObject deviceObject{event.queue()}; algo_.fillObject(event.queue(), deviceObject, 5., 12., 13., 42); + portabletest::TestDeviceCollection deviceProduct{size_, event.queue()}; + algo_.fill(event.queue(), deviceProduct); + + portabletest::TestDeviceMultiCollection2 deviceMultiProduct2{{{size_, size2_}}, event.queue()}; + algo_.fillMulti2(event.queue(), deviceMultiProduct2); + + portabletest::TestDeviceMultiCollection3 deviceMultiProduct3{{{size_, size2_, size3_}}, event.queue()}; + algo_.fillMulti3(event.queue(), 
deviceMultiProduct3); + // put the asynchronous products into the event without waiting - event.emplace(objectToken_, std::move(deviceObject)); event.emplace(collectionToken_, std::move(deviceCollection)); + event.emplace(objectToken_, std::move(deviceObject)); + event.emplace(deviceTokenMulti2_, std::move(deviceMultiProduct2)); + event.emplace(deviceTokenMulti3_, std::move(deviceMultiProduct3)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; desc.add("size"); + desc.add("size2"); + desc.add("size3"); descriptions.addWithDefaultLabel(desc); } private: const device::EDPutToken objectToken_; const device::EDPutToken collectionToken_; + const device::EDPutToken deviceTokenMulti2_; + const device::EDPutToken deviceTokenMulti3_; const int32_t size_; + const int32_t size2_; + const int32_t size3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc index 8eee00da8e774..74cd08e39f56a 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc @@ -25,10 +25,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { public: TestAlpakaStreamProducer(edm::ParameterSet const& config) : size_{config.getParameter("size").getParameter( + EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))}, + size2_{config.getParameter("size").getParameter( + EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))}, + size3_{config.getParameter("size").getParameter( EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))} { getToken_ = consumes(config.getParameter("source")); esToken_ = esConsumes(config.getParameter("eventSetupSource")); devicePutToken_ = produces(config.getParameter("productInstanceName")); + devicePutTokenMulti2_ = produces(config.getParameter("productInstanceName")); + devicePutTokenMulti3_ = 
produces(config.getParameter("productInstanceName")); } void produce(device::Event& iEvent, device::EventSetup const& iSetup) override { @@ -36,11 +42,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { [[maybe_unused]] auto const& esData = iSetup.getData(esToken_); auto deviceProduct = std::make_unique(size_, iEvent.queue()); + auto deviceProductMulti2 = std::make_unique( + portabletest::TestDeviceMultiCollection2::SizesArray{{size_, size2_}}, iEvent.queue()); + auto deviceProductMulti3 = std::make_unique( + portabletest::TestDeviceMultiCollection3::SizesArray{{size_, size2_, size3_}}, iEvent.queue()); // run the algorithm, potentially asynchronously algo_.fill(iEvent.queue(), *deviceProduct); + algo_.fillMulti2(iEvent.queue(), *deviceProductMulti2); + algo_.fillMulti3(iEvent.queue(), *deviceProductMulti3); iEvent.put(devicePutToken_, std::move(deviceProduct)); + iEvent.put(devicePutTokenMulti2_, std::move(deviceProductMulti2)); + iEvent.put(devicePutTokenMulti3_, std::move(deviceProductMulti3)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -62,7 +76,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { edm::EDGetTokenT getToken_; device::ESGetToken, AlpakaESTestRecordB> esToken_; device::EDPutToken devicePutToken_; + device::EDPutToken devicePutTokenMulti2_; + device::EDPutToken devicePutTokenMulti3_; const int32_t size_; + const int32_t size2_; + const int32_t size3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc index 5c53e5aa9de16..613c31498746a 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc @@ -27,6 +27,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestAlpakaStreamSynchronizingProducer(edm::ParameterSet 
const& iConfig) : esTokenDevice_(esConsumes()), putToken_{produces()}, + putTokenMulti2_{produces()}, + putTokenMulti3_{produces()}, helper_{iConfig, consumesCollector()}, hostHelper_{iConfig, consumesCollector()}, expectedInt_{iConfig.getParameter("expectedInt")} {} @@ -44,6 +46,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void produce(device::Event& iEvent, device::EventSetup const& iSetup) override { iEvent.emplace(putToken_, helper_.moveFrom()); + iEvent.emplace(putTokenMulti2_, helper_.moveFromMulti2()); + iEvent.emplace(putTokenMulti3_, helper_.moveFromMulti3()); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -57,6 +61,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::ESGetToken esTokenDevice_; const edm::EDPutTokenT putToken_; + const edm::EDPutTokenT putTokenMulti2_; + const edm::EDPutTokenT putTokenMulti3_; TestHelperClass helper_; cms::alpakatest::TestHostOnlyHelperClass const hostHelper_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc index a6c9a7370d717..aa4c26e4c93bd 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc @@ -5,6 +5,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestHelperClass::TestHelperClass(edm::ParameterSet const& iConfig, edm::ConsumesCollector iC) : getToken_(iC.consumes(iConfig.getParameter("source"))), + getTokenMulti2_(iC.consumes(iConfig.getParameter("source"))), + getTokenMulti3_(iC.consumes(iConfig.getParameter("source"))), esTokenHost_(iC.esConsumes()), esTokenDevice_(iC.esConsumes()) {} @@ -14,9 +16,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { [[maybe_unused]] auto esDataHostHandle = iSetup.getHandle(esTokenHost_); [[maybe_unused]] auto const& esDataDevice = iSetup.getData(esTokenDevice_); portabletest::TestDeviceCollection const& deviceProduct = iEvent.get(getToken_); + 
portabletest::TestDeviceMultiCollection2 const& deviceProductMulti2 = iEvent.get(getTokenMulti2_); + portabletest::TestDeviceMultiCollection3 const& deviceProductMulti3 = iEvent.get(getTokenMulti3_); hostProduct_ = portabletest::TestHostCollection{deviceProduct->metadata().size(), iEvent.queue()}; + hostProductMulti2_ = portabletest::TestHostMultiCollection2{deviceProductMulti2.sizes(), iEvent.queue()}; + hostProductMulti3_ = portabletest::TestHostMultiCollection3{deviceProductMulti3.sizes(), iEvent.queue()}; alpaka::memcpy(iEvent.queue(), hostProduct_.buffer(), deviceProduct.const_buffer()); + alpaka::memcpy(iEvent.queue(), hostProductMulti2_.buffer(), deviceProductMulti2.const_buffer()); + alpaka::memcpy(iEvent.queue(), hostProductMulti3_.buffer(), deviceProductMulti3.const_buffer()); } } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h index bc10779d9229d..d592fb40b4210 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h @@ -24,14 +24,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void makeAsync(device::Event const& iEvent, device::EventSetup const& iSetup); portabletest::TestHostCollection moveFrom() { return std::move(hostProduct_); } + portabletest::TestHostMultiCollection2 moveFromMulti2() { return std::move(hostProductMulti2_); } + portabletest::TestHostMultiCollection3 moveFromMulti3() { return std::move(hostProductMulti3_); } private: const device::EDGetToken getToken_; + const device::EDGetToken getTokenMulti2_; + const device::EDGetToken getTokenMulti3_; const edm::ESGetToken esTokenHost_; const device::ESGetToken esTokenDevice_; // hold the output product between acquire() and produce() portabletest::TestHostCollection hostProduct_; + portabletest::TestHostMultiCollection2 hostProductMulti2_; + portabletest::TestHostMultiCollection3 
hostProductMulti3_; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/test/writer.py b/HeterogeneousCore/AlpakaTest/test/writer.py index d23ac528629b8..87c0fc37e45e3 100644 --- a/HeterogeneousCore/AlpakaTest/test/writer.py +++ b/HeterogeneousCore/AlpakaTest/test/writer.py @@ -15,6 +15,8 @@ # either run the producer on a gpu (if available) and copy the product to the cpu, or run the producer directly on the cpu process.testProducer = cms.EDProducer('TestAlpakaProducer@alpaka', size = cms.int32(42), + size2 = cms.int32(33), + size3 = cms.int32(61) # alpaka.backend can be set to a specific backend to force using it, or be omitted or left empty to use the defult backend; # depending on the architecture and available hardware, the supported backends are "serial_sync", "cuda_async", "rocm_async" #alpaka = cms.untracked.PSet( @@ -33,7 +35,9 @@ # run a second producer explicitly on the cpu process.testProducerSerial = makeSerialClone(process.testProducer, - size = cms.int32(99) + size = cms.int32(99), + size2 = cms.int32(51), + size3 = cms.int32(43) ) # analyse the second set of products