Skip to content
This repository has been archived by the owner on Dec 9, 2024. It is now read-only.

Move to Most Recent Alpaka Version + Newest Caching Allocator #314

Merged
merged 8 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ cd CMSSW_13_0_0_pre4/src
cmsenv
git cms-init
git remote add SegLink [email protected]:SegmentLinking/cmssw.git
git fetch SegLink CMSSW_13_0_0_pre4_LST_X
git fetch SegLink CMSSW_13_0_0_pre4_LST_X_alpaka
git cms-addpkg RecoTracker Configuration
git checkout CMSSW_13_0_0_pre4_LST_X
git checkout CMSSW_13_0_0_pre4_LST_X_alpaka
cat <<EOF >lst.xml
<tool name="lst" version="1.0">
<client>
Expand Down
19 changes: 2 additions & 17 deletions SDL/Hit.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,26 +118,12 @@ namespace SDL
return alpaka::math::log(acc, val) / ln10;
};

// Hyperbolic functions were only merged into Alpaka in early 2023, so we
// provide temporary stand-ins until a release containing them can be used.
//
// Inverse hyperbolic cosine via the identity acosh(x) = ln(x + sqrt(x^2 - 1)).
// NOTE(review): only defined for val >= 1 — the sqrt argument goes negative
// (NaN) otherwise; callers are expected to guarantee this.
template<typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE float temp_acosh(TAcc const & acc, float val)
{
return alpaka::math::log(acc, val + alpaka::math::sqrt(acc, val * val - 1));
};

// Temporary hyperbolic sine via the identity sinh(x) = (e^x - e^-x) / 2,
// used until alpaka::math::sinh is available.
template<typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE float temp_sinh(TAcc const & acc, float val)
{
// Use a float literal: the double literal 0.5 would promote the whole
// expression to double and force a double->float narrowing on return,
// which is needlessly expensive on GPU devices.
return 0.5f * (alpaka::math::exp(acc, val) - alpaka::math::exp(acc, -val));
};

template<typename TAcc>
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float eta(TAcc const & acc, float x, float y, float z)
{
float r3 = alpaka::math::sqrt(acc, x*x + y*y + z*z );
float rt = alpaka::math::sqrt(acc, x*x + y*y );
float eta = ((z > 0) - ( z < 0)) * temp_acosh(acc, r3 / rt );
float eta = ((z > 0) - ( z < 0)) * alpaka::math::acosh(acc, r3 / rt );
return eta;
};

Expand Down Expand Up @@ -281,8 +267,7 @@ namespace SDL

hitsInGPU.rts[ihit] = alpaka::math::sqrt(acc, ihit_x*ihit_x + ihit_y*ihit_y);
hitsInGPU.phis[ihit] = SDL::phi(acc, ihit_x,ihit_y);
// Acosh has no supported implementation in Alpaka right now.
hitsInGPU.etas[ihit] = ((ihit_z>0)-(ihit_z<0)) * SDL::temp_acosh(acc, alpaka::math::sqrt(acc, ihit_x*ihit_x+ihit_y*ihit_y+ihit_z*ihit_z)/hitsInGPU.rts[ihit]);
hitsInGPU.etas[ihit] = ((ihit_z>0)-(ihit_z<0)) * alpaka::math::acosh(acc, alpaka::math::sqrt(acc, ihit_x*ihit_x+ihit_y*ihit_y+ihit_z*ihit_z)/hitsInGPU.rts[ihit]);
int found_index = binary_search(modulesInGPU.mapdetId, iDetId, nModules);
uint16_t lastModuleIndex = modulesInGPU.mapIdx[found_index];

Expand Down
2 changes: 1 addition & 1 deletion SDL/Segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ namespace SDL
addMDToMemory(acc, mdsInGPU, hitsInGPU, modulesInGPU, hitIndices2[tid], hitIndices3[tid], pixelModuleIndex, 0,0,0,0,0,0,0,0,0,outerMDIndex);

//in outer hits - pt, eta, phi
float slope = SDL::temp_sinh(acc, hitsInGPU.ys[mdsInGPU.outerHitIndices[innerMDIndex]]);
float slope = alpaka::math::sinh(acc, hitsInGPU.ys[mdsInGPU.outerHitIndices[innerMDIndex]]);
float intercept = hitsInGPU.zs[mdsInGPU.anchorHitIndices[innerMDIndex]] - slope * hitsInGPU.rts[mdsInGPU.anchorHitIndices[innerMDIndex]];
float score_lsq=(hitsInGPU.rts[mdsInGPU.anchorHitIndices[outerMDIndex]] * slope + intercept) - (hitsInGPU.zs[mdsInGPU.anchorHitIndices[outerMDIndex]]);
score_lsq = score_lsq * score_lsq;
Expand Down
4 changes: 3 additions & 1 deletion code/alpaka_interface/AllocatorPolicy.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace lst::alpakatools {
// - Caching: (device and host) caching allocator
enum class AllocatorPolicy { Synchronous = 0, Asynchronous = 1, Caching = 2 };

template <typename TDev, typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev>>>
template <typename TDev, typename = std::enable_if_t<alpaka::isDevice<TDev>>>
constexpr inline AllocatorPolicy allocator_policy = AllocatorPolicy::Synchronous;

#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
Expand Down Expand Up @@ -43,6 +43,8 @@ namespace lst::alpakatools {
constexpr inline AllocatorPolicy allocator_policy<alpaka::DevHipRt> =
#if !defined ALPAKA_DISABLE_CACHING_ALLOCATOR
AllocatorPolicy::Caching;
#elif HIP_VERSION >= 50400000 && !defined ALPAKA_DISABLE_ASYNC_ALLOCATOR
AllocatorPolicy::Asynchronous;
#else
AllocatorPolicy::Synchronous;
#endif
Expand Down
4 changes: 2 additions & 2 deletions code/alpaka_interface/AlpakaServiceFwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ namespace alpaka_cuda_async {
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED

#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
namespace alpaka_hip_async {
namespace alpaka_rocm_async {
class AlpakaService;
} // namespace alpaka_hip_async
} // namespace alpaka_rocm_async
#endif // ALPAKA_ACC_GPU_HIP_ENABLED

#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
Expand Down
10 changes: 3 additions & 7 deletions code/alpaka_interface/CachedBufAlloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace lst::alpakatools {
typename TDev,
typename TQueue,
typename = void,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
struct CachedBufAlloc {
static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
};
Expand Down Expand Up @@ -159,11 +159,7 @@ namespace lst::alpakatools {
};

//! The caching memory allocator implementation for the ROCm/HIP device
template <typename TElem,
typename TDim,
typename TIdx,
typename TQueue,
typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TElem, typename TDim, typename TIdx, typename TQueue>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
Expand Down Expand Up @@ -197,7 +193,7 @@ namespace lst::alpakatools {
typename TExtent,
typename TQueue,
typename TDev,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
}
Expand Down
8 changes: 4 additions & 4 deletions code/alpaka_interface/CachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,14 @@ namespace lst::alpakatools {
* - the `Queue` type can be either `Sync` _or_ `Async` on any allocation.
*/

template <typename TDev,
typename TQueue,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
template <typename TDev, typename TQueue>
class CachingAllocator {
public:
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
friend class alpaka_cuda_async::AlpakaService;
#endif
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
friend class alpaka_hip_async::AlpakaService;
friend class alpaka_rocm_async::AlpakaService;
#endif
#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
friend class alpaka_serial_sync::AlpakaService;
Expand All @@ -106,6 +104,8 @@ namespace lst::alpakatools {
using Buffer = alpaka::Buf<Device, std::byte, alpaka::DimInt<1u>, size_t>;

// The "memory device" type can either be the same as the "synchronisation device" type, or be the host CPU.
static_assert(alpaka::isDevice<Device>, "TDev should be an alpaka Device type.");
static_assert(alpaka::isQueue<Queue>, "TQueue should be an alpaka Queue type.");
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the \"synchronisation device\" type, or be the "
"host CPU.");
Expand Down
35 changes: 35 additions & 0 deletions code/alpaka_interface/CopyToDevice.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_CopyToDevice_h
#define HeterogeneousCore_AlpakaInterface_interface_CopyToDevice_h

// TODO: this utility class is specific to CMSSW, but needs to be in a
// package that is suitable as a DataFormat dependence

namespace lst::alpakatools {
/**
* This class template needs to be specialized for each host-side
* EventSetup data product that should be implicitly copied to the
* device memory. The specialization is expected to define a static
* copyAsync() function as in the following example
*
* \code
* template <>
* struct CopyToDevice<ExampleHostProduct> {
*   template <typename TQueue>
*   static auto copyAsync(TQueue& queue, ExampleHostProduct const& hostData) {
*     // construct the ExampleDeviceProduct corresponding to the device of the TQueue
*     // asynchronously copy hostData into the ExampleDeviceProduct object
*     // return the ExampleDeviceProduct object by value
*   }
* };
* \endcode
*
* The copyAsync() function should not explicitly synchronize the
* queue. The ExampleHostProduct and ExampleDeviceProduct can be the
* same type, if they internally are able to handle the memory
* allocation difference between host and device.
*/
template <typename THostData>
struct CopyToDevice;
} // namespace lst::alpakatools

#endif
36 changes: 36 additions & 0 deletions code/alpaka_interface/CopyToHost.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_CopyToHost_h
#define HeterogeneousCore_AlpakaInterface_interface_CopyToHost_h

// TODO: this utility class is specific to CMSSW, but needs to be in a
// package that is suitable as a DataFormat dependence

namespace lst::alpakatools {
/**
* This class template needs to be specialized for each device-side
* Event data product so that the framework can implicitly copy the
* device-side data product to the host memory. The specialization
* is expected to define a static copyAsync() function as in the
* following example
*
* \code
* template <>
* struct CopyToHost<ExampleDeviceProduct> {
*   template <typename TQueue>
*   static ExampleHostProduct copyAsync(TQueue& queue, ExampleDeviceProduct const& deviceData) {
*     // construct the ExampleHostProduct
*     // asynchronously copy deviceData into the ExampleHostProduct object
*     // return the ExampleHostProduct object by value
*   }
* };
* \endcode
*
* The copyAsync() function should not explicitly synchronize the
* queue. The ExampleDeviceProduct and ExampleHostProduct can be the
* same type, if they internally are able to handle the memory
* allocation difference between host and device.
*/
template <typename TDeviceData>
struct CopyToHost;
} // namespace lst::alpakatools

#endif
14 changes: 7 additions & 7 deletions code/alpaka_interface/ScopedContextFwd.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_ScopedContextFwd_h
#define HeterogeneousCore_AlpakaInterface_interface_ScopedContextFwd_h

#include "traits.h"
#include <alpaka/alpaka.hpp>

// Forward declaration of the alpaka framework Context classes
//
Expand All @@ -11,23 +11,23 @@
namespace lst::alpakatools {

namespace impl {
template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextBase;

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextGetterBase;
} // namespace impl

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextAcquire;

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextProduce;

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextTask;

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextAnalyze;

} // namespace lst::alpakatools
Expand Down
8 changes: 4 additions & 4 deletions code/alpaka_interface/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ namespace alpaka_cuda_async {
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED

#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
namespace alpaka_hip_async {
namespace alpaka_rocm_async {
using namespace alpaka_common;

using Platform = alpaka::PltfHipRt;
Expand All @@ -79,13 +79,13 @@ namespace alpaka_hip_async {
using Acc2D = Acc<Dim2D>;
using Acc3D = Acc<Dim3D>;

} // namespace alpaka_hip_async
} // namespace alpaka_rocm_async

#ifdef ALPAKA_ACCELERATOR_NAMESPACE
#define ALPAKA_DUPLICATE_NAMESPACE
#else
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_hip_async
#define ALPAKA_TYPE_SUFFIX HipAsync
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_rocm_async
#define ALPAKA_TYPE_SUFFIX ROCmAsync
#endif

#endif // ALPAKA_ACC_GPU_HIP_ENABLED
Expand Down
5 changes: 2 additions & 3 deletions code/alpaka_interface/devices.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@
#include <alpaka/alpaka.hpp>

#include "config.h"
#include "traits.h"

namespace lst::alpakatools {

namespace detail {

template <typename TPlatform, typename = std::enable_if_t<is_platform_v<TPlatform>>>
template <typename TPlatform, typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
inline std::vector<alpaka::Dev<TPlatform>> enumerate_devices() {
using Platform = TPlatform;
using Device = alpaka::Dev<Platform>;
Expand All @@ -32,7 +31,7 @@ namespace lst::alpakatools {
} // namespace detail

// return the alpaka accelerator devices for the given platform
template <typename TPlatform, typename = std::enable_if_t<is_platform_v<TPlatform>>>
template <typename TPlatform, typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
inline std::vector<alpaka::Dev<TPlatform>> const& devices() {
static const auto devices = detail::enumerate_devices<TPlatform>();
return devices;
Expand Down
6 changes: 4 additions & 2 deletions code/alpaka_interface/getDeviceCachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <cassert>
#include <memory>

#include <alpaka/alpaka.hpp>

#include "thread_safety_macros.h"
#include "AllocatorConfig.h"
#include "CachingAllocator.h"
Expand All @@ -16,7 +18,7 @@ namespace lst::alpakatools {

template <typename TDev,
typename TQueue,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
auto allocate_device_allocators() {
using Allocator = CachingAllocator<TDev, TQueue>;
auto const& devices = lst::alpakatools::devices<alpaka::Pltf<TDev>>();
Expand Down Expand Up @@ -72,7 +74,7 @@ namespace lst::alpakatools {

template <typename TDev,
typename TQueue,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
inline CachingAllocator<TDev, TQueue>& getDeviceCachingAllocator(TDev const& device) {
// initialise all allocators, one per device
CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators<TDev, TQueue>();
Expand Down
4 changes: 3 additions & 1 deletion code/alpaka_interface/getHostCachingAllocator.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_getHostCachingAllocator_h
#define HeterogeneousCore_AlpakaInterface_interface_getHostCachingAllocator_h

#include <alpaka/alpaka.hpp>

#include "thread_safety_macros.h"
#include "AllocatorConfig.h"
#include "CachingAllocator.h"
Expand All @@ -10,7 +12,7 @@

namespace lst::alpakatools {

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
inline CachingAllocator<alpaka_common::DevHost, TQueue>& getHostCachingAllocator() {
// thread safe initialisation of the host allocator
CMS_THREAD_SAFE static CachingAllocator<alpaka_common::DevHost, TQueue> allocator(
Expand Down
Loading