Skip to content
This repository has been archived by the owner on Dec 9, 2024. It is now read-only.

Commit

Permalink
Merge pull request #314 from SegmentLinking/alpaka_upgrade
Browse files Browse the repository at this point in the history
Move to Most Recent Alpaka Version + Newest Caching Allocator
  • Loading branch information
YonsiG authored Aug 29, 2023
2 parents db11848 + 80047ad commit 8517040
Show file tree
Hide file tree
Showing 20 changed files with 280 additions and 214 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ cd CMSSW_13_0_0_pre4/src
cmsenv
git cms-init
git remote add SegLink [email protected]:SegmentLinking/cmssw.git
git fetch SegLink CMSSW_13_0_0_pre4_LST_X
git fetch SegLink CMSSW_13_0_0_pre4_LST_X_alpaka
git cms-addpkg RecoTracker Configuration
git checkout CMSSW_13_0_0_pre4_LST_X
git checkout CMSSW_13_0_0_pre4_LST_X_alpaka
cat <<EOF >lst.xml
<tool name="lst" version="1.0">
<client>
Expand Down
19 changes: 2 additions & 17 deletions SDL/Hit.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,26 +118,12 @@ namespace SDL
return alpaka::math::log(acc, val) / ln10;
};

// Hyperbolic functions were just merged into Alpaka early 2023,
// so we have to make use of temporary functions for now.
// Stand-in inverse hyperbolic cosine built from alpaka's log and sqrt:
//   acosh(x) = ln(x + sqrt(x^2 - 1))
// acc is the accelerator handle forwarded to the alpaka math calls; val is the
// argument. The square-root operand is negative when |val| < 1.
template<typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE float temp_acosh(TAcc const & acc, float val)
{
    float const root = alpaka::math::sqrt(acc, val * val - 1);
    return alpaka::math::log(acc, val + root);
};

// Stand-in hyperbolic sine built from alpaka's exp:
//   sinh(x) = (e^x - e^-x) / 2
// acc is the accelerator handle forwarded to the alpaka math calls; val is the
// argument.
template<typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE float temp_sinh(TAcc const & acc, float val)
{
    // Use a float literal: the former double literal 0.5 promoted the product
    // to double and then narrowed it back to the float return type. Halving is
    // exact, so normal-range results are unchanged.
    return 0.5f * (alpaka::math::exp(acc, val) - alpaka::math::exp(acc, -val));
};

template<typename TAcc>
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float eta(TAcc const & acc, float x, float y, float z)
{
float r3 = alpaka::math::sqrt(acc, x*x + y*y + z*z );
float rt = alpaka::math::sqrt(acc, x*x + y*y );
float eta = ((z > 0) - ( z < 0)) * temp_acosh(acc, r3 / rt );
float eta = ((z > 0) - ( z < 0)) * alpaka::math::acosh(acc, r3 / rt );
return eta;
};

Expand Down Expand Up @@ -281,8 +267,7 @@ namespace SDL

hitsInGPU.rts[ihit] = alpaka::math::sqrt(acc, ihit_x*ihit_x + ihit_y*ihit_y);
hitsInGPU.phis[ihit] = SDL::phi(acc, ihit_x,ihit_y);
// Acosh has no supported implementation in Alpaka right now.
hitsInGPU.etas[ihit] = ((ihit_z>0)-(ihit_z<0)) * SDL::temp_acosh(acc, alpaka::math::sqrt(acc, ihit_x*ihit_x+ihit_y*ihit_y+ihit_z*ihit_z)/hitsInGPU.rts[ihit]);
hitsInGPU.etas[ihit] = ((ihit_z>0)-(ihit_z<0)) * alpaka::math::acosh(acc, alpaka::math::sqrt(acc, ihit_x*ihit_x+ihit_y*ihit_y+ihit_z*ihit_z)/hitsInGPU.rts[ihit]);
int found_index = binary_search(modulesInGPU.mapdetId, iDetId, nModules);
uint16_t lastModuleIndex = modulesInGPU.mapIdx[found_index];

Expand Down
2 changes: 1 addition & 1 deletion SDL/Segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ namespace SDL
addMDToMemory(acc, mdsInGPU, hitsInGPU, modulesInGPU, hitIndices2[tid], hitIndices3[tid], pixelModuleIndex, 0,0,0,0,0,0,0,0,0,outerMDIndex);

//in outer hits - pt, eta, phi
float slope = SDL::temp_sinh(acc, hitsInGPU.ys[mdsInGPU.outerHitIndices[innerMDIndex]]);
float slope = alpaka::math::sinh(acc, hitsInGPU.ys[mdsInGPU.outerHitIndices[innerMDIndex]]);
float intercept = hitsInGPU.zs[mdsInGPU.anchorHitIndices[innerMDIndex]] - slope * hitsInGPU.rts[mdsInGPU.anchorHitIndices[innerMDIndex]];
float score_lsq=(hitsInGPU.rts[mdsInGPU.anchorHitIndices[outerMDIndex]] * slope + intercept) - (hitsInGPU.zs[mdsInGPU.anchorHitIndices[outerMDIndex]]);
score_lsq = score_lsq * score_lsq;
Expand Down
4 changes: 3 additions & 1 deletion code/alpaka_interface/AllocatorPolicy.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace lst::alpakatools {
// - Caching: (device and host) caching allocator
enum class AllocatorPolicy { Synchronous = 0, Asynchronous = 1, Caching = 2 };

template <typename TDev, typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev>>>
template <typename TDev, typename = std::enable_if_t<alpaka::isDevice<TDev>>>
constexpr inline AllocatorPolicy allocator_policy = AllocatorPolicy::Synchronous;

#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
Expand Down Expand Up @@ -43,6 +43,8 @@ namespace lst::alpakatools {
constexpr inline AllocatorPolicy allocator_policy<alpaka::DevHipRt> =
#if !defined ALPAKA_DISABLE_CACHING_ALLOCATOR
AllocatorPolicy::Caching;
#elif HIP_VERSION >= 50400000 && !defined ALPAKA_DISABLE_ASYNC_ALLOCATOR
AllocatorPolicy::Asynchronous;
#else
AllocatorPolicy::Synchronous;
#endif
Expand Down
4 changes: 2 additions & 2 deletions code/alpaka_interface/AlpakaServiceFwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ namespace alpaka_cuda_async {
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED

#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
namespace alpaka_hip_async {
namespace alpaka_rocm_async {
class AlpakaService;
} // namespace alpaka_hip_async
} // namespace alpaka_rocm_async
#endif // ALPAKA_ACC_GPU_HIP_ENABLED

#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
Expand Down
10 changes: 3 additions & 7 deletions code/alpaka_interface/CachedBufAlloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace lst::alpakatools {
typename TDev,
typename TQueue,
typename = void,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
struct CachedBufAlloc {
static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
};
Expand Down Expand Up @@ -159,11 +159,7 @@ namespace lst::alpakatools {
};

//! The caching memory allocator implementation for the ROCm/HIP device
template <typename TElem,
typename TDim,
typename TIdx,
typename TQueue,
typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TElem, typename TDim, typename TIdx, typename TQueue>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
Expand Down Expand Up @@ -197,7 +193,7 @@ namespace lst::alpakatools {
typename TExtent,
typename TQueue,
typename TDev,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
}
Expand Down
8 changes: 4 additions & 4 deletions code/alpaka_interface/CachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,14 @@ namespace lst::alpakatools {
* - the `Queue` type can be either `Sync` _or_ `Async` on any allocation.
*/

template <typename TDev,
typename TQueue,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
template <typename TDev, typename TQueue>
class CachingAllocator {
public:
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
friend class alpaka_cuda_async::AlpakaService;
#endif
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
friend class alpaka_hip_async::AlpakaService;
friend class alpaka_rocm_async::AlpakaService;
#endif
#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
friend class alpaka_serial_sync::AlpakaService;
Expand All @@ -106,6 +104,8 @@ namespace lst::alpakatools {
using Buffer = alpaka::Buf<Device, std::byte, alpaka::DimInt<1u>, size_t>;

// The "memory device" type can either be the same as the "synchronisation device" type, or be the host CPU.
static_assert(alpaka::isDevice<Device>, "TDev should be an alpaka Device type.");
static_assert(alpaka::isQueue<Queue>, "TQueue should be an alpaka Queue type.");
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the \"synchronisation device\" type, or be the "
"host CPU.");
Expand Down
35 changes: 35 additions & 0 deletions code/alpaka_interface/CopyToDevice.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_CopyToDevice_h
#define HeterogeneousCore_AlpakaInterface_interface_CopyToDevice_h

// TODO: this utility class is specific to CMSSW, but needs to be in a
// package that is suitable as a DataFormat dependency

namespace lst::alpakatools {
/**
* This class template needs to be specialized for each host-side
* EventSetup data product that should be implicitly copied to the
* device memory. The specialization is expected to define a static
* copyAsync() function, as in the following example:
*
* \code
* template <>
* struct CopyToDevice<ExampleHostProduct> {
*   template <typename TQueue>
*   static auto copyAsync(TQueue& queue, ExampleHostProduct const& hostData) {
*     // construct the ExampleDeviceProduct corresponding to the device of the TQueue
*     // asynchronously copy hostData to the ExampleDeviceProduct object
*     // return the ExampleDeviceProduct object by value
*   }
* };
* \endcode
*
* The copyAsync() function should not explicitly synchronize the
* queue. The ExampleHostProduct and ExampleDeviceProduct can be the
* same type, if they are internally able to handle the memory
* allocation difference between host and device.
*/
template <typename THostData>
struct CopyToDevice;
} // namespace lst::alpakatools

#endif
36 changes: 36 additions & 0 deletions code/alpaka_interface/CopyToHost.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_CopyToHost_h
#define HeterogeneousCore_AlpakaInterface_interface_CopyToHost_h

// TODO: this utility class is specific to CMSSW, but needs to be in a
// package that is suitable as a DataFormat dependency

namespace lst::alpakatools {
/**
* This class template needs to be specialized for each device-side
* Event data product so that the framework can implicitly copy the
* device-side data product to the host memory. The specialization
* is expected to define a static copyAsync() function, as in the
* following example:
*
* \code
* template <>
* struct CopyToHost<ExampleDeviceProduct> {
*   template <typename TQueue>
*   static ExampleHostProduct copyAsync(TQueue& queue, ExampleDeviceProduct const& deviceData) {
*     // construct the ExampleHostProduct
*     // asynchronously copy deviceData to the ExampleHostProduct object
*     // return the ExampleHostProduct object by value
*   }
* };
* \endcode
*
* The copyAsync() function should not explicitly synchronize the
* queue. The ExampleDeviceProduct and ExampleHostProduct can be the
* same type, if they are internally able to handle the memory
* allocation difference between host and device.
*/
template <typename TDeviceData>
struct CopyToHost;
} // namespace lst::alpakatools

#endif
14 changes: 7 additions & 7 deletions code/alpaka_interface/ScopedContextFwd.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_ScopedContextFwd_h
#define HeterogeneousCore_AlpakaInterface_interface_ScopedContextFwd_h

#include "traits.h"
#include <alpaka/alpaka.hpp>

// Forward declaration of the alpaka framework Context classes
//
Expand All @@ -11,23 +11,23 @@
namespace lst::alpakatools {

namespace impl {
template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextBase;

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextGetterBase;
} // namespace impl

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextAcquire;

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextProduce;

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextTask;

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
class ScopedContextAnalyze;

} // namespace lst::alpakatools
Expand Down
8 changes: 4 additions & 4 deletions code/alpaka_interface/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ namespace alpaka_cuda_async {
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED

#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
namespace alpaka_hip_async {
namespace alpaka_rocm_async {
using namespace alpaka_common;

using Platform = alpaka::PltfHipRt;
Expand All @@ -79,13 +79,13 @@ namespace alpaka_hip_async {
using Acc2D = Acc<Dim2D>;
using Acc3D = Acc<Dim3D>;

} // namespace alpaka_hip_async
} // namespace alpaka_rocm_async

#ifdef ALPAKA_ACCELERATOR_NAMESPACE
#define ALPAKA_DUPLICATE_NAMESPACE
#else
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_hip_async
#define ALPAKA_TYPE_SUFFIX HipAsync
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_rocm_async
#define ALPAKA_TYPE_SUFFIX ROCmAsync
#endif

#endif // ALPAKA_ACC_GPU_HIP_ENABLED
Expand Down
5 changes: 2 additions & 3 deletions code/alpaka_interface/devices.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@
#include <alpaka/alpaka.hpp>

#include "config.h"
#include "traits.h"

namespace lst::alpakatools {

namespace detail {

template <typename TPlatform, typename = std::enable_if_t<is_platform_v<TPlatform>>>
template <typename TPlatform, typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
inline std::vector<alpaka::Dev<TPlatform>> enumerate_devices() {
using Platform = TPlatform;
using Device = alpaka::Dev<Platform>;
Expand All @@ -32,7 +31,7 @@ namespace lst::alpakatools {
} // namespace detail

// return the alpaka accelerator devices for the given platform
template <typename TPlatform, typename = std::enable_if_t<is_platform_v<TPlatform>>>
template <typename TPlatform, typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
inline std::vector<alpaka::Dev<TPlatform>> const& devices() {
static const auto devices = detail::enumerate_devices<TPlatform>();
return devices;
Expand Down
6 changes: 4 additions & 2 deletions code/alpaka_interface/getDeviceCachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <cassert>
#include <memory>

#include <alpaka/alpaka.hpp>

#include "thread_safety_macros.h"
#include "AllocatorConfig.h"
#include "CachingAllocator.h"
Expand All @@ -16,7 +18,7 @@ namespace lst::alpakatools {

template <typename TDev,
typename TQueue,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
auto allocate_device_allocators() {
using Allocator = CachingAllocator<TDev, TQueue>;
auto const& devices = lst::alpakatools::devices<alpaka::Pltf<TDev>>();
Expand Down Expand Up @@ -72,7 +74,7 @@ namespace lst::alpakatools {

template <typename TDev,
typename TQueue,
typename = std::enable_if_t<lst::alpakatools::is_device_v<TDev> and lst::alpakatools::is_queue_v<TQueue>>>
typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
inline CachingAllocator<TDev, TQueue>& getDeviceCachingAllocator(TDev const& device) {
// initialise all allocators, one per device
CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators<TDev, TQueue>();
Expand Down
4 changes: 3 additions & 1 deletion code/alpaka_interface/getHostCachingAllocator.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_getHostCachingAllocator_h
#define HeterogeneousCore_AlpakaInterface_interface_getHostCachingAllocator_h

#include <alpaka/alpaka.hpp>

#include "thread_safety_macros.h"
#include "AllocatorConfig.h"
#include "CachingAllocator.h"
Expand All @@ -10,7 +12,7 @@

namespace lst::alpakatools {

template <typename TQueue, typename = std::enable_if_t<lst::alpakatools::is_queue_v<TQueue>>>
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
inline CachingAllocator<alpaka_common::DevHost, TQueue>& getHostCachingAllocator() {
// thread safe initialisation of the host allocator
CMS_THREAD_SAFE static CachingAllocator<alpaka_common::DevHost, TQueue> allocator(
Expand Down
Loading

0 comments on commit 8517040

Please sign in to comment.