Fixes for low-level alpaka based utilities [14.0.x] #44650

Merged · 3 commits · Apr 9, 2024
@@ -25,7 +25,7 @@ namespace cms::alpakatools {
uint32_t second; // in a "One to Many" association is the total number of associations
};

- ALPAKA_FN_ACC constexpr Counters get() const { return counter_.as_counters; }
+ ALPAKA_FN_HOST_ACC constexpr Counters get() const { return counter_.as_counters; }

// atomically add as_counters, and return the previous value
template <typename TAcc>
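For context, the practical effect of switching `ALPAKA_FN_ACC` to `ALPAKA_FN_HOST_ACC` is that `get()` becomes callable from host code as well as from device kernels. A minimal sketch of the pattern (the struct name and members below are illustrative only, not the actual CMSSW class):

```cpp
#include <cstdint>

#include <alpaka/alpaka.hpp>

// Hypothetical stand-in for the counter class touched above: a getter annotated
// with ALPAKA_FN_HOST_ACC can be called both from host code (e.g. when inspecting
// results copied back to the host) and from device kernels, whereas ALPAKA_FN_ACC
// restricts the function to accelerator code only.
struct PairCounterSketch {
  struct Counters {
    uint32_t first;   // number of elements
    uint32_t second;  // total number of associations
  };

  // usable on host and device
  ALPAKA_FN_HOST_ACC constexpr Counters get() const { return counters_; }

  Counters counters_{0, 0};
};
```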
20 changes: 10 additions & 10 deletions HeterogeneousCore/AlpakaInterface/interface/atomicMaxF.h
@@ -1,24 +1,24 @@
- #ifndef HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h
- #define HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h
+ #ifndef HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h
+ #define HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h

#include <alpaka/alpaka.hpp>

#include "FWCore/Utilities/interface/bit_cast.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"

- #if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__)
// FIXME: this should be rewritten using the correct template specialisation for the different accelerator types

template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
- static __device__ __forceinline__ float atomicMaxF(const TAcc& acc, float* address, float val) {
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static float atomicMaxF(const TAcc& acc, float* address, float val) {
+ #if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__)
+ // GPU implementation uses __float_as_int / __int_as_float
int ret = __float_as_int(*address);
while (val > __int_as_float(ret)) {
int old = ret;
if ((ret = atomicCAS((int*)address, old, __float_as_int(val))) == old)
break;
}
return __int_as_float(ret);
- }
#else
- template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
- ALPAKA_FN_ACC ALPAKA_FN_INLINE static float atomicMaxF(const TAcc& acc, float* address, float val) {
+ // CPU implementation uses edm::bit_cast
int ret = edm::bit_cast<int>(*address);
while (val > edm::bit_cast<float>(ret)) {
@@ -27,7 +27,7 @@ ALPAKA_FN_ACC ALPAKA_FN_INLINE static float atomicMaxF(const TAcc& acc, float* a
break;
}
return edm::bit_cast<float>(ret);
- }
#endif // __CUDA_ARCH__ or __HIP_DEVICE_COMPILE__
+ }

- #endif // HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h
+ #endif // HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h
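The refactor above keeps a single `atomicMaxF` entry point and selects the GPU body (`__float_as_int` / `atomicCAS`) or the CPU body (`edm::bit_cast`) at compile time. For readers unfamiliar with the idiom, here is a host-only sketch of the same compare-and-swap loop written with `std::atomic` and C++20 `std::bit_cast`; it illustrates the pattern only and is not the code in the PR:

```cpp
#include <atomic>
#include <bit>      // std::bit_cast (C++20), standing in for edm::bit_cast
#include <cstdint>

// Hypothetical helper: emulate an atomic max on a float by reinterpreting it as
// an int and looping on compare-exchange until either the stored value is already
// >= val or our value has been written.  Returns the previous value, matching the
// usual atomicMax semantics.
inline float atomic_max_float_sketch(std::atomic<int32_t>& storage, float val) {
  int32_t ret = storage.load();
  while (val > std::bit_cast<float>(ret)) {
    int32_t expected = ret;
    if (storage.compare_exchange_weak(expected, std::bit_cast<int32_t>(val))) {
      break;  // we won the race; `ret` still holds the previous value
    }
    ret = expected;  // another thread updated the value first; retry with it
  }
  return std::bit_cast<float>(ret);
}
```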
4 changes: 2 additions & 2 deletions HeterogeneousCore/AlpakaInterface/interface/radixSort.h
@@ -353,8 +353,8 @@ namespace cms::alpakatools {
typename T,
int NS = sizeof(T), // number of significant bytes to use in sorting
typename std::enable_if<requires_single_thread_per_block_v<TAcc>, T>::type* = nullptr>
- ALPAKA_FN_ACC ALPAKA_FN_INLINE void radixSort(
-     const TAcc& acc, T const* a, uint16_t* ind, uint16_t* ind2, uint32_t size) {
+ /* not ALPAKA_FN_ACC to avoid trying to compile it for the CUDA or ROCm back-ends */
+ ALPAKA_FN_INLINE void radixSort(const TAcc& acc, T const* a, uint16_t* ind, uint16_t* ind2, uint32_t size) {
static_assert(requires_single_thread_per_block_v<TAcc>, "CPU sort (not a radixSort) called wtth wrong accelerator");
// Initialize the index array
std::iota(ind, ind + size, 0);
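For context on the CPU fallback touched above: dropping `ALPAKA_FN_ACC` keeps the function from being compiled for the CUDA or ROCm back-ends, while the `static_assert` restricts it to accelerators that run a single thread per block. A hedged, host-only sketch of an index sort of this kind (hypothetical helper name; only the `std::iota` initialization is taken from the diff, and the actual implementation in radixSort.h may differ beyond that point):

```cpp
#include <algorithm>
#include <cstdint>
#include <numeric>

// Fill an index array 0, 1, 2, ... and sort it by the keys in `a`, so that
// a[ind[0]] <= a[ind[1]] <= ... without moving the data itself.
template <typename T>
void index_sort_sketch(T const* a, uint16_t* ind, uint32_t size) {
  std::iota(ind, ind + size, 0);  // as in the diff: initialize the index array
  std::sort(ind, ind + size,
            [a](uint16_t i, uint16_t j) { return a[i] < a[j]; });
}
```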