Skip to content

Commit

Permalink
Merge pull request #44650 from fwyzard/alpaka_code_fixes_140x
Browse files Browse the repository at this point in the history
Fixes for low-level alpaka based utilities [14.0.x]
  • Loading branch information
cmsbuild authored Apr 9, 2024
2 parents f173701 + af05c33 commit 310a744
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace cms::alpakatools {
uint32_t second; // in a "One to Many" association is the total number of associations
};

ALPAKA_FN_ACC constexpr Counters get() const { return counter_.as_counters; }
ALPAKA_FN_HOST_ACC constexpr Counters get() const { return counter_.as_counters; }

// atomically add as_counters, and return the previous value
template <typename TAcc>
Expand Down
20 changes: 10 additions & 10 deletions HeterogeneousCore/AlpakaInterface/interface/atomicMaxF.h
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
#ifndef HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h
#define HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h
#ifndef HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h
#define HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h

#include <alpaka/alpaka.hpp>

#include "FWCore/Utilities/interface/bit_cast.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"

#if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__)
// FIXME: this should be rewritten using the correct template specialisation for the different accelerator types

template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
static __device__ __forceinline__ float atomicMaxF(const TAcc& acc, float* address, float val) {
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static float atomicMaxF(const TAcc& acc, float* address, float val) {
#if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__)
// GPU implementation uses __float_as_int / __int_as_float
int ret = __float_as_int(*address);
while (val > __int_as_float(ret)) {
int old = ret;
if ((ret = atomicCAS((int*)address, old, __float_as_int(val))) == old)
break;
}
return __int_as_float(ret);
}
#else
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC ALPAKA_FN_INLINE static float atomicMaxF(const TAcc& acc, float* address, float val) {
// CPU implementation uses edm::bit_cast
int ret = edm::bit_cast<int>(*address);
while (val > edm::bit_cast<float>(ret)) {
Expand All @@ -27,7 +27,7 @@ ALPAKA_FN_ACC ALPAKA_FN_INLINE static float atomicMaxF(const TAcc& acc, float* a
break;
}
return edm::bit_cast<float>(ret);
}
#endif // __CUDA_ARCH__ or __HIP_DEVICE_COMPILE__
}

#endif // HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h
#endif // HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h
4 changes: 2 additions & 2 deletions HeterogeneousCore/AlpakaInterface/interface/radixSort.h
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,8 @@ namespace cms::alpakatools {
typename T,
int NS = sizeof(T), // number of significant bytes to use in sorting
typename std::enable_if<requires_single_thread_per_block_v<TAcc>, T>::type* = nullptr>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void radixSort(
const TAcc& acc, T const* a, uint16_t* ind, uint16_t* ind2, uint32_t size) {
/* not ALPAKA_FN_ACC to avoid trying to compile it for the CUDA or ROCm back-ends */
ALPAKA_FN_INLINE void radixSort(const TAcc& acc, T const* a, uint16_t* ind, uint16_t* ind2, uint32_t size) {
      static_assert(requires_single_thread_per_block_v<TAcc>, "CPU sort (not a radixSort) called with wrong accelerator");
// Initialize the index array
std::iota(ind, ind + size, 0);
Expand Down

0 comments on commit 310a744

Please sign in to comment.