Skip to content

Commit

Permalink
[WIP] Adds validation of code assumption.
Browse files Browse the repository at this point in the history
  • Loading branch information
ericcano committed Oct 26, 2023
1 parent d2c77bf commit a0cf55d
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions HeterogeneousCore/AlpakaInterface/interface/prefixScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@
namespace cms {
namespace alpakatools {

// Returns true if and only if `v` is a strictly positive power of two,
// i.e. v > 0 and exactly one bit of v is set.
// Zero and negative values always yield false; in particular a signed value
// with only its sign bit set (e.g. INT32_MIN) is not a power of two.
// The function is constexpr, so it can be evaluated at compile time.
template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
constexpr bool isPowerOf2(T v) {
// `v & (v - 1)` clears the lowest set bit of v, so the result is zero exactly
// when v has a single bit set. The `v > 0` test comes first so that `v - 1`
// is never evaluated for zero or negative inputs (no signed overflow at the
// minimum value, no shifting of negative numbers).
return v > 0 && (v & (v - 1)) == 0;
}

template <typename TAcc, typename T, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void warpPrefixScan(const TAcc& acc, int32_t laneId, T const* ci, T* co, uint32_t i) {
// ci and co may be the same
Expand Down Expand Up @@ -42,6 +54,7 @@ namespace cms {
ALPAKA_ASSERT_OFFLOAD(size <= warpSize * warpSize);
ALPAKA_ASSERT_OFFLOAD(0 == blockDimension % warpSize);
auto first = blockThreadIdx;
ALPAKA_ASSERT_OFFLOAD(isPowerOf2(warpSize));
auto laneId = blockThreadIdx & (warpSize - 1);

for (auto i = first; i < size; i += blockDimension) {
Expand Down

0 comments on commit a0cf55d

Please sign in to comment.