Skip to content

Commit

Permalink
Split implementation to separate header files
Browse files Browse the repository at this point in the history
Rename classes to CamelCase and move them to the detail namespace:
  - uniform_elements_along to detail::UniformElementsAlong
  - uniform_groups_along to detail::UniformGroupsAlong
  - uniform_group_elements_along to detail::UniformGroupElementsAlong
  - uniform_elements_nd to detail::UniformElementsND
  - independent_groups_along to detail::IndependentGroupsAlong
  - independent_group_elements_along to detail::IndependentGroupElementsAlong

Move the implementation to separate header files.

Introduce helper functions with the old names.
  • Loading branch information
fwyzard committed Apr 11, 2024
1 parent f7c445c commit 9e26825
Show file tree
Hide file tree
Showing 7 changed files with 1,049 additions and 834 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupElementsAlong_h
#define HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupElementsAlong_h

#include <algorithm>
#include <cstddef>
#include <type_traits>

#include <alpaka/alpaka.hpp>

#include "HeterogeneousCore/AlpakaInterface/interface/config.h"

namespace cms::alpakatools::detail {

using namespace alpaka_common;

/* IndependentGroupElementsAlong
*
* `independent_group_elements_along<Dim>(acc, ...)` is a shorthand for
* `IndependentGroupElementsAlong<TAcc, Dim>(acc, ...)` that can infer the accelerator type from the argument.
*/

/* Range over the element indices that the calling thread should process along dimension `Dim`.
 *
 * The indices are visited in chunks of at most `elements_` consecutive values, where `elements_` is the
 * `alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]` component of the work division:
 *   - the first chunk starts at `thread index within the block * elements_` (plus `first`, when given);
 *   - each following chunk starts `stride_` after the previous one, where `stride_` is the
 *     `alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim]` component times `elements_`;
 *   - the iteration stops as soon as the start of a chunk reaches or passes `extent`.
 * If `extent` is not given, it defaults to `stride_`.
 *
 * When `requires_single_thread_per_block_v<TAcc>` is true the iterator walks every index of each chunk;
 * otherwise it yields only the first index of each chunk.
 *
 * `Dim` is used to index the alpaka index and work division vectors, so it must be strictly smaller than
 * `alpaka::Dim<TAcc>::value`; an out-of-range `Dim` is rejected at compile time.
 */
template <typename TAcc,
          std::size_t Dim,
          typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > Dim)>>
class IndependentGroupElementsAlong {
public:
  // Iterate from the thread's first element up to the default extent (one block-wide stride).
  ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc)
      : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
        thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
        stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
        extent_{stride_} {}

  // Iterate from the thread's first element up to `extent` (excluded).
  ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx extent)
      : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
        thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
        stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
        extent_{extent} {}

  // Same as above, with the thread's first element shifted by `first`.
  ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx first, Idx extent)
      : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
        thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ + first},
        stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
        extent_{extent} {}

  class const_iterator;
  using iterator = const_iterator;

  // Iterator to the first index assigned to this thread (clamped to the extent).
  ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(elements_, stride_, extent_, thread_); }

  // Past-the-end iterator: all of its indices are equal to the extent.
  ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(elements_, stride_, extent_, extent_); }

  class const_iterator {
    friend class IndependentGroupElementsAlong;

    // Only the enclosing range can build iterators.
    // `first` is clamped to `extent`, so that an iterator starting beyond the extent compares equal to end().
    ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first)
        : elements_{elements},
          stride_{stride},
          extent_{extent},
          first_{std::min(first, extent)},
          index_{first_},
          range_{std::min(first + elements, extent)} {}

  public:
    // The current element index.
    ALPAKA_FN_ACC inline Idx operator*() const { return index_; }

    // pre-increment the iterator
    ALPAKA_FN_ACC inline const_iterator& operator++() {
      if constexpr (requires_single_thread_per_block_v<TAcc>) {
        // increment the index along the elements processed by the current thread
        ++index_;
        if (index_ < range_)
          return *this;
      }

      // increment the thread index with the block stride
      first_ += stride_;
      index_ = first_;
      range_ = std::min(first_ + elements_, extent_);
      if (index_ < extent_)
        return *this;

      // the iterator has reached or passed the end of the extent, clamp it to the extent
      first_ = extent_;
      index_ = extent_;
      range_ = extent_;
      return *this;
    }

    // post-increment the iterator
    ALPAKA_FN_ACC inline const_iterator operator++(int) {
      const_iterator old = *this;
      ++(*this);
      return old;
    }

    // Two iterators are equal when they point to the same index within the same chunk.
    ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const {
      return (index_ == other.index_) and (first_ == other.first_);
    }

    ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); }

  private:
    // non-const to support iterator copy and assignment
    Idx elements_;  // maximum number of consecutive indices in a chunk
    Idx stride_;    // distance between the starts of two consecutive chunks
    Idx extent_;    // upper bound (excluded) of the iteration
    // modified by the pre/post-increment operator
    Idx first_;  // start of the current chunk
    Idx index_;  // current index, returned by operator*
    Idx range_;  // end (excluded) of the current chunk, clamped to the extent
  };

private:
  const Idx elements_;  // Thread/Elems work division component along Dim
  const Idx thread_;    // first index assigned to this thread
  const Idx stride_;    // Block/Threads work division component along Dim, times elements_
  const Idx extent_;    // upper bound (excluded) of the iteration
};

} // namespace cms::alpakatools::detail

#endif // HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupElementsAlong_h
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupsAlong_h
#define HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupsAlong_h

#include <algorithm>
#include <cstddef>
#include <type_traits>

#include <alpaka/alpaka.hpp>

#include "HeterogeneousCore/AlpakaInterface/interface/config.h"

namespace cms::alpakatools::detail {

using namespace alpaka_common;

/* IndependentGroupsAlong
*
* `IndependentGroupsAlong<TAcc, Dim>(acc, groups)` returns a one-dimensional iterable range that spans the group
* indices from 0 to `groups` (excluded); the groups are assigned to the blocks along the `Dim` dimension. If `groups` is not
* specified, it defaults to the number of blocks along the `Dim` dimension.
*
* `independent_groups_along<Dim>(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc, Dim>(acc, ...)` that can
* infer the accelerator type from the argument.
*
* In a 1-dimensional kernel, `independent_groups(acc, ...)` is a shorthand for
* `IndependentGroupsAlong<TAcc, 0>(acc, ...)`.
*
* In an N-dimensional kernel, dimension 0 is the one that increases more slowly (e.g. the outer loop), followed by
* dimension 1, up to dimension N-1 that increases fastest (e.g. the inner loop).
* For convenience when converting CUDA or HIP code, `independent_groups_x(acc, ...)`, `_y` and `_z` are shorthands
* for `IndependentGroupsAlong<TAcc, N-1>(acc, ...)`, `<N-2>` and `<N-3>`.
*
* `independent_groups_along<Dim>(acc, ...)` should be called consistently by all the threads in a block. All threads
* in a block see the same loop iterations, while threads in different blocks may see a different number of iterations.
* If the work division has more blocks than the required number of groups, the first blocks will perform one
* iteration of the loop, while the other blocks will exit the loop immediately.
* If the work division has fewer blocks than the required number of groups, some of the blocks will perform more than
* one iteration, in order to cover the whole problem space.
*
* For example,
*
* for (auto group: independent_groups_along<Dim>(acc, 7))
*
* will return the group range from 0 to 6, distributed across all blocks in the work division.
* If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the other
* blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from 0 to 6 will
* process one group while block 7 will not process any.
* If the work division has fewer than 7 blocks, some of the blocks will perform more than one iteration of the loop,
* in order to cover the whole problem space. For example if the work division has 4 blocks, block 0 will process the
* groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6, and block 3 will process
* group 3.
*/

/* Range over the group indices that the calling block should process along dimension `Dim`.
 *
 * The iteration starts at the `alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]` component of the index,
 * advances by the `alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]` component of the work division,
 * and stops as soon as it reaches or passes `groups`. If `groups` is not given, it defaults to the stride.
 *
 * `Dim` is used to index the alpaka index and work division vectors, so it must be strictly smaller than
 * `alpaka::Dim<TAcc>::value`; an out-of-range `Dim` is rejected at compile time.
 */
template <typename TAcc,
          std::size_t Dim,
          typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > Dim)>>
class IndependentGroupsAlong {
public:
  // Iterate over as many groups as the Grid/Blocks work division component along Dim.
  ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc)
      : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
        stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
        extent_{stride_} {}

  // Iterate over the group indices from 0 to `groups` (excluded).
  ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc, Idx groups)
      : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
        stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
        extent_{groups} {}

  class const_iterator;
  using iterator = const_iterator;

  // Iterator to the first group assigned to this block (clamped to the extent).
  ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(stride_, extent_, first_); }

  // Past-the-end iterator: its index is equal to the extent.
  ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(stride_, extent_, extent_); }

  class const_iterator {
    friend class IndependentGroupsAlong;

    // Only the enclosing range can build iterators.
    // `first` is clamped to `extent`, so that an iterator starting beyond the extent compares equal to end().
    ALPAKA_FN_ACC inline const_iterator(Idx stride, Idx extent, Idx first)
        : stride_{stride}, extent_{extent}, first_{std::min(first, extent)} {}

  public:
    // The current group index.
    ALPAKA_FN_ACC inline Idx operator*() const { return first_; }

    // pre-increment the iterator
    ALPAKA_FN_ACC inline const_iterator& operator++() {
      // increment the first-element-in-block index by the grid stride
      first_ += stride_;
      if (first_ < extent_)
        return *this;

      // the iterator has reached or passed the end of the extent, clamp it to the extent
      first_ = extent_;
      return *this;
    }

    // post-increment the iterator
    ALPAKA_FN_ACC inline const_iterator operator++(int) {
      const_iterator old = *this;
      ++(*this);
      return old;
    }

    // Two iterators are equal when they point to the same group index.
    ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const { return (first_ == other.first_); }

    ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); }

  private:
    // non-const to support iterator copy and assignment
    Idx stride_;  // distance between two consecutive groups processed by the same block
    Idx extent_;  // upper bound (excluded) of the iteration
    // modified by the pre/post-increment operator
    Idx first_;  // current group index, returned by operator*
  };

private:
  const Idx first_;   // Grid/Blocks index component along Dim
  const Idx stride_;  // Grid/Blocks work division component along Dim
  const Idx extent_;  // upper bound (excluded) of the iteration
};

} // namespace cms::alpakatools::detail

#endif // HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupsAlong_h
Loading

0 comments on commit 9e26825

Please sign in to comment.