From f29eaf7cc71e635a5242239e0e099ed6444416eb Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Mon, 2 Oct 2023 14:45:33 -0600 Subject: [PATCH 01/22] initial commit --- ports-of-call/portable_arrays.hpp | 439 +++++++++--------------------- 1 file changed, 135 insertions(+), 304 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 81a2ad6d..394e4ec6 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -26,73 +26,122 @@ #include "portability.hpp" #include +#include #include #include // size_t #include // memset() +#include +#include +#include #include // swap() +constexpr std::size_t MAXDIM = 6; +using narr = std::array; +namespace detail { + +template +constexpr void set_value(narr &ndat, NX value) { + ndat[Ind] = value; +} + +template +constexpr void set_values(narr &ndat, NX value) { + set_value(ndat, value); +} + +template +constexpr void set_values(narr &ndat, NX value, NXs... nxs) { + set_value(ndat, value); + set_values(ndat, nxs...); +} + +template +size_t computeIndex(const narr &nd, const size_t index) { + return index; +} +template +size_t computeIndex(const narr &nd, const size_t index, const Tail... tail) { + return index * nd[Ind] + computeIndex(nd, tail...); +} + +template +PORTABLE_INLINE_FUNCTION auto varmul(NX v) { + return v; +} + +template +PORTABLE_INLINE_FUNCTION auto varmul(NX v, NXs... vs) { + return v * varmul(vs...); +} + +} // namespace detail + +template +PORTABLE_INLINE_FUNCTION auto nx_arr(NXs... nxs) { + narr r; + constexpr std::size_t NP = sizeof...(NXs); + detail::set_values(r, nxs...); + for (auto i = NP; i < MAXDIM; ++i) + r[i] = 1; + return r; +} + +template +size_t compute_index(const narr &nd, const Indicies... idxs) { + return 0 + detail::computeIndex<0>(nd, idxs...); +} + +template +PORTABLE_INLINE_FUNCTION auto nx_mul(NXs... nxs) { + return detail::varmul(nxs...); +} + template class PortableMDArray { public: - static constexpr int MAXDIM = 6; + PORTABLE_FUNCTION PortableMDArray(T *data, narr nxa, std::size_t rank) + : pdata_(data), nxs_(nxa), rank_(rank) {} + // rank_{std::distance(nxs_.cbegin(), std::find(nxs_.cbegin(), nxs_.cend(), + // 1))} {} + + template + PORTABLE_FUNCTION PortableMDArray(T *p, NXs... nxs) noexcept + : PortableMDArray(p, nx_arr(nxs...), sizeof...(NXs)) {} // ctors // default ctor: simply set null PortableMDArray PORTABLE_FUNCTION - PortableMDArray() noexcept - : pdata_(nullptr), nx1_(0), nx2_(0), nx3_(0), nx4_(0), nx5_(0), nx6_(0) {} - PORTABLE_FUNCTION PortableMDArray(T *data, int nx1) noexcept - : pdata_(data), nx1_(nx1), nx2_(1), nx3_(1), nx4_(1), nx5_(1), nx6_(1) {} - PORTABLE_FUNCTION - PortableMDArray(T *data, int nx2, int nx1) noexcept - : pdata_(data), nx1_(nx1), nx2_(nx2), nx3_(1), nx4_(1), nx5_(1), nx6_(1) { - } - PORTABLE_FUNCTION - PortableMDArray(T *data, int nx3, int nx2, int nx1) noexcept - : pdata_(data), nx1_(nx1), nx2_(nx2), nx3_(nx3), nx4_(1), nx5_(1), - nx6_(1) {} - PORTABLE_FUNCTION - PortableMDArray(T *data, int nx4, int nx3, int nx2, int nx1) noexcept - : pdata_(data), nx1_(nx1), nx2_(nx2), nx3_(nx3), nx4_(nx4), nx5_(1), - nx6_(1) {} - PORTABLE_FUNCTION - PortableMDArray(T *data, int nx5, int nx4, int nx3, int nx2, int nx1) noexcept - : pdata_(data), nx1_(nx1), nx2_(nx2), nx3_(nx3), nx4_(nx4), nx5_(nx5), - nx6_(1) {} - PORTABLE_FUNCTION - PortableMDArray(T *data, int nx6, int nx5, int nx4, int nx3, int nx2, - int nx1) noexcept - : pdata_(data), nx1_(nx1), nx2_(nx2), nx3_(nx3), nx4_(nx4), nx5_(nx5), - nx6_(nx6) {} + PortableMDArray() noexcept : pdata_(nullptr), nxs_{{0}}, rank_{0} {} // define copy constructor and overload assignment operator so both do deep // copies. PortableMDArray(const PortableMDArray &t) noexcept; PortableMDArray &operator=(const PortableMDArray &t) noexcept; - // public functions to allocate/deallocate memory for 1D-5D data - PORTABLE_FUNCTION void NewPortableMDArray(T *data, int nx1) noexcept; - PORTABLE_FUNCTION void NewPortableMDArray(T *data, int nx2, int nx1) noexcept; - PORTABLE_FUNCTION void NewPortableMDArray(T *data, int nx3, int nx2, - int nx1) noexcept; - PORTABLE_FUNCTION void NewPortableMDArray(T *data, int nx4, int nx3, int nx2, - int nx1) noexcept; - PORTABLE_FUNCTION void NewPortableMDArray(T *data, int nx5, int nx4, int nx3, - int nx2, int nx1) noexcept; - PORTABLE_FUNCTION void NewPortableMDArray(T *data, int nx6, int nx5, int nx4, - int nx3, int nx2, int nx1) noexcept; - + template + PORTABLE_FUNCTION void NewPortableMDArray(T *data, NXs... nxs) noexcept { + pdata_ = data; + nxs_ = nx_arr(nxs...); + rank_ = sizeof...(NXs); + } // public function to swap underlying data pointers of two equally-sized // arrays void SwapPortableMDArray(PortableMDArray &array2); // functions to get array dimensions - PORTABLE_FORCEINLINE_FUNCTION int GetDim1() const { return nx1_; } - PORTABLE_FORCEINLINE_FUNCTION int GetDim2() const { return nx2_; } - PORTABLE_FORCEINLINE_FUNCTION int GetDim3() const { return nx3_; } - PORTABLE_FORCEINLINE_FUNCTION int GetDim4() const { return nx4_; } - PORTABLE_FORCEINLINE_FUNCTION int GetDim5() const { return nx5_; } - PORTABLE_FORCEINLINE_FUNCTION int GetDim6() const { return nx6_; } + template + PORTABLE_FUNCTION constexpr auto GetDim() const { + return nxs_[I]; + } + // PORTABLE_FUNCTION constexpr GetSize() const { return computeSize()} + + // legacy API, TODO: deprecate + PORTABLE_FORCEINLINE_FUNCTION int GetDim1() const { return GetDim<0>(); } + PORTABLE_FORCEINLINE_FUNCTION int GetDim2() const { return GetDim<1>(); } + PORTABLE_FORCEINLINE_FUNCTION int GetDim3() const { return GetDim<2>(); } + PORTABLE_FORCEINLINE_FUNCTION int GetDim4() const { return GetDim<3>(); } + PORTABLE_FORCEINLINE_FUNCTION int GetDim5() const { return GetDim<4>(); } + PORTABLE_FORCEINLINE_FUNCTION int GetDim6() const { return GetDim<5>(); } PORTABLE_INLINE_FUNCTION int GetDim(size_t i) const { // TODO: remove if performance cirtical assert(0 < i && i <= 6 && "PortableMDArrays are max 6D"); @@ -113,48 +162,21 @@ class PortableMDArray { return -1; } - // a function to get the total size of the array PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { - return nx1_ * nx2_ * nx3_ * nx4_ * nx5_ * nx6_; + return std::accumulate(nxs_.cbegin(), nxs_.cend(), 1, + std::multiplies()); } PORTABLE_FORCEINLINE_FUNCTION std::size_t GetSizeInBytes() const { - return nx1_ * nx2_ * nx3_ * nx4_ * nx5_ * nx6_ * sizeof(T); - } - - PORTABLE_INLINE_FUNCTION size_t GetRank() const { - for (int i = 6; i >= 1; i--) { - if (GetDim(i) > 1) return i; - } - return 0; + return GetSize() * sizeof(T); } - PORTABLE_INLINE_FUNCTION void Reshape(int nx6, int nx5, int nx4, int nx3, - int nx2, int nx1) { - assert(nx6 * nx5 * nx4 * nx3 * nx2 * nx1 == GetSize()); - nx1_ = nx1; - nx2_ = nx2; - nx3_ = nx3; - nx4_ = nx4; - nx5_ = nx5; - nx6_ = nx6; + PORTABLE_INLINE_FUNCTION size_t GetRank() const { return rank_; } + template + PORTABLE_INLINE_FUNCTION void Reshape(NXs... nxs) { + assert(nx_mul(nxs...) == GetSize()); + nxs_ = nx_arr(nxs...); + rank_ = sizeof...(NXs); } - PORTABLE_INLINE_FUNCTION void Reshape(int nx5, int nx4, int nx3, int nx2, - int nx1) { - Reshape(1, nx5, nx4, nx3, nx2, nx1); - } - PORTABLE_INLINE_FUNCTION void Reshape(int nx4, int nx3, int nx2, int nx1) { - Reshape(1, 1, nx4, nx3, nx2, nx1); - } - PORTABLE_INLINE_FUNCTION void Reshape(int nx3, int nx2, int nx1) { - Reshape(1, 1, 1, nx3, nx2, nx1); - } - PORTABLE_INLINE_FUNCTION void Reshape(int nx2, int nx1) { - Reshape(1, 1, 1, 1, nx2, nx1); - } - PORTABLE_INLINE_FUNCTION void Reshape(int nx1) { - Reshape(1, 1, 1, 1, 1, nx1); - } - PORTABLE_FORCEINLINE_FUNCTION bool IsShallowSlice() { return true; } PORTABLE_FORCEINLINE_FUNCTION bool IsEmpty() { return GetSize() < 1; } // "getter" function to access private data member @@ -172,61 +194,16 @@ class PortableMDArray { // operator[] // "non-const variants" called for "PortableMDArray()" provide read/write - // access via returning by reference, enabling assignment on returned l-value, - // e.g.: a(3) = 3.0; - PORTABLE_FORCEINLINE_FUNCTION T &operator()() { return pdata_[0]; } - - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int n) { return pdata_[n]; } - // "const variants" called for "const PortableMDArray" returns T by value, - // since T is typically a built-in type (versus "const T &" to avoid copying - // for general types) - PORTABLE_FORCEINLINE_FUNCTION T &operator()() const { return pdata_[0]; } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int n) const { - return pdata_[n]; - } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int n, const int i) { - return pdata_[i + nx1_ * n]; - } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int n, const int i) const { - return pdata_[i + nx1_ * n]; - } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int n, const int j, - const int i) { - return pdata_[i + nx1_ * (j + nx2_ * n)]; - } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int n, const int j, - const int i) const { - return pdata_[i + nx1_ * (j + nx2_ * n)]; - } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int n, const int k, - const int j, const int i) { - return pdata_[i + nx1_ * (j + nx2_ * (k + nx3_ * n))]; + // access via returning by reference, enabling assignment on returned + // l-value, e.g.: a(3) = 3.0; + template + PORTABLE_FORCEINLINE_FUNCTION T &operator()(const Is... idxs) { + return pdata_[compute_index(nxs_, idxs...)]; } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int n, const int k, - const int j, const int i) const { - return pdata_[i + nx1_ * (j + nx2_ * (k + nx3_ * n))]; - } - PORTABLE_FORCEINLINE_FUNCTION T & - operator()(const int m, const int n, const int k, const int j, const int i) { - return pdata_[i + nx1_ * (j + nx2_ * (k + nx3_ * (n + nx4_ * m)))]; - } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int m, const int n, - const int k, const int j, - const int i) const { - return pdata_[i + nx1_ * (j + nx2_ * (k + nx3_ * (n + nx4_ * m)))]; - } - // int l?, int o? - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int p, const int m, - const int n, const int k, - const int j, const int i) { - return pdata_[i + - nx1_ * (j + nx2_ * (k + nx3_ * (n + nx4_ * (m + nx5_ * p))))]; - } - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const int p, const int m, - const int n, const int k, - const int j, const int i) const { - return pdata_[i + - nx1_ * (j + nx2_ * (k + nx3_ * (n + nx4_ * (m + nx5_ * p))))]; + + template + PORTABLE_FORCEINLINE_FUNCTION T &operator()(const Is... idxs) const { + return pdata_[compute_index(nxs_, idxs...)]; } PortableMDArray &operator*=(T scale) { @@ -263,19 +240,16 @@ class PortableMDArray { private: T *pdata_; - int nx1_, nx2_, nx3_, nx4_, nx5_, nx6_; + narr nxs_; + int rank_; }; // copy constructor (does a shallow copy) template PortableMDArray::PortableMDArray(const PortableMDArray &src) noexcept { - nx1_ = src.nx1_; - nx2_ = src.nx2_; - nx3_ = src.nx3_; - nx4_ = src.nx4_; - nx5_ = src.nx5_; - nx6_ = src.nx6_; + nxs_ = src.nxs_; + rank_ = src.rank_; if (src.pdata_) pdata_ = src.pdata_; } @@ -285,12 +259,8 @@ template PortableMDArray & PortableMDArray::operator=(const PortableMDArray &src) noexcept { if (this != &src) { - nx1_ = src.nx1_; - nx2_ = src.nx2_; - nx3_ = src.nx3_; - nx4_ = src.nx4_; - nx5_ = src.nx5_; - nx6_ = src.nx6_; + nxs_ = src.nxs; + rank_ = src.rank_; pdata_ = src.pdata_; } return *this; @@ -300,19 +270,17 @@ PortableMDArray::operator=(const PortableMDArray &src) noexcept { // note this POINTER equivalence, not data equivalence template bool PortableMDArray::operator==(const PortableMDArray &rhs) const { - return (pdata_ == rhs.pdata_ && nx1_ == rhs.nx1_ && nx2_ == rhs.nx2_ && - nx3_ == rhs.nx3_ && nx4_ == rhs.nx4_ && nx5_ == rhs.nx5_ && - nx6_ == rhs.nx6_); + return (pdata_ == rhs.pdata_ && nxs_ == rhs.nxs_); // NB rank is implied } //---------------------------------------------------------------------------------------- //! \fn PortableMDArray::InitWithShallowSlice() -// \brief shallow copy of nvar elements in dimension dim of an array, starting -// at index=indx. Copies pointer to data, but not data itself. +// \brief shallow copy of nvar elements in dimension dim of an array, +// starting at index=indx. Copies pointer to data, but not data itself. // Shallow slice is only able to address the "nvar" range in "dim", and all -// entries of the src array for d PORTABLE_FUNCTION void @@ -320,162 +288,25 @@ PortableMDArray::InitWithShallowSlice(const PortableMDArray &src, const int dim, const int indx, const int nvar) { pdata_ = src.pdata_; - if (dim == 6) { - nx6_ = nvar; - nx5_ = src.nx5_; - nx4_ = src.nx4_; - nx3_ = src.nx3_; - nx2_ = src.nx2_; - nx1_ = src.nx1_; - pdata_ += indx * (nx1_ * nx2_ * nx3_ * nx4_ * nx5_); - } else if (dim == 5) { - nx6_ = 1; - nx5_ = nvar; - nx4_ = src.nx4_; - nx3_ = src.nx3_; - nx2_ = src.nx2_; - nx1_ = src.nx1_; - pdata_ += indx * (nx1_ * nx2_ * nx3_ * nx4_); - } else if (dim == 4) { - nx6_ = 1; - nx5_ = 1; - nx4_ = nvar; - nx3_ = src.nx3_; - nx2_ = src.nx2_; - nx1_ = src.nx1_; - pdata_ += indx * (nx1_ * nx2_ * nx3_); - } else if (dim == 3) { - nx6_ = 1; - nx5_ = 1; - nx4_ = 1; - nx3_ = nvar; // nx3 - nx2_ = src.nx2_; // nx2 - nx1_ = src.nx1_; // nx1 - pdata_ += indx * (nx1_ * nx2_); - } else if (dim == 2) { - nx6_ = 1; - nx5_ = 1; - nx4_ = 1; - nx3_ = 1; - nx2_ = nvar; - nx1_ = src.nx1_; - pdata_ += indx * (nx1_); - } else if (dim == 1) { - nx6_ = 1; - nx5_ = 1; - nx4_ = 1; - nx3_ = 1; - nx2_ = 1; - nx1_ = nvar; - pdata_ += indx; - } - return; -} - -//---------------------------------------------------------------------------------------- -//! \fn PortableMDArray::NewPortableMDArray() -// \brief allocate new 1D array with elements initialized to zero. - -template -PORTABLE_FUNCTION void -PortableMDArray::NewPortableMDArray(T *data, int nx1) noexcept { - nx1_ = nx1; - nx2_ = 1; - nx3_ = 1; - nx4_ = 1; - nx5_ = 1; - nx6_ = 1; - pdata_ = data; -} - -//---------------------------------------------------------------------------------------- -//! \fn PortableMDArray::NewPortableMDArray() -// \brief 2d data allocation - -template -PORTABLE_FUNCTION void -PortableMDArray::NewPortableMDArray(T *data, int nx2, int nx1) noexcept { - nx1_ = nx1; - nx2_ = nx2; - nx3_ = 1; - nx4_ = 1; - nx5_ = 1; - nx6_ = 1; - pdata_ = data; -} - -//---------------------------------------------------------------------------------------- -//! \fn PortableMDArray::NewPortableMDArray() -// \brief 3d data allocation + std::size_t offs = indx; + nxs_[dim - 1] = nvar; -template -PORTABLE_FUNCTION void -PortableMDArray::NewPortableMDArray(T *data, int nx3, int nx2, - int nx1) noexcept { - nx1_ = nx1; - nx2_ = nx2; - nx3_ = nx3; - nx4_ = 1; - nx5_ = 1; - nx6_ = 1; - pdata_ = data; -} - -//---------------------------------------------------------------------------------------- -//! \fn PortableMDArray::NewPortableMDArray() -// \brief 4d data allocation - -template -PORTABLE_FUNCTION void -PortableMDArray::NewPortableMDArray(T *data, int nx4, int nx3, int nx2, - int nx1) noexcept { - nx1_ = nx1; - nx2_ = nx2; - nx3_ = nx3; - nx4_ = nx4; - nx5_ = 1; - nx6_ = 1; - pdata_ = data; -} - -//---------------------------------------------------------------------------------------- -//! \fn PortableMDArray::NewPortableMDArray() -// \brief 5d data allocation - -template -PORTABLE_FUNCTION void -PortableMDArray::NewPortableMDArray(T *data, int nx5, int nx4, int nx3, - int nx2, int nx1) noexcept { - nx1_ = nx1; - nx2_ = nx2; - nx3_ = nx3; - nx4_ = nx4; - nx5_ = nx5; - nx6_ = 1; - pdata_ = data; -} - -//---------------------------------------------------------------------------------------- -//! \fn PortableMDArray::NewPortableMDArray() -// \brief 6d data allocation + for (std::size_t i = 0; i < dim - 1; ++i) { + nxs_[i] = src.nxs_[i]; + offs *= nxs_[i]; + } + for (std::size_t i = dim; i < MAXDIM; ++i) { + nxs_[i] = 1; + } + pdata_ += offs; -template -PORTABLE_FUNCTION void -PortableMDArray::NewPortableMDArray(T *data, int nx6, int nx5, int nx4, - int nx3, int nx2, int nx1) noexcept { - nx1_ = nx1; - nx2_ = nx2; - nx3_ = nx3; - nx4_ = nx4; - nx5_ = nx5; - nx6_ = nx6; - pdata_ = data; + return; } //---------------------------------------------------------------------------------------- //! \fn PortableMDArray::SwapPortableMDArray() -// \brief swap pdata_ pointers of two equally sized PortableMDArrays (shallow -// swap) +// \brief swap pdata_ pointers of two equally sized PortableMDArrays +// (shallow swap) // Does not allocate memory for either array template From 7a8e259eb0a27258b27e28ce516dfbab9e054c2e Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Mon, 2 Oct 2023 15:24:52 -0600 Subject: [PATCH 02/22] spelling + comments --- ports-of-call/portable_arrays.hpp | 36 ++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 394e4ec6..54be08f8 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -35,40 +35,52 @@ #include #include // swap() +// maximum number of dimensions constexpr std::size_t MAXDIM = 6; +// array type of dimensions/strides using narr = std::array; + namespace detail { +// set_value end-case template constexpr void set_value(narr &ndat, NX value) { ndat[Ind] = value; } +// set_valueS end case, dispatch to set_value template constexpr void set_values(narr &ndat, NX value) { set_value(ndat, value); } +// set_values general case, set `Ind` position of array (tracking from RIGHT) +// with head of size list `NX` template constexpr void set_values(narr &ndat, NX value, NXs... nxs) { set_value(ndat, value); set_values(ndat, nxs...); } +// compute_index base case, i.e. fastest moving index template -size_t computeIndex(const narr &nd, const size_t index) { +size_t compute_index(const narr &nd, const size_t index) { return index; } + +// compute_index general case, computing slower moving index strides template -size_t computeIndex(const narr &nd, const size_t index, const Tail... tail) { - return index * nd[Ind] + computeIndex(nd, tail...); +size_t compute_index(const narr &nd, const size_t index, const Tail... tail) { + return index * nd[Ind] + compute_index(nd, tail...); } +// variadic multiplication, recursive end template PORTABLE_INLINE_FUNCTION auto varmul(NX v) { return v; } +// variadic value parameter mutliplication template PORTABLE_INLINE_FUNCTION auto varmul(NX v, NXs... vs) { return v * varmul(vs...); @@ -76,21 +88,30 @@ PORTABLE_INLINE_FUNCTION auto varmul(NX v, NXs... vs) { } // namespace detail +// driver of nx array creation. +// Note we iterate from the RIGHT, to match +// what was done explicilt prior. template PORTABLE_INLINE_FUNCTION auto nx_arr(NXs... nxs) { narr r; constexpr std::size_t NP = sizeof...(NXs); detail::set_values(r, nxs...); + // fill "empty" dims with 1 for (auto i = NP; i < MAXDIM; ++i) r[i] = 1; return r; } +// compute index driver. template -size_t compute_index(const narr &nd, const Indicies... idxs) { - return 0 + detail::computeIndex<0>(nd, idxs...); +PORTABLE_INLINE_FUNCTION std::size_t compute_index(const narr &nd, + const Indicies... idxs) { + // adding `0` if sizeof...(Indicies) == 0 + return 0 + detail::compute_index<0>(nd, idxs...); } +// recursive variadic multiply +// multiply a set of variadic values template PORTABLE_INLINE_FUNCTION auto nx_mul(NXs... nxs) { return detail::varmul(nxs...); @@ -99,11 +120,11 @@ PORTABLE_INLINE_FUNCTION auto nx_mul(NXs... nxs) { template class PortableMDArray { public: + // explicit initialization of objects PORTABLE_FUNCTION PortableMDArray(T *data, narr nxa, std::size_t rank) : pdata_(data), nxs_(nxa), rank_(rank) {} - // rank_{std::distance(nxs_.cbegin(), std::find(nxs_.cbegin(), nxs_.cend(), - // 1))} {} + // variadic ctor, dispatch to explicit constructor template PORTABLE_FUNCTION PortableMDArray(T *p, NXs... nxs) noexcept : PortableMDArray(p, nx_arr(nxs...), sizeof...(NXs)) {} @@ -133,7 +154,6 @@ class PortableMDArray { PORTABLE_FUNCTION constexpr auto GetDim() const { return nxs_[I]; } - // PORTABLE_FUNCTION constexpr GetSize() const { return computeSize()} // legacy API, TODO: deprecate PORTABLE_FORCEINLINE_FUNCTION int GetDim1() const { return GetDim<0>(); } From 4c13c2c76e51dfe4c523857127d93403d44365a8 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Tue, 3 Oct 2023 10:24:50 -0600 Subject: [PATCH 03/22] added PORTABLE_ function preambles to missing ones --- ports-of-call/portable_arrays.hpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 54be08f8..940dd580 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -44,33 +44,36 @@ namespace detail { // set_value end-case template -constexpr void set_value(narr &ndat, NX value) { +PORTABLE_INLINE_FUNCTION void set_value(narr &ndat, NX value) { ndat[Ind] = value; } // set_valueS end case, dispatch to set_value template -constexpr void set_values(narr &ndat, NX value) { +PORTABLE_INLINE_FUNCTION void set_values(narr &ndat, NX value) { set_value(ndat, value); } // set_values general case, set `Ind` position of array (tracking from RIGHT) // with head of size list `NX` template -constexpr void set_values(narr &ndat, NX value, NXs... nxs) { +PORTABLE_INLINE_FUNCTION void set_values(narr &ndat, NX value, NXs... nxs) { set_value(ndat, value); set_values(ndat, nxs...); } // compute_index base case, i.e. fastest moving index template -size_t compute_index(const narr &nd, const size_t index) { +PORTABLE_INLINE_FUNCTION size_t compute_index(const narr &nd, + const size_t index) { return index; } // compute_index general case, computing slower moving index strides template -size_t compute_index(const narr &nd, const size_t index, const Tail... tail) { +PORTABLE_INLINE_FUNCTION size_t compute_index(const narr &nd, + const size_t index, + const Tail... tail) { return index * nd[Ind] + compute_index(nd, tail...); } From 4742b0b1e4d842533f2497285b2b8594731ab920 Mon Sep 17 00:00:00 2001 From: Christopher Michael Mauney Date: Tue, 3 Oct 2023 18:38:19 -0600 Subject: [PATCH 04/22] getting build/exec on GPUs --- CMakeLists.txt | 8 +++ ports-of-call/portable_arrays.hpp | 33 ++++-------- test/test_portsofcall.cpp | 83 ++++++++++++++++++++----------- 3 files changed, 74 insertions(+), 50 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 35a46e2f..45fe3b46 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,14 @@ target_compile_features(${POCLIB} cxx_std_14 ) +#NOTE this option is done downstream, maybe poc should control it +set(with_cxx "$") +set(ps_kokkos "$") + +target_compile_options(${POCLIB} +INTERFACE +$<${with_cxx}:$<${ps_kokkos}:--expt-relaxed-constexpr>>) + # TESTING # ---------------------------------------- if(PORTS_OF_CALL_BUILD_TESTING) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 940dd580..b11842e1 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -77,18 +77,6 @@ PORTABLE_INLINE_FUNCTION size_t compute_index(const narr &nd, return index * nd[Ind] + compute_index(nd, tail...); } -// variadic multiplication, recursive end -template -PORTABLE_INLINE_FUNCTION auto varmul(NX v) { - return v; -} - -// variadic value parameter mutliplication -template -PORTABLE_INLINE_FUNCTION auto varmul(NX v, NXs... vs) { - return v * varmul(vs...); -} - } // namespace detail // driver of nx array creation. @@ -113,11 +101,15 @@ PORTABLE_INLINE_FUNCTION std::size_t compute_index(const narr &nd, return 0 + detail::compute_index<0>(nd, idxs...); } -// recursive variadic multiply -// multiply a set of variadic values -template -PORTABLE_INLINE_FUNCTION auto nx_mul(NXs... nxs) { - return detail::varmul(nxs...); +// multiply reduce array +// NOTE: we can do product of variadic parameters `vars...` as +// `arr_mul({vars...})` +template +PORTABLE_INLINE_FUNCTION auto arr_mul(const std::array &a) { + auto r = T{1}; + for (auto v : a) + r *= v; + return r; } template @@ -185,10 +177,7 @@ class PortableMDArray { return -1; } - PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { - return std::accumulate(nxs_.cbegin(), nxs_.cend(), 1, - std::multiplies()); - } + PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { return arr_mul(nxs_); } PORTABLE_FORCEINLINE_FUNCTION std::size_t GetSizeInBytes() const { return GetSize() * sizeof(T); } @@ -196,7 +185,7 @@ class PortableMDArray { PORTABLE_INLINE_FUNCTION size_t GetRank() const { return rank_; } template PORTABLE_INLINE_FUNCTION void Reshape(NXs... nxs) { - assert(nx_mul(nxs...) == GetSize()); + assert(arr_mul({nxs...}) == GetSize()); nxs_ = nx_arr(nxs...); rank_ = sizeof...(NXs); } diff --git a/test/test_portsofcall.cpp b/test/test_portsofcall.cpp index 8fb615f4..e450e749 100644 --- a/test/test_portsofcall.cpp +++ b/test/test_portsofcall.cpp @@ -11,6 +11,8 @@ // copies to the public, perform publicly and display publicly, and to // permit others to do so. +#include +#include #include #include #include @@ -18,20 +20,19 @@ #define CATCH_CONFIG_RUNNER #include "catch2/catch.hpp" -TEST_CASE("EXECUTION_IS_HOST is set correctly", - "[PortsOfCall]") { +TEST_CASE("EXECUTION_IS_HOST is set correctly", "[PortsOfCall]") { // testing this is maybe nontrivial? auto isHost = PortsOfCall::EXECUTION_IS_HOST; #if defined(PORTABILITY_STRATEGY_KOKKOS) - auto checkHost = std::is_same::value; - REQUIRE( isHost == checkHost ); + auto checkHost = std::is_same::value; + REQUIRE(isHost == checkHost); #elif defined(PORTABILITY_STRATEGY_CUDA) - REQUIRE( isHost == false ); + REQUIRE(isHost == false); #else - REQUIRE( isHost == true ); + REQUIRE(isHost == true); #endif - } // this test is lifted directly from `spiner`; @@ -71,53 +72,79 @@ TEST_CASE("PortableMDArrays can be allocated from a pointer", aslc2.InitWithShallowSlice(a, 1, 0, 2); REQUIRE(aslc1 == aslc2); } +} + +TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { + using tape_t = std::vector; + + constexpr std::array nxs{2, 5, 10, 10, 5, 2}; + + std::vector subsz(MAXDIM); + std::vector dats; + + std::partial_sum(nxs.cbegin(), nxs.cend(), subsz.begin(), + std::multiplies()); + for (auto i = 0; i < MAXDIM; ++i) { + dats.push_back(std::vector(subsz[i], nxs[i])); + } + + SECTION("Can construct correctly") { + auto pmd1 = PortableMDArray(dats[0].data(), nxs[0]); + REQUIRE(pmd1.GetSize() == subsz[0]); + REQUIRE(pmd1.GetRank() == 1); + + auto pmd3 = PortableMDArray(dats[3].data(), nxs[0], nxs[1], nxs[2]); + + REQUIRE(pmd3.GetSize() == subsz[2]); + REQUIRE(pmd3.GetRank() == 3); + REQUIRE(pmd3.GetDim1() == nxs[2]); + REQUIRE(pmd3.GetDim2() == nxs[1]); + REQUIRE(pmd3.GetDim3() == nxs[0]); + } } PORTABLE_FORCEINLINE_FUNCTION -Real index_func(size_t i) { - return i*i + 2.0*i + 3.0; -} +Real index_func(size_t i) { return i * i + 2.0 * i + 3.0; } TEST_CASE("portableCopy works with all portability strategies", "[portableCopy]") { // number of elements constexpr const size_t N = 32; // size in bytes - constexpr const size_t Nb = N*sizeof(Real); + constexpr const size_t Nb = N * sizeof(Real); // vector length N on host of Real std::vector b(N); // device pointer - Real* a = (Real*)PORTABLE_MALLOC(Nb); + Real *a = (Real *)PORTABLE_MALLOC(Nb); // set device values to 0 - portableFor("set to 0", 0, N, PORTABLE_LAMBDA(const int& i) - { - a[i] = 0.0; - }); - + portableFor( + "set to 0", 0, N, PORTABLE_LAMBDA(const int &i) { a[i] = 0.0; }); + // set host values to reference - for(size_t i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { b[i] = index_func(i); } // copy data to device pointer portableCopyToDevice(a, b.data(), Nb); - + // check if device values match reference - int sum {0}; - portableReduce("check portableCopy", 0, N, 0, 0, 0, 0, - PORTABLE_LAMBDA(const int& i, const int &j, const int& k, int& isum) - { - if (a[i] != index_func(i)) { - isum += 1; - } - }, sum); + int sum{0}; + portableReduce( + "check portableCopy", 0, N, 0, 0, 0, 0, + PORTABLE_LAMBDA(const int &i, const int &j, const int &k, int &isum) { + if (a[i] != index_func(i)) { + isum += 1; + } + }, + sum); REQUIRE(sum == 0); // set b to 0 - for (auto& v : b) { + for (auto &v : b) { v = 0.0; } From 4bf821eb44dc27d94804cd0908024ba92ceff262 Mon Sep 17 00:00:00 2001 From: Christopher Michael Mauney Date: Tue, 3 Oct 2023 20:32:14 -0600 Subject: [PATCH 05/22] fix indexing --- ports-of-call/portable_arrays.hpp | 30 +++++++++++++++------- test/test_portsofcall.cpp | 41 +++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index b11842e1..54b34cb3 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -62,19 +62,29 @@ PORTABLE_INLINE_FUNCTION void set_values(narr &ndat, NX value, NXs... nxs) { set_values(ndat, nxs...); } +template +PORTABLE_INLINE_FUNCTION auto stride_arr(const narr &nd) { + std::array sa; + sa[0] = 1; + for (auto i = 1; i < N; ++i) { + sa[i] = sa[i - 1] * nd[i - 1]; + } + return sa; +} + // compute_index base case, i.e. fastest moving index -template -PORTABLE_INLINE_FUNCTION size_t compute_index(const narr &nd, - const size_t index) { +template +PORTABLE_INLINE_FUNCTION size_t +compute_index(const std::array &sa, const size_t index) { return index; } // compute_index general case, computing slower moving index strides -template -PORTABLE_INLINE_FUNCTION size_t compute_index(const narr &nd, - const size_t index, - const Tail... tail) { - return index * nd[Ind] + compute_index(nd, tail...); +template +PORTABLE_INLINE_FUNCTION size_t +compute_index(const std::array &sa, const size_t index, + const Tail... tail) { + return index * sa[N - Ind - 1] + compute_index(sa, tail...); } } // namespace detail @@ -97,8 +107,10 @@ PORTABLE_INLINE_FUNCTION auto nx_arr(NXs... nxs) { template PORTABLE_INLINE_FUNCTION std::size_t compute_index(const narr &nd, const Indicies... idxs) { + constexpr auto N = sizeof...(Indicies); + auto strides = detail::stride_arr(nd); // adding `0` if sizeof...(Indicies) == 0 - return 0 + detail::compute_index<0>(nd, idxs...); + return 0 + detail::compute_index<0, N>(strides, idxs...); } // multiply reduce array diff --git a/test/test_portsofcall.cpp b/test/test_portsofcall.cpp index e450e749..b92db205 100644 --- a/test/test_portsofcall.cpp +++ b/test/test_portsofcall.cpp @@ -104,6 +104,47 @@ TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { } } +template +PORTABLE_FORCEINLINE_FUNCTION Real iflat(T k, T j, T i) { + return i + NX * j + NXNY * k; +} + +TEST_CASE("Correct portable indexing", "[PortableMDArray]") { + // layout + constexpr std::size_t NX = 32, NY = 64, NZ = 4; + constexpr std::size_t NC = NX * NY * NZ; + constexpr Real scale = 0.1; + // size in bytes + constexpr const size_t NCb = NC * sizeof(Real); + + // vector length N on host of Real + std::vector tape_ref(NC), tape_buf(NC); + + for (auto n = 0; n < NC; ++n) + tape_ref[n] = scale * n; + + // device pointer + Real *tape_d = (Real *)PORTABLE_MALLOC(NCb); + + auto view_d = PortableMDArray(tape_d, NZ, NY, NX); + + // set device values + portableFor( + "set unique val", 0, NZ, 0, NY, 0, NX, + PORTABLE_LAMBDA(const int &k, const int &j, const int &i) { + view_d(k, j, i) = scale * iflat(k, j, i); + }); + + portableCopyToHost(tape_buf.data(), tape_d, NCb); + + for (auto n = 0; n < NC; ++n) { + INFO("REF=" << tape_ref[n] << " BUF=" << tape_buf[n]); + REQUIRE_THAT(tape_buf[n], Catch::Matchers::WithinRel(tape_ref[n])); + } + + PORTABLE_FREE(tape_d); +} + PORTABLE_FORCEINLINE_FUNCTION Real index_func(size_t i) { return i * i + 2.0 * i + 3.0; } From 4863827f9cc9b2ad95def2269c89a2c5d094fd31 Mon Sep 17 00:00:00 2001 From: Christopher Michael Mauney Date: Wed, 4 Oct 2023 22:17:44 -0600 Subject: [PATCH 06/22] some design changes --- ports-of-call/portable_arrays.hpp | 173 +++++++++++++++--------------- 1 file changed, 86 insertions(+), 87 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 54b34cb3..e2adab99 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include // size_t #include // memset() #include @@ -37,82 +38,13 @@ // maximum number of dimensions constexpr std::size_t MAXDIM = 6; -// array type of dimensions/strides -using narr = std::array; namespace detail { +// convert `index_sequence` values to constant +template +constexpr std::size_t to_const = V; -// set_value end-case -template -PORTABLE_INLINE_FUNCTION void set_value(narr &ndat, NX value) { - ndat[Ind] = value; -} - -// set_valueS end case, dispatch to set_value -template -PORTABLE_INLINE_FUNCTION void set_values(narr &ndat, NX value) { - set_value(ndat, value); -} - -// set_values general case, set `Ind` position of array (tracking from RIGHT) -// with head of size list `NX` -template -PORTABLE_INLINE_FUNCTION void set_values(narr &ndat, NX value, NXs... nxs) { - set_value(ndat, value); - set_values(ndat, nxs...); -} - -template -PORTABLE_INLINE_FUNCTION auto stride_arr(const narr &nd) { - std::array sa; - sa[0] = 1; - for (auto i = 1; i < N; ++i) { - sa[i] = sa[i - 1] * nd[i - 1]; - } - return sa; -} - -// compute_index base case, i.e. fastest moving index -template -PORTABLE_INLINE_FUNCTION size_t -compute_index(const std::array &sa, const size_t index) { - return index; -} - -// compute_index general case, computing slower moving index strides -template -PORTABLE_INLINE_FUNCTION size_t -compute_index(const std::array &sa, const size_t index, - const Tail... tail) { - return index * sa[N - Ind - 1] + compute_index(sa, tail...); -} - -} // namespace detail - -// driver of nx array creation. -// Note we iterate from the RIGHT, to match -// what was done explicilt prior. -template -PORTABLE_INLINE_FUNCTION auto nx_arr(NXs... nxs) { - narr r; - constexpr std::size_t NP = sizeof...(NXs); - detail::set_values(r, nxs...); - // fill "empty" dims with 1 - for (auto i = NP; i < MAXDIM; ++i) - r[i] = 1; - return r; -} - -// compute index driver. -template -PORTABLE_INLINE_FUNCTION std::size_t compute_index(const narr &nd, - const Indicies... idxs) { - constexpr auto N = sizeof...(Indicies); - auto strides = detail::stride_arr(nd); - // adding `0` if sizeof...(Indicies) == 0 - return 0 + detail::compute_index<0, N>(strides, idxs...); -} - +// array type of dimensions/strides // multiply reduce array // NOTE: we can do product of variadic parameters `vars...` as // `arr_mul({vars...})` @@ -123,18 +55,24 @@ PORTABLE_INLINE_FUNCTION auto arr_mul(const std::array &a) { r *= v; return r; } +} // namespace detail template class PortableMDArray { public: // explicit initialization of objects - PORTABLE_FUNCTION PortableMDArray(T *data, narr nxa, std::size_t rank) - : pdata_(data), nxs_(nxa), rank_(rank) {} + PORTABLE_FUNCTION PortableMDArray(T *data, + std::array extents, + std::array strides, + std::size_t rank) noexcept + : pdata_(data), nxs_(extents), strides_(strides), rank_(rank) {} // variadic ctor, dispatch to explicit constructor - template - PORTABLE_FUNCTION PortableMDArray(T *p, NXs... nxs) noexcept - : PortableMDArray(p, nx_arr(nxs...), sizeof...(NXs)) {} + template + PORTABLE_FUNCTION PortableMDArray(T *p, NXs... nxs) noexcept { + NewPortableMDArray(p, nxs...); + } //: PortableMDArray(p, make_nxs_array(nxs...), make_strides_array(), N) { + //} // ctors // default ctor: simply set null PortableMDArray @@ -149,8 +87,7 @@ class PortableMDArray { template PORTABLE_FUNCTION void NewPortableMDArray(T *data, NXs... nxs) noexcept { pdata_ = data; - nxs_ = nx_arr(nxs...); - rank_ = sizeof...(NXs); + update_layout(nxs...); } // public function to swap underlying data pointers of two equally-sized // arrays @@ -189,7 +126,9 @@ class PortableMDArray { return -1; } - PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { return arr_mul(nxs_); } + PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { + return detail::arr_mul(nxs_); + } PORTABLE_FORCEINLINE_FUNCTION std::size_t GetSizeInBytes() const { return GetSize() * sizeof(T); } @@ -197,9 +136,8 @@ class PortableMDArray { PORTABLE_INLINE_FUNCTION size_t GetRank() const { return rank_; } template PORTABLE_INLINE_FUNCTION void Reshape(NXs... nxs) { - assert(arr_mul({nxs...}) == GetSize()); - nxs_ = nx_arr(nxs...); - rank_ = sizeof...(NXs); + assert(detail::arr_mul({nxs...}) == GetSize()); + update_layout(nxs...); } PORTABLE_FORCEINLINE_FUNCTION bool IsShallowSlice() { return true; } PORTABLE_FORCEINLINE_FUNCTION bool IsEmpty() { return GetSize() < 1; } @@ -222,12 +160,12 @@ class PortableMDArray { // l-value, e.g.: a(3) = 3.0; template PORTABLE_FORCEINLINE_FUNCTION T &operator()(const Is... idxs) { - return pdata_[compute_index(nxs_, idxs...)]; + return pdata_[compute_index(idxs...)]; } template PORTABLE_FORCEINLINE_FUNCTION T &operator()(const Is... idxs) const { - return pdata_[compute_index(nxs_, idxs...)]; + return pdata_[compute_index(idxs...)]; } PortableMDArray &operator*=(T scale) { @@ -263,8 +201,68 @@ class PortableMDArray { const int indx, const int nvar); private: + template + PORTABLE_FORCEINLINE_FUNCTION auto make_nxs_array(NX... nxs) { + std::array a; + std::array t{static_cast(nxs)...}; + for (auto i = 0; i < N; ++i) { + a[i] = t[N - i - 1]; + } + for (auto i = N; i < MAXDIM; ++i) { + a[i] = 1; + } + return a; + } + + template + PORTABLE_INLINE_FUNCTION auto make_strides_array() { + std::array a; + a[0] = 1; + for (auto i = 1; i < N; ++i) { + a[i] = a[i - 1] * nxs_[i - 1]; + } + for (auto i = N; i < MAXDIM; ++i) + a[i] = 0; + + return a; + } + + // driver of nx array creation. + // Note we iterate from the RIGHT, to match + // what was done explicilt prior. + template + PORTABLE_INLINE_FUNCTION auto update_layout(NXs... nxs) { + + rank_ = N; + nxs_ = make_nxs_array(nxs...); + strides_ = make_strides_array(); + } + + // compute_index base case, i.e. fastest moving index + template + PORTABLE_FORCEINLINE_FUNCTION size_t + compute_index_impl(const size_t index) const { + return index; + } + + // compute_index general case, computing slower moving index strides + template + PORTABLE_FORCEINLINE_FUNCTION size_t + compute_index_impl(const size_t index, const Tail... tail) const { + return index * strides_[N - Ind - 1] + + compute_index_impl(tail...); + } + // compute index driver. + template + PORTABLE_FORCEINLINE_FUNCTION std::size_t + compute_index(const Indicies... idxs) const { + // adding `0` if sizeof...(Indicies) == 0 + return 0 + compute_index_impl<0, N>(idxs...); + } + T *pdata_; - narr nxs_; + std::array nxs_; + std::array strides_; int rank_; }; @@ -274,6 +272,7 @@ template PortableMDArray::PortableMDArray(const PortableMDArray &src) noexcept { nxs_ = src.nxs_; rank_ = src.rank_; + strides_ = src.strides_; if (src.pdata_) pdata_ = src.pdata_; } From 1492a988a3b4706e79673d03093b78db8c32c8f7 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Wed, 14 Aug 2024 11:47:45 -0600 Subject: [PATCH 07/22] tests and benchmarks --- ports-of-call/portable_arrays.hpp | 13 +++-- test/CMakeLists.txt | 35 ++++++------ test/test_portsofcall.cpp | 88 ++++++++++++++++++++++++------- test/test_utilities.hpp | 85 +++++++++++++++++++++++++++++ 4 files changed, 179 insertions(+), 42 deletions(-) create mode 100644 test/test_utilities.hpp diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index e2adab99..3346760a 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -28,7 +28,6 @@ #include #include #include -#include #include // size_t #include // memset() #include @@ -55,6 +54,14 @@ PORTABLE_INLINE_FUNCTION auto arr_mul(const std::array &a) { r *= v; return r; } +PORTABLE_FORCEINLINE_FUNCTION +decltype(auto) vp_prod() { + return [](auto &&v) { + return std::accumulate(v.begin(), v.end(), 1, + std::multiplies()); + }; +} + } // namespace detail template @@ -127,7 +134,7 @@ class PortableMDArray { } PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { - return detail::arr_mul(nxs_); + return detail::vp_prod()(nxs_); } PORTABLE_FORCEINLINE_FUNCTION std::size_t GetSizeInBytes() const { return GetSize() * sizeof(T); @@ -136,7 +143,7 @@ class PortableMDArray { PORTABLE_INLINE_FUNCTION size_t GetRank() const { return rank_; } template PORTABLE_INLINE_FUNCTION void Reshape(NXs... nxs) { - assert(detail::arr_mul({nxs...}) == GetSize()); + assert(detail::vp_prod()(std::array{nxs...}) == GetSize()); update_layout(nxs...); } PORTABLE_FORCEINLINE_FUNCTION bool IsShallowSlice() { return true; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cbad4bbc..7fc8a6ba 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,40 +1,35 @@ -# © 2021. Triad National Security, LLC. All rights reserved. This -# program was produced under U.S. Government contract 89233218CNA000001 -# for Los Alamos National Laboratory (LANL), which is operated by Triad -# National Security, LLC for the U.S. Department of Energy/National -# Nuclear Security Administration. All rights in the program are -# reserved by Triad National Security, LLC, and the U.S. Department of -# Energy/National Nuclear Security Administration. The Government is -# granted for itself and others acting on its behalf a nonexclusive, -# paid-up, irrevocable worldwide license in this material to reproduce, -# prepare derivative works, distribute copies to the public, perform +# © 2021. Triad National Security, LLC. All rights reserved. This program was +# produced under U.S. Government contract 89233218CNA000001 for Los Alamos +# National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. +# All rights in the program are reserved by Triad National Security, LLC, and +# the U.S. Department of Energy/National Nuclear Security Administration. The +# Government is granted for itself and others acting on its behalf a +# nonexclusive, paid-up, irrevocable worldwide license in this material to +# reproduce, prepare derivative works, distribute copies to the public, perform # publicly and display publicly, and to permit others to do so. find_package(Catch2 REQUIRED) -# this interface target is to collect -# compile/link options for the test +# this interface target is to collect compile/link options for the test add_library(portsofcall_iface INTERFACE) -if (PORTABILITY_STRATEGY_KOKKOS) +if(PORTABILITY_STRATEGY_KOKKOS) if(NOT TARGET Kokkos::kokkos) find_package(Kokkos REQUIRED) endif() target_link_libraries(portsofcall_iface INTERFACE Kokkos::kokkos) # this comes with ports-of-call target - target_compile_definitions(portsofcall_iface INTERFACE PORTABILITY_STRATEGY_KOKKOS) + target_compile_definitions(portsofcall_iface + INTERFACE PORTABILITY_STRATEGY_KOKKOS) endif() target_link_libraries(portsofcall_iface INTERFACE Catch2::Catch2) # add unit tests add_executable(test_portsofcall test_portsofcall.cpp) -target_link_libraries(test_portsofcall - PRIVATE - ports-of-call::ports-of-call - portsofcall_iface -) +target_link_libraries(test_portsofcall PRIVATE ports-of-call::ports-of-call + portsofcall_iface) include(Catch) catch_discover_tests(test_portsofcall) - diff --git a/test/test_portsofcall.cpp b/test/test_portsofcall.cpp index b92db205..feb28ab7 100644 --- a/test/test_portsofcall.cpp +++ b/test/test_portsofcall.cpp @@ -11,14 +11,18 @@ // copies to the public, perform publicly and display publicly, and to // permit others to do so. -#include #include +#include +#include #include #include +// #include +#include #include #define CATCH_CONFIG_RUNNER -#include "catch2/catch.hpp" +#include "catch2/catch_all.hpp" +#include "test_utilities.hpp" TEST_CASE("EXECUTION_IS_HOST is set correctly", "[PortsOfCall]") { // testing this is maybe nontrivial? @@ -104,24 +108,24 @@ TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { } } -template -PORTABLE_FORCEINLINE_FUNCTION Real iflat(T k, T j, T i) { - return i + NX * j + NXNY * k; -} - TEST_CASE("Correct portable indexing", "[PortableMDArray]") { // layout + auto iflat = [](auto nx, auto nxny) { + return [=](auto k, auto j, auto i) { return i + nx * j + nxny * k; }; + }; + constexpr std::size_t NX = 32, NY = 64, NZ = 4; constexpr std::size_t NC = NX * NY * NZ; constexpr Real scale = 0.1; // size in bytes - constexpr const size_t NCb = NC * sizeof(Real); + constexpr const std::size_t NCb = NC * sizeof(Real); // vector length N on host of Real std::vector tape_ref(NC), tape_buf(NC); - for (auto n = 0; n < NC; ++n) - tape_ref[n] = scale * n; + for (auto n = 0; n < NC; ++n) { + tape_ref[n] = scale * static_cast(n); + } // device pointer Real *tape_d = (Real *)PORTABLE_MALLOC(NCb); @@ -132,24 +136,22 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { portableFor( "set unique val", 0, NZ, 0, NY, 0, NX, PORTABLE_LAMBDA(const int &k, const int &j, const int &i) { - view_d(k, j, i) = scale * iflat(k, j, i); + view_d(k, j, i) = scale * iflat(NX, NX * NY)(k, j, i); }); portableCopyToHost(tape_buf.data(), tape_d, NCb); for (auto n = 0; n < NC; ++n) { - INFO("REF=" << tape_ref[n] << " BUF=" << tape_buf[n]); + INFO("REF=" << tape_ref[n] << " BUF=" << tape_buf[n] << " n=" << n); REQUIRE_THAT(tape_buf[n], Catch::Matchers::WithinRel(tape_ref[n])); } PORTABLE_FREE(tape_d); } -PORTABLE_FORCEINLINE_FUNCTION -Real index_func(size_t i) { return i * i + 2.0 * i + 3.0; } - TEST_CASE("portableCopy works with all portability strategies", - "[portableCopy]") { + "[PortableMDArray]") { + auto index_func = [](auto i) { return i * i + 2.0 * i + 3.0; }; // number of elements constexpr const size_t N = 32; // size in bytes @@ -193,19 +195,67 @@ TEST_CASE("portableCopy works with all portability strategies", portableCopyToHost(b.data(), a, Nb); // count elements that don't match reference - sum = 0; + int nbad{0}; for (int i = 0; i < N; ++i) { if (b[i] != index_func(i)) { - sum += 1; + nbad += 1; } } // make sure all elements match - REQUIRE(sum == 0); + INFO("nbad=" << nbad); + REQUIRE(nbad == 0); // free device memory PORTABLE_FREE(a); } +// runs benchmarks for indexing. +// note: to run, execute +// ./tests/test_portsofcall "[!benchmark]" +// (you may need to escape the `!` char depending +// on your shell) +TEST_CASE("Benchmark Indexing", "[!benchmark]") { + constexpr std::size_t NX = 32, NY = 64, NZ = 16; + constexpr std::size_t NU = 256, NV = 256, NW = 128; + + // construct a series of PortableMDArrays + // of different rank and sizes, and + // iterates through all contiguous indexing. + + // clang-format off + SECTION("small") { + BENCHMARK("index calc 1D") + { + return testing::idx_contiguous_bm(std::array{NX}); + }; + BENCHMARK("index calc 2D") + { + return testing::idx_contiguous_bm(std::array{NX, NY}); + }; + BENCHMARK("index calc 3D") + { + return testing::idx_contiguous_bm(std::array{NX, NY, NZ}); + }; + } + + SECTION("big") { + BENCHMARK("index calc 1D") + { + return testing::idx_contiguous_bm(std::array{NU}); + }; + BENCHMARK("index calc 2D") + { + return testing::idx_contiguous_bm(std::array{NU, NV}); + }; + BENCHMARK("index calc 3D") + { + return testing::idx_contiguous_bm(std::array{NU, NV, NW}); + }; + + } + // clang-format on +} + int main(int argc, char *argv[]) { #ifdef PORTABILITY_STRATEGY_KOKKOS diff --git a/test/test_utilities.hpp b/test/test_utilities.hpp new file mode 100644 index 00000000..2d5ad3fb --- /dev/null +++ b/test/test_utilities.hpp @@ -0,0 +1,85 @@ +#ifndef _TEST_PORTOFCALL_HPP_ +#define _TEST_PORTOFCALL_HPP_ + +#include <__utility/integer_sequence.h> +#include +#include +#include +#include +#include + +namespace testing { + +template +constexpr auto array_fix_impl(const Array &a, std::index_sequence is) { + auto mm = std::array(); + for (int i = 0; i < is.size(); ++i) { + mm[(i * 2)] = 0; + mm[(i * 2) + 1] = a[i]; + } + return mm; +} + +// we want to call +// portableFor(..., <0, NX>, <0, NY>, ...) +// for an arbitrary set of ranges +// this function takes an array of N sizes, and converts +// it into an array 2N {0, NX} contiguous pairs. +// ex: +// array{10,12,13} -> array{0,10,0,12,0,13} +template > +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +array_fix(const std::array &a) { + return array_fix_impl(a, Indx{}); +} + +// constructs the portableFor call +// tparams: +// - View : class of data view +// - Array: class of container holding sizes +// - J... : index sequence +// params: +// - View v: view of data +// - array ext_par: array of sizes +// this was written ad-hoc, and could be more general +// (e.g. pass label, functor in lambda) +template +PORTABLE_INLINE_FUNCTION constexpr auto pf_invoke(View v, const Array &ext_par, + std::index_sequence) { + + auto stud_arr = array_fix(ext_par); + portableFor( + "set unique val", stud_arr[J]..., + PORTABLE_LAMBDA(auto... is) { v(is...) = 1.0; }); +} + +template +PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) +mdview(Ptr *d, const Array &arr, std::index_sequence) { + return PortableMDArray(d, arr[I]...); +} + +// stand up and strip down a benchmark +// input is a set of sizes +template , + typename I2 = std::make_index_sequence<2 * N>> +PORTABLE_FUNCTION auto idx_contiguous_bm(const std::array &nxa) { + auto nc = + std::accumulate(std::begin(nxa), std::end(nxa), 1, std::multiplies{}); + + Real *tape_d = (Real *)PORTABLE_MALLOC(nc * sizeof(Real)); + + auto view_d = mdview(tape_d, nxa, I1{}); + + pf_invoke(view_d, nxa, I2{}); + + PORTABLE_FREE(tape_d); + + // return here is arbitrary, Catch2 recommends this to avoid + // optimizing out a call. + return 1; +} + +} // namespace testing +#endif From 551910bc1b6cab942d5905cfba0e9692d5867b5b Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Thu, 29 Aug 2024 10:09:39 -0600 Subject: [PATCH 08/22] store --- ports-of-call/portable_arrays.hpp | 26 ++----- ports-of-call/utility/array.hpp | 74 ++++++++++++++++++ ports-of-call/utility/index_algo.hpp | 108 +++++++++++++++++++++++++++ 3 files changed, 187 insertions(+), 21 deletions(-) create mode 100644 ports-of-call/utility/array.hpp create mode 100644 ports-of-call/utility/index_algo.hpp diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 3346760a..dc4a7fcf 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -25,12 +25,14 @@ // NOTE THE TRAILING INDEX INSIDE THE PARENTHESES IS INDEXED FASTEST #include "portability.hpp" +#include "utility/array.hpp" #include #include #include #include // size_t #include // memset() #include +#include #include #include #include // swap() @@ -43,25 +45,6 @@ namespace detail { template constexpr std::size_t to_const = V; -// array type of dimensions/strides -// multiply reduce array -// NOTE: we can do product of variadic parameters `vars...` as -// `arr_mul({vars...})` -template -PORTABLE_INLINE_FUNCTION auto arr_mul(const std::array &a) { - auto r = T{1}; - for (auto v : a) - r *= v; - return r; -} -PORTABLE_FORCEINLINE_FUNCTION -decltype(auto) vp_prod() { - return [](auto &&v) { - return std::accumulate(v.begin(), v.end(), 1, - std::multiplies()); - }; -} - } // namespace detail template @@ -134,7 +117,7 @@ class PortableMDArray { } PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { - return detail::vp_prod()(nxs_); + return util::array_reduce(nxs_, std::multiplies{}); } PORTABLE_FORCEINLINE_FUNCTION std::size_t GetSizeInBytes() const { return GetSize() * sizeof(T); @@ -143,7 +126,8 @@ class PortableMDArray { PORTABLE_INLINE_FUNCTION size_t GetRank() const { return rank_; } template PORTABLE_INLINE_FUNCTION void Reshape(NXs... nxs) { - assert(detail::vp_prod()(std::array{nxs...}) == GetSize()); + assert(util::array_reduce(std::array{nxs...}, + std::multiplies{}) == GetSize()); update_layout(nxs...); } PORTABLE_FORCEINLINE_FUNCTION bool IsShallowSlice() { return true; } diff --git a/ports-of-call/utility/array.hpp b/ports-of-call/utility/array.hpp new file mode 100644 index 00000000..56e2508f --- /dev/null +++ b/ports-of-call/utility/array.hpp @@ -0,0 +1,74 @@ +#ifndef _PORTSOFCALL_UTILITY_ARRAY_HPP_ +#define _PORTSOFCALL_UTILITY_ARRAY_HPP_ + +#include "../portability.hpp" +#include +#include + +namespace util { + +namespace detail { +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +array_map_impl(std::array const &x, F f, std::index_sequence) { + return std::array{f(x[Is])...}; +} + +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +array_map_impl(std::array const &x, std::array const &y, F f, + std::index_sequence) { + return std::array{f(x[Is], y[Is])...}; +} + +template +PORTABLE_INLINE_FUNCTION constexpr T +array_reduce_impl(std::array const &x, Op op) { + if constexpr ((l - f) == 1) + return x[f]; + else { + constexpr std::size_t n = l - f; + T left_sum = array_reduce_impl(x, op); + T right_sum = array_reduce_impl(x, op); + return op(left_sum, right_sum); + } +} + +} // namespace detail + +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +array_map(std::array const &x, F f) { + return detail::array_map_impl(x, f, std::make_index_sequence{}); +} + +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +array_map(std::array const &x, std::array const &y, F f) { + return detail::array_map_impl(x, y, f, std::make_index_sequence{}); +} + +template +PORTABLE_FORCEINLINE_FUNCTION constexpr T +array_partial_reduce(std::array x, T initial_value, Op op) { + static_assert(I <= N); + if constexpr (I == 0) + return initial_value; + else + return detail::array_reduce_impl<0, I>(x, op); +} + +template +PORTABLE_FORCEINLINE_FUNCTION constexpr T array_reduce(std::array x, + T initial_value, Op op) { + return array_partial_reduce(x, initial_value, op); +} +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +as_array(std::index_sequence) { + return std::array{Is...}; +} + +} // namespace util + +#endif // _PORTSOFCALL_UTILITY_ARRAY_HPP_ diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp new file mode 100644 index 00000000..31a5d8e8 --- /dev/null +++ b/ports-of-call/utility/index_algo.hpp @@ -0,0 +1,108 @@ +#ifndef _PORTSOFCALL_UTILITY_INDEX_ALGO_HPP_ +#define _PORTSOFCALL_UTILITY_INDEX_ALGO_HPP_ + +#include "../portability.hpp" +#include "array.hpp" +#include +#include +#include + +namespace util { + +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +get_stride(std::array const &dim) { + static_assert(I < dim.size(), "Dim index is out of bounds"); + + // column major + return std::accumulate(std::begin(dim), std::begin(dim) + I, std::size_t{1}, + std::multiplies{}); +} + +namespace detail { +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +get_strides_impl(std::array const &dim, std::index_sequence) { + return std::array{get_stride(dim)...}; +} + +} // namespace detail + +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +get_strides(std::array const &dim) { + return detail::get_strides_impl(dim, std::make_index_sequence{}); +} + +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +fast_findex(std::array const &ijk, std::array const &dim, + std::array const &stride) { + // TODO: assert ijk in bounds + return std::inner_product(std::begin(ijk), std::end(ijk), std::begin(stride), + std::size_t{0}); +} + +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +findex(std::array const &ijk, std::array const &dim) { + return fast_findex(ijk, dim, get_strides(dim)); +} + +namespace handroll { +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +get_stride(std::array const &dim) { + + // column major + return array_partial_reduce(dim, T{1}, std::multiplies{}); +} + +namespace detail { +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +get_strides_impl(std::array const &dim, std::index_sequence) { + return std::array{get_stride(dim)...}; +} +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +get_strides(std::array const &dim) { + return detail::get_strides_impl(dim, std::make_index_sequence{}); +} // namespace handroll::detail +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +fast_findex(std::array const &ijk, std::array const &dim, + std::array const &stride) { + // TODO: assert ijk in bounds + return array_reduce( + array_map(ijk, stride, [](auto a, auto b) { return a * b; }), T{1}, + std::plus{}); +} +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto +findex(std::array const &ijk, std::array const &dim) { + return fast_findex(ijk, dim, get_strides(dim)); +} +} // namespace handroll + +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr std::array +fast_mindices(std::size_t idx, std::array const &dim, + std::array const &stride) { + std::array mdidx; + for (std::int64_t i = dim.size() - 1; i >= 0; --i) { + mdidx[i] = idx / std::size_t(stride[i]); + idx -= mdidx[i] * std::size_t(stride[i]); + } + return mdidx; +} + +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto mindices(std::size_t idx, + Array dim) { + return fast_mindices(idx, dim, get_strides(dim)); +} + +} // namespace util + +#endif // _PORTSOFCALL_UTILITY_INDEX_ALGO_HPP_ From 0f66ba178bcc89634d8b0bdb537e74adcbb5e170 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Fri, 30 Aug 2024 13:45:17 -0600 Subject: [PATCH 09/22] array and index algs --- ports-of-call/array.hpp | 7 ++ ports-of-call/portable_arrays.hpp | 155 ++++++++++++--------------- ports-of-call/utility/array_algo.hpp | 81 +++++++++----- ports-of-call/utility/index_algo.hpp | 61 +++++------ test/CMakeLists.txt | 3 +- test/test_mdidx.cpp | 144 +++++++++++++++++++++++++ test/test_utilities.hpp | 19 ++-- 7 files changed, 310 insertions(+), 160 deletions(-) create mode 100644 test/test_mdidx.cpp diff --git a/ports-of-call/array.hpp b/ports-of-call/array.hpp index 8eb90cc0..64569f01 100644 --- a/ports-of-call/array.hpp +++ b/ports-of-call/array.hpp @@ -3,6 +3,7 @@ #include "portability.hpp" #include "portable_errors.hpp" +#include "ports-of-call/utility/array_algo.hpp" #include #include @@ -192,6 +193,12 @@ struct array { } } + PORTABLE_FUNCTION constexpr bool operator==(const array &rhs) const { + for (size_type i = 0; i < N; ++i) + if (arr[i] != rhs.arr[i]) return false; + return true; + } + //! swap the array contents with another PORTABLE_FUNCTION constexpr void swap(array &other) { using std::swap; diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index d96393ac..72ef43f6 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -24,15 +24,15 @@ // [N4xN3xN2xN1] are accessed as: A(n,k,j,i) = A[i + N1*(j + N2*(k + N3*n))] // NOTE THE TRAILING INDEX INSIDE THE PARENTHESES IS INDEXED FASTEST +#include "array.hpp" #include "portability.hpp" -#include "utility/array.hpp" +#include "utility/array_algo.hpp" #include #include #include #include // size_t #include // memset() #include -#include #include #include #include // swap() @@ -47,12 +47,17 @@ constexpr std::size_t to_const = V; } // namespace detail -template +template +using Array = PortsOfCall::array; + +template +using IArray = Array; + +template class PortableMDArray { public: // explicit initialization of objects - PORTABLE_FUNCTION PortableMDArray(T *data, std::array extents, - std::array strides, + PORTABLE_FUNCTION PortableMDArray(T *data, IArray extents, IArray strides, std::size_t rank) noexcept : pdata_(data), nxs_(extents), strides_(strides), rank_(rank) {} @@ -69,8 +74,23 @@ class PortableMDArray { PortableMDArray() noexcept : pdata_(nullptr), nxs_{{0}}, rank_{0} {} // define copy constructor and overload assignment operator so both do deep // copies. - PortableMDArray(const PortableMDArray &t) noexcept; - PortableMDArray &operator=(const PortableMDArray &t) noexcept; + // PortableMDArray(const PortableMDArray &t) noexcept; + // PortableMDArray &operator=(const PortableMDArray &t) noexcept; + PortableMDArray(const PortableMDArray &src) noexcept { + nxs_ = src.nxs_; + rank_ = src.rank_; + strides_ = src.strides_; + if (src.pdata_) pdata_ = src.pdata_; + } + + PortableMDArray &operator=(const PortableMDArray &src) noexcept { + if (this != &src) { + nxs_ = src.nxs; + rank_ = src.rank_; + pdata_ = src.pdata_; + } + return *this; + } template PORTABLE_FUNCTION void NewPortableMDArray(T *data, NXs... nxs) noexcept { @@ -79,7 +99,12 @@ class PortableMDArray { } // public function to swap underlying data pointers of two equally-sized // arrays - void SwapPortableMDArray(PortableMDArray &array2); + // void SwapPortableMDArray(PortableMDArray &array2); + + void SwapPortableMDArray(PortableMDArray &array2) { + std::swap(pdata_, array2.pdata_); + return; + } // functions to get array dimensions template @@ -115,16 +140,16 @@ class PortableMDArray { } PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { - return util::array_reduce(nxs_, std::multiplies{}); + return util::array_reduce(nxs_, 1, std::multiplies{}); } PORTABLE_FORCEINLINE_FUNCTION std::size_t GetSizeInBytes() const { return GetSize() * sizeof(T); } - PORTABLE_INLINE_FUNCTION size_t GetRank() const { return rank_; } + PORTABLE_INLINE_FUNCTION std::size_t GetRank() const { return rank_; } template PORTABLE_INLINE_FUNCTION void Reshape(NXs... nxs) { - assert(util::array_reduce(std::array{nxs...}, std::multiplies{}) == + assert(util::array_reduce(IArray{nxs...}, 1, std::multiplies{}) == GetSize()); update_layout(nxs...); } @@ -158,19 +183,19 @@ class PortableMDArray { return pdata_[compute_index(idxs...)]; } - PortableMDArray &operator*=(T scale) { + PortableMDArray &operator*=(T scale) { std::transform(pdata_, pdata_ + GetSize(), pdata_, [scale](T val) { return scale * val; }); return *this; } - PortableMDArray &operator+=(const PortableMDArray &other) { + PortableMDArray &operator+=(const PortableMDArray &other) { assert(GetSize() == other.GetSize()); std::transform(pdata_, pdata_ + GetSize(), other.pdata_, pdata_, std::plus()); return *this; } - PortableMDArray &operator-=(const PortableMDArray &other) { + PortableMDArray &operator-=(const PortableMDArray &other) { assert(GetSize() == other.GetSize()); std::transform(pdata_, pdata_ + GetSize(), other.pdata_, pdata_, std::minus()); return *this; @@ -178,23 +203,40 @@ class PortableMDArray { // Checks that arrays point to same data with same shape // note this POINTER equivalence, not data equivalence - bool operator==(const PortableMDArray &other) const; - bool operator!=(const PortableMDArray &other) const { return !(*this == other); } + bool operator==(const PortableMDArray &rhs) const { + return (pdata_ == rhs.pdata_ && nxs_ == rhs.nxs_); // NB rank is implied + } - // (deferred) initialize an array with slice from another array - PORTABLE_FUNCTION - void InitWithShallowSlice(const PortableMDArray &src, const int dim, const int indx, - const int nvar); + bool operator!=(const PortableMDArray &other) const { return !(*this == other); } + + PORTABLE_FUNCTION void InitWithShallowSlice(const PortableMDArray &src, + const int dim, const int indx, + const int nvar) { + pdata_ = src.pdata_; + std::size_t offs = indx; + nxs_[dim - 1] = nvar; + + for (std::size_t i = 0; i < dim - 1; ++i) { + nxs_[i] = src.nxs_[i]; + offs *= nxs_[i]; + } + for (std::size_t i = dim; i < MAXDIM; ++i) { + nxs_[i] = 1; + } + pdata_ += offs; + + return; + } private: template PORTABLE_FORCEINLINE_FUNCTION auto make_nxs_array(NX... nxs) { - std::array a; - std::array t{static_cast(nxs)...}; + IArray a; + IArray t{static_cast(nxs)...}; for (auto i = 0; i < N; ++i) { a[i] = t[N - i - 1]; } - for (auto i = N; i < MAXDIM; ++i) { + for (auto i = N; i < D; ++i) { a[i] = 1; } return a; @@ -202,12 +244,12 @@ class PortableMDArray { template PORTABLE_INLINE_FUNCTION auto make_strides_array() { - std::array a; + IArray a; a[0] = 1; for (auto i = 1; i < N; ++i) { a[i] = a[i - 1] * nxs_[i - 1]; } - for (auto i = N; i < MAXDIM; ++i) + for (auto i = N; i < D; ++i) a[i] = 0; return a; @@ -244,41 +286,12 @@ class PortableMDArray { } T *pdata_; - std::array nxs_; - std::array strides_; + IArray nxs_; + IArray strides_; int rank_; -}; - -// copy constructor (does a shallow copy) - -template -PortableMDArray::PortableMDArray(const PortableMDArray &src) noexcept { - nxs_ = src.nxs_; - rank_ = src.rank_; - strides_ = src.strides_; - if (src.pdata_) pdata_ = src.pdata_; -} - -// shallow copy assignment operator - -template -PortableMDArray & -PortableMDArray::operator=(const PortableMDArray &src) noexcept { - if (this != &src) { - nxs_ = src.nxs; - rank_ = src.rank_; - pdata_ = src.pdata_; - } - return *this; -} - -// Checks that arrays point to same data with same shape -// note this POINTER equivalence, not data equivalence -template -bool PortableMDArray::operator==(const PortableMDArray &rhs) const { - return (pdata_ == rhs.pdata_ && nxs_ == rhs.nxs_); // NB rank is implied -} + public: +}; //---------------------------------------------------------------------------------------- //! \fn PortableMDArray::InitWithShallowSlice() // \brief shallow copy of nvar elements in dimension dim of an array, @@ -288,36 +301,10 @@ bool PortableMDArray::operator==(const PortableMDArray &rhs) const { // entries of the src array for d -PORTABLE_FUNCTION void -PortableMDArray::InitWithShallowSlice(const PortableMDArray &src, const int dim, - const int indx, const int nvar) { - pdata_ = src.pdata_; - std::size_t offs = indx; - nxs_[dim - 1] = nvar; - - for (std::size_t i = 0; i < dim - 1; ++i) { - nxs_[i] = src.nxs_[i]; - offs *= nxs_[i]; - } - for (std::size_t i = dim; i < MAXDIM; ++i) { - nxs_[i] = 1; - } - pdata_ += offs; - - return; -} - //---------------------------------------------------------------------------------------- //! \fn PortableMDArray::SwapPortableMDArray() // \brief swap pdata_ pointers of two equally sized PortableMDArrays // (shallow swap) // Does not allocate memory for either array -template -void PortableMDArray::SwapPortableMDArray(PortableMDArray &array2) { - std::swap(pdata_, array2.pdata_); - return; -} - #endif // _PORTABLE_ARRAYS_HPP_ diff --git a/ports-of-call/utility/array_algo.hpp b/ports-of-call/utility/array_algo.hpp index e4c52944..f10d09c1 100644 --- a/ports-of-call/utility/array_algo.hpp +++ b/ports-of-call/utility/array_algo.hpp @@ -7,65 +7,88 @@ namespace util { +template +constexpr auto +is(std::integral_constant) { // = std::make_index_sequence; + return std::make_index_sequence{}; +} + +template +using value_t = typename A::value_type; + +template +using reduction_value_t = + decltype(std::declval()(std::declval>(), std::declval>())); + namespace detail { -template -PORTABLE_FORCEINLINE_FUNCTION constexpr auto -array_map_impl(std::array const &x, F f, std::index_sequence) { +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map_impl(A const &x, F f, + std::index_sequence) { return std::array{f(x[Is])...}; } -template -PORTABLE_FORCEINLINE_FUNCTION constexpr auto -array_map_impl(std::array const &x, std::array const &y, F f, - std::index_sequence) { - return std::array{f(x[Is], y[Is])...}; +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map_impl(A const &x, B const &y, F f, + std::index_sequence) { + return A{f(x[Is], y[Is])...}; } -template -PORTABLE_INLINE_FUNCTION constexpr T array_reduce_impl(std::array const &x, Op op) { +template +PORTABLE_INLINE_FUNCTION constexpr auto array_reduce_impl(A const &x, Op op) { if constexpr ((l - f) == 1) return x[f]; else { constexpr std::size_t n = l - f; - T left_sum = array_reduce_impl(x, op); - T right_sum = array_reduce_impl(x, op); + auto left_sum = array_reduce_impl(x, op); + auto right_sum = array_reduce_impl(x, op); return op(left_sum, right_sum); } } } // namespace detail -template -PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(std::array const &x, F f) { - return detail::array_map_impl(x, f, std::make_index_sequence{}); +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, F f) { + return detail::array_map_impl(x, f, is(x.size())); } -template -PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(std::array const &x, - std::array const &y, F f) { - return detail::array_map_impl(x, y, f, std::make_index_sequence{}); +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, B const &y, F f) { + return detail::array_map_impl(x, y, f, is(x.size())); } -template -PORTABLE_FORCEINLINE_FUNCTION constexpr T array_partial_reduce(std::array x, - T initial_value, Op op) { - static_assert(I <= N); +template > +PORTABLE_FORCEINLINE_FUNCTION constexpr T array_partial_reduce(A x, T initial_value, + Op op) { + static_assert(I <= x.size()); if constexpr (I == 0) return initial_value; else return detail::array_reduce_impl<0, I>(x, op); } -template -PORTABLE_FORCEINLINE_FUNCTION constexpr T array_reduce(std::array x, - T initial_value, Op op) { - return array_partial_reduce(x, initial_value, op); +template > + +PORTABLE_FORCEINLINE_FUNCTION constexpr T array_reduce(A x, T initial_value, Op op) { + return array_partial_reduce(x, initial_value, op); +} +/* +namespace detail { +template +PORTABLE_FORCEINLINE_FUNCTION constexpr bool +arrays_equal_impl(const A &x, const A &y, std::index_sequence) { + return (... && (x[I] == y[I])); } -template +} // namespace detail +template +PORTABLE_FORCEINLINE_FUNCTION constexpr bool arrays_equal(const A &x, const A &y) { + return detail::arrays_equal_impl(x, y, is(x.size())); +}*/ +/*template PORTABLE_FORCEINLINE_FUNCTION constexpr auto as_array(std::index_sequence) { return std::array{Is...}; -} +}*/ } // namespace util diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index 31a5d8e8..efc4b3b3 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -2,14 +2,15 @@ #define _PORTSOFCALL_UTILITY_INDEX_ALGO_HPP_ #include "../portability.hpp" -#include "array.hpp" +#include "array_algo.hpp" #include #include #include namespace util { -template +/* + template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_stride(std::array const &dim) { static_assert(I < dim.size(), "Dim index is out of bounds"); @@ -48,48 +49,41 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto findex(std::array const &ijk, std::array const &dim) { return fast_findex(ijk, dim, get_strides(dim)); } - -namespace handroll { -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -get_stride(std::array const &dim) { +*/ +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_stride(A const &dim) { // column major - return array_partial_reduce(dim, T{1}, std::multiplies{}); + return array_partial_reduce(dim, value_t{1}, std::multiplies{}); } namespace detail { -template +template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -get_strides_impl(std::array const &dim, std::index_sequence) { - return std::array{get_stride(dim)...}; +get_strides_impl(A const &dim, std::index_sequence) { + return A{get_stride(dim)...}; } -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -get_strides(std::array const &dim) { - return detail::get_strides_impl(dim, std::make_index_sequence{}); -} // namespace handroll::detail -template +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides(A const &dim) { + return detail::get_strides_impl(dim, is(dim.size())); +} +} // namespace detail +template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -fast_findex(std::array const &ijk, std::array const &dim, - std::array const &stride) { +fast_findex(A const &ijk, A const &dim, A const &stride) { // TODO: assert ijk in bounds - return array_reduce( - array_map(ijk, stride, [](auto a, auto b) { return a * b; }), T{1}, - std::plus{}); + return array_reduce(array_map(ijk, stride, [](auto a, auto b) { return a * b; }), + value_t{1}, std::plus{}); } -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -findex(std::array const &ijk, std::array const &dim) { +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto findex(A const &ijk, A const &dim) { return fast_findex(ijk, dim, get_strides(dim)); } -} // namespace handroll -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr std::array -fast_mindices(std::size_t idx, std::array const &dim, - std::array const &stride) { - std::array mdidx; +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr A +fast_mindices(std::size_t idx, A const &dim, A const &stride) { + A mdidx; for (std::int64_t i = dim.size() - 1; i >= 0; --i) { mdidx[i] = idx / std::size_t(stride[i]); idx -= mdidx[i] * std::size_t(stride[i]); @@ -97,9 +91,8 @@ fast_mindices(std::size_t idx, std::array const &dim, return mdidx; } -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto mindices(std::size_t idx, - Array dim) { +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto mindices(std::size_t idx, A dim) { return fast_mindices(idx, dim, get_strides(dim)); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 65d5d205..00e1ed3a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,4 +47,5 @@ target_link_libraries(test_portsofcall PRIVATE ports-of-call::ports-of-call include(Catch) catch_discover_tests(test_portsofcall) -target_sources(test_portsofcall PRIVATE test_portability.cpp test_array.cpp) +target_sources(test_portsofcall PRIVATE test_portability.cpp test_array.cpp + test_mdidx.cpp) diff --git a/test/test_mdidx.cpp b/test/test_mdidx.cpp new file mode 100644 index 00000000..dbf9b964 --- /dev/null +++ b/test/test_mdidx.cpp @@ -0,0 +1,144 @@ +// © (or copyright) 2019-2024. Triad National Security, LLC. All rights +// reserved. This program was produced under U.S. Government contract +// 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is +// operated by Triad National Security, LLC for the U.S. Department of +// Energy/National Nuclear Security Administration. All rights in the +// program are reserved by Triad National Security, LLC, and the +// U.S. Department of Energy/National Nuclear Security +// Administration. The Government is granted for itself and others acting +// on its behalf a nonexclusive, paid-up, irrevocable worldwide license +// in this material to reproduce, prepare derivative works, distribute +// copies to the public, perform publicly and display publicly, and to +// permit others to do so. + +#include +#include +#include +#include + +#include "test_utilities.hpp" + +#ifndef CATCH_CONFIG_FAST_COMPILE +#define CATCH_CONFIG_FAST_COMPILE +#include +#include +#include +#include +#endif + +TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { + using tape_t = std::vector; + + constexpr std::array nxs{2, 5, 10, 10, 5, 2}; + + std::vector subsz(MAXDIM); + std::vector dats; + + std::partial_sum(nxs.cbegin(), nxs.cend(), subsz.begin(), std::multiplies()); + for (auto i = 0; i < MAXDIM; ++i) { + dats.push_back(std::vector(subsz[i], nxs[i])); + } + + SECTION("Can construct correctly") { + auto pmd1 = PortableMDArray(dats[0].data(), nxs[0]); + + REQUIRE(pmd1.GetSize() == subsz[0]); + REQUIRE(pmd1.GetRank() == 1); + + auto pmd3 = PortableMDArray(dats[3].data(), nxs[0], nxs[1], nxs[2]); + + REQUIRE(pmd3.GetSize() == subsz[2]); + REQUIRE(pmd3.GetRank() == 3); + REQUIRE(pmd3.GetDim1() == nxs[2]); + REQUIRE(pmd3.GetDim2() == nxs[1]); + REQUIRE(pmd3.GetDim3() == nxs[0]); + } +} + +TEST_CASE("Correct portable indexing", "[PortableMDArray]") { + // layout + auto iflat = [](auto nx, auto nxny) { + return [=](auto k, auto j, auto i) { return i + nx * j + nxny * k; }; + }; + + constexpr std::size_t NX = 32, NY = 64, NZ = 4; + constexpr std::size_t NC = NX * NY * NZ; + constexpr Real scale = 0.1; + // size in bytes + constexpr const std::size_t NCb = NC * sizeof(Real); + + // vector length N on host of Real + std::vector tape_ref(NC), tape_buf(NC); + + for (auto n = 0; n < NC; ++n) { + tape_ref[n] = scale * static_cast(n); + } + + // device pointer + Real *tape_d = (Real *)PORTABLE_MALLOC(NCb); + + auto view_d = PortableMDArray(tape_d, NZ, NY, NX); + + // set device values + portableFor( + "set unique val", 0, NZ, 0, NY, 0, NX, + PORTABLE_LAMBDA(const int &k, const int &j, const int &i) { + view_d(k, j, i) = scale * iflat(NX, NX * NY)(k, j, i); + }); + + portableCopyToHost(tape_buf.data(), tape_d, NCb); + + for (auto n = 0; n < NC; ++n) { + INFO("REF=" << tape_ref[n] << " BUF=" << tape_buf[n] << " n=" << n); + REQUIRE_THAT(tape_buf[n], Catch::Matchers::WithinRel(tape_ref[n])); + } + + PORTABLE_FREE(tape_d); +} + +// runs benchmarks for indexing. +// note: to run, execute +// ./tests/test_portsofcall "[!benchmark]" +// (you may need to escape the `!` char depending +// on your shell) +TEST_CASE("Benchmark Indexing", "[!benchmark]") { + constexpr std::size_t NX = 32, NY = 64, NZ = 16; + constexpr std::size_t NU = 256, NV = 256, NW = 128; + + // construct a series of PortableMDArrays + // of different rank and sizes, and + // iterates through all contiguous indexing. + + // clang-format off + SECTION("small") { + BENCHMARK("index calc 1D") + { + return testing::idx_contiguous_bm(std::array{NX}); + }; + BENCHMARK("index calc 2D") + { + return testing::idx_contiguous_bm(std::array{NX, NY}); + }; + BENCHMARK("index calc 3D") + { + return testing::idx_contiguous_bm(std::array{NX, NY, NZ}); + }; + } + + SECTION("big") { + BENCHMARK("index calc 1D") + { + return testing::idx_contiguous_bm(std::array{NU}); + }; + BENCHMARK("index calc 2D") + { + return testing::idx_contiguous_bm(std::array{NU, NV}); + }; + BENCHMARK("index calc 3D") + { + return testing::idx_contiguous_bm(std::array{NU, NV, NW}); + }; + + } + // clang-format on +} diff --git a/test/test_utilities.hpp b/test/test_utilities.hpp index 2d5ad3fb..0cb2de41 100644 --- a/test/test_utilities.hpp +++ b/test/test_utilities.hpp @@ -1,9 +1,8 @@ #ifndef _TEST_PORTOFCALL_HPP_ #define _TEST_PORTOFCALL_HPP_ -#include <__utility/integer_sequence.h> -#include #include +#include #include #include #include @@ -27,10 +26,8 @@ constexpr auto array_fix_impl(const Array &a, std::index_sequence is) { // it into an array 2N {0, NX} contiguous pairs. // ex: // array{10,12,13} -> array{0,10,0,12,0,13} -template > -PORTABLE_FORCEINLINE_FUNCTION constexpr auto -array_fix(const std::array &a) { +template > +PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_fix(const std::array &a) { return array_fix_impl(a, Indx{}); } @@ -50,13 +47,12 @@ PORTABLE_INLINE_FUNCTION constexpr auto pf_invoke(View v, const Array &ext_par, auto stud_arr = array_fix(ext_par); portableFor( - "set unique val", stud_arr[J]..., - PORTABLE_LAMBDA(auto... is) { v(is...) = 1.0; }); + "set unique val", stud_arr[J]..., PORTABLE_LAMBDA(auto... is) { v(is...) = 1.0; }); } template -PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) -mdview(Ptr *d, const Array &arr, std::index_sequence) { +PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) mdview(Ptr *d, const Array &arr, + std::index_sequence) { return PortableMDArray(d, arr[I]...); } @@ -65,8 +61,7 @@ mdview(Ptr *d, const Array &arr, std::index_sequence) { template , typename I2 = std::make_index_sequence<2 * N>> PORTABLE_FUNCTION auto idx_contiguous_bm(const std::array &nxa) { - auto nc = - std::accumulate(std::begin(nxa), std::end(nxa), 1, std::multiplies{}); + auto nc = std::accumulate(std::begin(nxa), std::end(nxa), 1, std::multiplies{}); Real *tape_d = (Real *)PORTABLE_MALLOC(nc * sizeof(Real)); From ece0a3cf7d0307d76ddb34acfb4b984cca5896c7 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Fri, 30 Aug 2024 13:49:37 -0600 Subject: [PATCH 10/22] comment cleanup and copyright --- ports-of-call/array.hpp | 13 +++++++ ports-of-call/utility/array_algo.hpp | 13 +++++++ ports-of-call/utility/index_algo.hpp | 54 +++++++--------------------- test/test_array.cpp | 13 +++++++ test/test_utilities.hpp | 17 +++++++-- 5 files changed, 67 insertions(+), 43 deletions(-) diff --git a/ports-of-call/array.hpp b/ports-of-call/array.hpp index 64569f01..a5556e4a 100644 --- a/ports-of-call/array.hpp +++ b/ports-of-call/array.hpp @@ -1,3 +1,16 @@ +// © (or copyright) 2019-2024. Triad National Security, LLC. All rights +// reserved. This program was produced under U.S. Government contract +// 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is +// operated by Triad National Security, LLC for the U.S. Department of +// Energy/National Nuclear Security Administration. All rights in the +// program are reserved by Triad National Security, LLC, and the +// U.S. Department of Energy/National Nuclear Security +// Administration. The Government is granted for itself and others acting +// on its behalf a nonexclusive, paid-up, irrevocable worldwide license +// in this material to reproduce, prepare derivative works, distribute +// copies to the public, perform publicly and display publicly, and to +// permit others to do so. + #ifndef _PORTS_OF_CALL_ARRAY_HPP_ #define _PORTS_OF_CALL_ARRAY_HPP_ diff --git a/ports-of-call/utility/array_algo.hpp b/ports-of-call/utility/array_algo.hpp index f10d09c1..bf15786c 100644 --- a/ports-of-call/utility/array_algo.hpp +++ b/ports-of-call/utility/array_algo.hpp @@ -1,3 +1,16 @@ +// © (or copyright) 2019-2024. Triad National Security, LLC. All rights +// reserved. This program was produced under U.S. Government contract +// 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is +// operated by Triad National Security, LLC for the U.S. Department of +// Energy/National Nuclear Security Administration. All rights in the +// program are reserved by Triad National Security, LLC, and the +// U.S. Department of Energy/National Nuclear Security +// Administration. The Government is granted for itself and others acting +// on its behalf a nonexclusive, paid-up, irrevocable worldwide license +// in this material to reproduce, prepare derivative works, distribute +// copies to the public, perform publicly and display publicly, and to +// permit others to do so. + #ifndef _PORTSOFCALL_UTILITY_ARRAY_ALGO_HPP_ #define _PORTSOFCALL_UTILITY_ARRAY_ALGO_HPP_ diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index efc4b3b3..3a507620 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -1,3 +1,16 @@ +// © (or copyright) 2019-2024. Triad National Security, LLC. All rights +// reserved. This program was produced under U.S. Government contract +// 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is +// operated by Triad National Security, LLC for the U.S. Department of +// Energy/National Nuclear Security Administration. All rights in the +// program are reserved by Triad National Security, LLC, and the +// U.S. Department of Energy/National Nuclear Security +// Administration. The Government is granted for itself and others acting +// on its behalf a nonexclusive, paid-up, irrevocable worldwide license +// in this material to reproduce, prepare derivative works, distribute +// copies to the public, perform publicly and display publicly, and to +// permit others to do so. + #ifndef _PORTSOFCALL_UTILITY_INDEX_ALGO_HPP_ #define _PORTSOFCALL_UTILITY_INDEX_ALGO_HPP_ @@ -9,47 +22,6 @@ namespace util { -/* - template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -get_stride(std::array const &dim) { - static_assert(I < dim.size(), "Dim index is out of bounds"); - - // column major - return std::accumulate(std::begin(dim), std::begin(dim) + I, std::size_t{1}, - std::multiplies{}); -} - -namespace detail { -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -get_strides_impl(std::array const &dim, std::index_sequence) { - return std::array{get_stride(dim)...}; -} - -} // namespace detail - -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -get_strides(std::array const &dim) { - return detail::get_strides_impl(dim, std::make_index_sequence{}); -} - -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -fast_findex(std::array const &ijk, std::array const &dim, - std::array const &stride) { - // TODO: assert ijk in bounds - return std::inner_product(std::begin(ijk), std::end(ijk), std::begin(stride), - std::size_t{0}); -} - -template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -findex(std::array const &ijk, std::array const &dim) { - return fast_findex(ijk, dim, get_strides(dim)); -} -*/ template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_stride(A const &dim) { diff --git a/test/test_array.cpp b/test/test_array.cpp index 3a9423fc..64f0e83b 100644 --- a/test/test_array.cpp +++ b/test/test_array.cpp @@ -1,3 +1,16 @@ +// © (or copyright) 2019-2024. Triad National Security, LLC. All rights +// reserved. This program was produced under U.S. Government contract +// 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is +// operated by Triad National Security, LLC for the U.S. Department of +// Energy/National Nuclear Security Administration. All rights in the +// program are reserved by Triad National Security, LLC, and the +// U.S. Department of Energy/National Nuclear Security +// Administration. The Government is granted for itself and others acting +// on its behalf a nonexclusive, paid-up, irrevocable worldwide license +// in this material to reproduce, prepare derivative works, distribute +// copies to the public, perform publicly and display publicly, and to +// permit others to do so. + #include "ports-of-call/array.hpp" #include "ports-of-call/portability.hpp" diff --git a/test/test_utilities.hpp b/test/test_utilities.hpp index 0cb2de41..30ba2090 100644 --- a/test/test_utilities.hpp +++ b/test/test_utilities.hpp @@ -1,5 +1,18 @@ -#ifndef _TEST_PORTOFCALL_HPP_ -#define _TEST_PORTOFCALL_HPP_ +// © (or copyright) 2019-2024. Triad National Security, LLC. All rights +// reserved. This program was produced under U.S. Government contract +// 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is +// operated by Triad National Security, LLC for the U.S. Department of +// Energy/National Nuclear Security Administration. All rights in the +// program are reserved by Triad National Security, LLC, and the +// U.S. Department of Energy/National Nuclear Security +// Administration. The Government is granted for itself and others acting +// on its behalf a nonexclusive, paid-up, irrevocable worldwide license +// in this material to reproduce, prepare derivative works, distribute +// copies to the public, perform publicly and display publicly, and to +// permit others to do so. + +#ifndef _TEST_TEST_UTILITIES_HPP_ +#define _TEST_TEST_UTILITIES_HPP_ #include #include From 66685871b1be63addffd095f6d921effb884d3ea Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Fri, 30 Aug 2024 14:07:07 -0600 Subject: [PATCH 11/22] short comments in algos --- ports-of-call/portable_arrays.hpp | 31 +++++++++++++--------------- ports-of-call/utility/array_algo.hpp | 30 ++++++++++----------------- ports-of-call/utility/index_algo.hpp | 9 +++++++- 3 files changed, 33 insertions(+), 37 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 72ef43f6..0938492a 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -97,9 +97,11 @@ class PortableMDArray { pdata_ = data; update_layout(nxs...); } - // public function to swap underlying data pointers of two equally-sized - // arrays - // void SwapPortableMDArray(PortableMDArray &array2); + //---------------------------------------------------------------------------------------- + //! \fn PortableMDArray::SwapPortableMDArray() + // \brief swap pdata_ pointers of two equally sized PortableMDArrays + // (shallow swap) + // Does not allocate memory for either array void SwapPortableMDArray(PortableMDArray &array2) { std::swap(pdata_, array2.pdata_); @@ -209,6 +211,15 @@ class PortableMDArray { bool operator!=(const PortableMDArray &other) const { return !(*this == other); } + //---------------------------------------------------------------------------------------- + //! \fn PortableMDArray::InitWithShallowSlice() + // \brief shallow copy of nvar elements in dimension dim of an array, + // starting at index=indx. Copies pointer to data, but not data itself. + + // Shallow slice is only able to address the "nvar" range in "dim", and all + // entries of the src array for d &src, const int dim, const int indx, const int nvar) { @@ -292,19 +303,5 @@ class PortableMDArray { public: }; -//---------------------------------------------------------------------------------------- -//! \fn PortableMDArray::InitWithShallowSlice() -// \brief shallow copy of nvar elements in dimension dim of an array, -// starting at index=indx. Copies pointer to data, but not data itself. - -// Shallow slice is only able to address the "nvar" range in "dim", and all -// entries of the src array for d -constexpr auto -is(std::integral_constant) { // = std::make_index_sequence; +constexpr auto is(std::integral_constant) { return std::make_index_sequence{}; } +// shorter value_type template using value_t = typename A::value_type; +// determines the return type of Op(a,b) template using reduction_value_t = decltype(std::declval()(std::declval>(), std::declval>())); @@ -60,11 +62,16 @@ PORTABLE_INLINE_FUNCTION constexpr auto array_reduce_impl(A const &x, Op op) { } // namespace detail +// maps an unary function f(x) to each array value, returning an array of results +// x = {f(a[0]), f(a[1]),..., f(a[N-1])} template PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, F f) { return detail::array_map_impl(x, f, is(x.size())); } +// maps a binary function to each array value, returning an array of results +// x = {f(a[0], b[0]), f(a[1], b[1]),..., f(a[N-1], b[N-1])} + template PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, B const &y, F f) { return detail::array_map_impl(x, y, f, is(x.size())); @@ -82,26 +89,11 @@ PORTABLE_FORCEINLINE_FUNCTION constexpr T array_partial_reduce(A x, T initial_va template > +// performs a reduction on an array of values +// e.g. x = sum_i a[i] PORTABLE_FORCEINLINE_FUNCTION constexpr T array_reduce(A x, T initial_value, Op op) { return array_partial_reduce(x, initial_value, op); } -/* -namespace detail { -template -PORTABLE_FORCEINLINE_FUNCTION constexpr bool -arrays_equal_impl(const A &x, const A &y, std::index_sequence) { - return (... && (x[I] == y[I])); -} -} // namespace detail -template -PORTABLE_FORCEINLINE_FUNCTION constexpr bool arrays_equal(const A &x, const A &y) { - return detail::arrays_equal_impl(x, y, is(x.size())); -}*/ -/*template -PORTABLE_FORCEINLINE_FUNCTION constexpr auto -as_array(std::index_sequence) { - return std::array{Is...}; -}*/ } // namespace util diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index 3a507620..168676b9 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -22,10 +22,11 @@ namespace util { +// returns the stride (offset) needed to move one unit in dimension D template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_stride(A const &dim) { - // column major + // NB: column major return array_partial_reduce(dim, value_t{1}, std::multiplies{}); } @@ -40,6 +41,9 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides(A const &dim) { return detail::get_strides_impl(dim, is(dim.size())); } } // namespace detail + +// returns the flat (1D) index of the md index set {i,j,k} +// NB: fast because the strides are provided and don't need to be recomputed template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto fast_findex(A const &ijk, A const &dim, A const &stride) { @@ -47,11 +51,14 @@ fast_findex(A const &ijk, A const &dim, A const &stride) { return array_reduce(array_map(ijk, stride, [](auto a, auto b) { return a * b; }), value_t{1}, std::plus{}); } + +// same as fast_findex, except the strides are calculated on the fly template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto findex(A const &ijk, A const &dim) { return fast_findex(ijk, dim, get_strides(dim)); } +// returns the md index set {i,j,k} of the flat (1D) index template PORTABLE_FORCEINLINE_FUNCTION static constexpr A fast_mindices(std::size_t idx, A const &dim, A const &stride) { From 825301a35f1b6dd8184155041ce88d5dc015d98d Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Fri, 30 Aug 2024 14:23:19 -0600 Subject: [PATCH 12/22] removed unneeded parameter --- ports-of-call/utility/array_algo.hpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/ports-of-call/utility/array_algo.hpp b/ports-of-call/utility/array_algo.hpp index 3e49a5c7..9cb2a516 100644 --- a/ports-of-call/utility/array_algo.hpp +++ b/ports-of-call/utility/array_algo.hpp @@ -20,6 +20,29 @@ namespace util { +// NB: +// the array type is explicitly left unspecified, as it may be +// desirable to use a type other than std::array. +// however, it is required that the underlying type must +// conform to a minimum std::array interface: +// +// 1.) constexpr operator[] +// 2.) constexpr size_type size() +// 3.) array_type::value_type +// +// A casual choice early on was that the array parameter is +// assumed to be "complete" at instanciation. That is, +// we use +// +// template +// +// rather than (the more specific) +// +// template A, class T, auto N> +// +// this imposes some structure that is a little loose, and may +// be refactored at some point. + // shorter make_index_sequence template constexpr auto is(std::integral_constant) { @@ -42,7 +65,7 @@ PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map_impl(A const &x, F f, return std::array{f(x[Is])...}; } -template +template PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map_impl(A const &x, B const &y, F f, std::index_sequence) { return A{f(x[Is], y[Is])...}; From 703397a56674bd52b0746b42389f8d1e4f6ccca0 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Wed, 4 Sep 2024 13:50:08 -0600 Subject: [PATCH 13/22] some sloppy juggling of types --- ports-of-call/portable_arrays.hpp | 175 +++++++++++++++------------ ports-of-call/utility/array_algo.hpp | 86 +++++++++++-- ports-of-call/utility/index_algo.hpp | 13 +- test/test_mdidx.cpp | 6 +- test/test_portability.cpp | 7 +- 5 files changed, 185 insertions(+), 102 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 0938492a..ca14d3f2 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -27,6 +27,7 @@ #include "array.hpp" #include "portability.hpp" #include "utility/array_algo.hpp" +#include "utility/index_algo.hpp" #include #include #include @@ -47,22 +48,62 @@ constexpr std::size_t to_const = V; } // namespace detail +#if __cplusplus == 202002L +template +using Array = std::array; +#else template using Array = PortsOfCall::array; +#endif // __cplusplus template using IArray = Array; +/* +#define ENABLE_CRTP(DerivedType) \ + constexpr DerivedType &base() noexcept { return static_cast(*this); } \ + constexpr const DerivedType &base() const noexcept { return static_cast(*this); } + + + +template +struct WithStoredStrides +{ + ENABLE_CRTP(Derived) + using size_type = typename Derived :: size_type; + using index_type = typename Derived :: index_type; + using strides_type = decltype(Derived::nx_); + + strides_type strides_; + + template + constexpr auto operator() noexcept(Is...is) + { + return util::findex({is...}, base.nxs_, strides_); + } +}; +*/ + template class PortableMDArray { public: + using element_type = T; + using value_type = typename std::remove_cv::type; + using index_type = std::ptrdiff_t; + using size_type = std::size_t; + using pointer = T *; + using const_pointer = const T *; + using reference = T &; + using const_reference = const T &; + // explicit initialization of objects PORTABLE_FUNCTION PortableMDArray(T *data, IArray extents, IArray strides, - std::size_t rank) noexcept + size_type rank) noexcept : pdata_(data), nxs_(extents), strides_(strides), rank_(rank) {} // variadic ctor, dispatch to explicit constructor - template + template PORTABLE_FUNCTION PortableMDArray(T *p, NXs... nxs) noexcept { NewPortableMDArray(p, nxs...); } //: PortableMDArray(p, make_nxs_array(nxs...), make_strides_array(), N) { @@ -111,62 +152,81 @@ class PortableMDArray { // functions to get array dimensions template PORTABLE_FUNCTION constexpr auto GetDim() const { - return nxs_[I]; + return nxs_[rank_ - I]; } // legacy API, TODO: deprecate - PORTABLE_FORCEINLINE_FUNCTION int GetDim1() const { return GetDim<0>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim2() const { return GetDim<1>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim3() const { return GetDim<2>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim4() const { return GetDim<3>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim5() const { return GetDim<4>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim6() const { return GetDim<5>(); } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim1() const { + return GetDim<1>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim2() const { + return GetDim<2>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim3() const { + return GetDim<3>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim4() const { + return GetDim<4>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim5() const { + return GetDim<5>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim6() const { + return GetDim<6>(); + } PORTABLE_INLINE_FUNCTION int GetDim(size_t i) const { // TODO: remove if performance cirtical assert(0 < i && i <= 6 && "PortableMDArrays are max 6D"); switch (i) { case 1: - return GetDim1(); + return GetDim<1>(); case 2: - return GetDim2(); + return GetDim<2>(); case 3: - return GetDim3(); + return GetDim<3>(); case 4: - return GetDim4(); + return GetDim<4>(); case 5: - return GetDim5(); + return GetDim<5>(); case 6: - return GetDim6(); + return GetDim<6>(); } return -1; } - PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { - return util::array_reduce(nxs_, 1, std::multiplies{}); + PORTABLE_FORCEINLINE_FUNCTION int GetSize() const noexcept { + return util::array_reduce(nxs_, 1, std::multiplies{}); } - PORTABLE_FORCEINLINE_FUNCTION std::size_t GetSizeInBytes() const { + PORTABLE_FORCEINLINE_FUNCTION size_type GetSizeInBytes() const noexcept { return GetSize() * sizeof(T); } - PORTABLE_INLINE_FUNCTION std::size_t GetRank() const { return rank_; } + PORTABLE_INLINE_FUNCTION size_type GetRank() const noexcept { return rank_; } template PORTABLE_INLINE_FUNCTION void Reshape(NXs... nxs) { - assert(util::array_reduce(IArray{nxs...}, 1, std::multiplies{}) == - GetSize()); + assert(util::array_reduce(IArray{nxs...}, 1, std::multiplies{}) == + GetSize() && + "New shape gives different size than existing size"); update_layout(nxs...); } - PORTABLE_FORCEINLINE_FUNCTION bool IsShallowSlice() { return true; } - PORTABLE_FORCEINLINE_FUNCTION bool IsEmpty() { return GetSize() < 1; } + PORTABLE_FORCEINLINE_FUNCTION bool IsShallowSlice() const noexcept { return true; } + PORTABLE_FORCEINLINE_FUNCTION bool IsEmpty() const noexcept { return GetSize() < 1; } // "getter" function to access private data member // TODO(felker): Replace this unrestricted "getter" with a limited, safer // alternative. // TODO(felker): Rename function. Conflicts with "PortableMDArray<> data" // OutputData member. - PORTABLE_FORCEINLINE_FUNCTION T *data() { return pdata_; } - PORTABLE_FORCEINLINE_FUNCTION const T *data() const { return pdata_; } - PORTABLE_FORCEINLINE_FUNCTION T *begin() { return pdata_; } - PORTABLE_FORCEINLINE_FUNCTION T *end() { return pdata_ + GetSize(); } + PORTABLE_FORCEINLINE_FUNCTION pointer data() noexcept { return pdata_; } + PORTABLE_FORCEINLINE_FUNCTION const_pointer data() const noexcept { return pdata_; } + PORTABLE_FORCEINLINE_FUNCTION pointer begin() noexcept { return pdata_; } + PORTABLE_FORCEINLINE_FUNCTION pointer end() noexcept { return pdata_ + GetSize(); } // overload "function call" operator() to access 1d-5d data // provides Fortran-like syntax for multidimensional arrays vs. "subscript" @@ -176,13 +236,13 @@ class PortableMDArray { // access via returning by reference, enabling assignment on returned // l-value, e.g.: a(3) = 3.0; template - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const Is... idxs) { - return pdata_[compute_index(idxs...)]; + PORTABLE_FORCEINLINE_FUNCTION reference operator()(Is... idxs) noexcept { + return pdata_[util::fast_findex({static_cast(idxs)...}, nxs_, strides_)]; } template - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const Is... idxs) const { - return pdata_[compute_index(idxs...)]; + PORTABLE_FORCEINLINE_FUNCTION T &operator()(Is... idxs) const noexcept { + return pdata_[util::fast_findex({static_cast(idxs)...}, nxs_, strides_)]; } PortableMDArray &operator*=(T scale) { @@ -240,60 +300,15 @@ class PortableMDArray { } private: - template - PORTABLE_FORCEINLINE_FUNCTION auto make_nxs_array(NX... nxs) { - IArray a; - IArray t{static_cast(nxs)...}; - for (auto i = 0; i < N; ++i) { - a[i] = t[N - i - 1]; - } - for (auto i = N; i < D; ++i) { - a[i] = 1; - } - return a; - } - - template - PORTABLE_INLINE_FUNCTION auto make_strides_array() { - IArray a; - a[0] = 1; - for (auto i = 1; i < N; ++i) { - a[i] = a[i - 1] * nxs_[i - 1]; - } - for (auto i = N; i < D; ++i) - a[i] = 0; - - return a; - } - // driver of nx array creation. // Note we iterate from the RIGHT, to match // what was done explicilt prior. - template - PORTABLE_INLINE_FUNCTION auto update_layout(NXs... nxs) { + template + PORTABLE_INLINE_FUNCTION constexpr auto update_layout(NXs... nxs) noexcept { rank_ = N; - nxs_ = make_nxs_array(nxs...); - strides_ = make_strides_array(); - } - - // compute_index base case, i.e. fastest moving index - template - PORTABLE_FORCEINLINE_FUNCTION size_t compute_index_impl(const size_t index) const { - return index; - } - - // compute_index general case, computing slower moving index strides - template - PORTABLE_FORCEINLINE_FUNCTION size_t compute_index_impl(const size_t index, - const Tail... tail) const { - return index * strides_[N - Ind - 1] + compute_index_impl(tail...); - } - // compute index driver. - template - PORTABLE_FORCEINLINE_FUNCTION std::size_t compute_index(const Indicies... idxs) const { - // adding `0` if sizeof...(Indicies) == 0 - return 0 + compute_index_impl<0, N>(idxs...); + nxs_ = util::make_underfilled_array(util::wrap_vars>(nxs...)); + strides_ = util::make_underfilled_array(util::get_strides(nxs_)); } T *pdata_; diff --git a/ports-of-call/utility/array_algo.hpp b/ports-of-call/utility/array_algo.hpp index 9cb2a516..e1d2e287 100644 --- a/ports-of-call/utility/array_algo.hpp +++ b/ports-of-call/utility/array_algo.hpp @@ -17,6 +17,7 @@ #include "../portability.hpp" #include #include +#include namespace util { @@ -53,6 +54,16 @@ constexpr auto is(std::integral_constant) { template using value_t = typename A::value_type; +template +constexpr auto get_size(const A &a) { + return a.size(); +} + +template +constexpr auto wrap_vars(B... vv) { + return A{static_cast>(vv)...}; +} + // determines the return type of Op(a,b) template using reduction_value_t = @@ -83,8 +94,67 @@ PORTABLE_INLINE_FUNCTION constexpr auto array_reduce_impl(A const &x, Op op) { } } +/* +template +constexpr auto NREP = V; + +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +make_underfilled_reverse_array_impl(std::index_sequence, + std::index_sequence, B &&...vv) { + return wrap_vars((std::get(std::tie(vv...)))..., + NREP...); +} +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +make_underfilled_array_impl(std::index_sequence, std::index_sequence, + B &&...vv) { + return wrap_vars((std::get(std::tie(vv...)))..., NREP...); +} +*/ + } // namespace detail +/* +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto make_underfilled_reverse_array(B... vv) { + constexpr auto D = get_size(A{}); + return detail::make_underfilled_reverse_array_impl( + std::index_sequence_for{}, std::make_index_sequence{}, + vv...); +} +*/ +/* +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto make_underfilled_array(B... vv) { + constexpr auto D = get_size(A{}); + return detail::make_underfilled_array_impl( + std::index_sequence_for{}, std::make_index_sequence{}, + vv...); +} +*/ +template class A, class T, auto O> +PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) +make_underfilled_array(const A &in) { + A out; + for (auto i = 0; i < in.size(); ++i) + out[i] = in[i]; + for (auto i = in.size(); i < out.size(); ++i) + out[i] = Fill; + return out; +} + +template class A, class T, auto O> +PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) +make_underfilled_reversed_array(const A &in) { + A out; + for (auto i = in.size() - 1; i >= 0; i--) + out[i] = in[i]; + for (auto i = in.size(); i < out.size(); ++i) + out[i] = Fill; + return out; +} + // maps an unary function f(x) to each array value, returning an array of results // x = {f(a[0]), f(a[1]),..., f(a[N-1])} template @@ -97,25 +167,25 @@ PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, F f) { template PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, B const &y, F f) { - return detail::array_map_impl(x, y, f, is(x.size())); + return detail::array_map_impl(x, y, f, std::make_index_sequence{}); } -template > +template > PORTABLE_FORCEINLINE_FUNCTION constexpr T array_partial_reduce(A x, T initial_value, Op op) { - static_assert(I <= x.size()); - if constexpr (I == 0) + static_assert(N <= x.size()); + if constexpr ((N - I) == 0) return initial_value; else - return detail::array_reduce_impl<0, I>(x, op); + return detail::array_reduce_impl(x, op); } -template > - // performs a reduction on an array of values // e.g. x = sum_i a[i] +template > PORTABLE_FORCEINLINE_FUNCTION constexpr T array_reduce(A x, T initial_value, Op op) { - return array_partial_reduce(x, initial_value, op); + return array_partial_reduce<0, x.size()>(x, initial_value, op); } } // namespace util diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index 168676b9..ec5fc0bc 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -16,9 +16,6 @@ #include "../portability.hpp" #include "array_algo.hpp" -#include -#include -#include namespace util { @@ -27,7 +24,8 @@ template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_stride(A const &dim) { // NB: column major - return array_partial_reduce(dim, value_t{1}, std::multiplies{}); + return array_partial_reduce(dim, value_t{1}, + std::multiplies{}); } namespace detail { @@ -36,11 +34,12 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides_impl(A const &dim, std::index_sequence) { return A{get_stride(dim)...}; } +} // namespace detail + template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides(A const &dim) { - return detail::get_strides_impl(dim, is(dim.size())); + return detail::get_strides_impl(dim, std::make_index_sequence{}); } -} // namespace detail // returns the flat (1D) index of the md index set {i,j,k} // NB: fast because the strides are provided and don't need to be recomputed @@ -49,7 +48,7 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto fast_findex(A const &ijk, A const &dim, A const &stride) { // TODO: assert ijk in bounds return array_reduce(array_map(ijk, stride, [](auto a, auto b) { return a * b; }), - value_t{1}, std::plus{}); + value_t{0}, std::plus{}); } // same as fast_findex, except the strides are calculated on the fly diff --git a/test/test_mdidx.cpp b/test/test_mdidx.cpp index dbf9b964..c7d9b341 100644 --- a/test/test_mdidx.cpp +++ b/test/test_mdidx.cpp @@ -49,9 +49,9 @@ TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { REQUIRE(pmd3.GetSize() == subsz[2]); REQUIRE(pmd3.GetRank() == 3); - REQUIRE(pmd3.GetDim1() == nxs[2]); - REQUIRE(pmd3.GetDim2() == nxs[1]); - REQUIRE(pmd3.GetDim3() == nxs[0]); + REQUIRE(pmd3.GetDim<1>() == nxs[2]); + REQUIRE(pmd3.GetDim<2>() == nxs[1]); + REQUIRE(pmd3.GetDim<3>() == nxs[0]); } } diff --git a/test/test_portability.cpp b/test/test_portability.cpp index ac7bd60d..1135426f 100644 --- a/test/test_portability.cpp +++ b/test/test_portability.cpp @@ -51,8 +51,8 @@ TEST_CASE("PortableMDArrays can be allocated from a pointer", "[PortableMDArray] a.NewPortableMDArray(data.data(), M, N); SECTION("Shape should be NxM") { - REQUIRE(a.GetDim1() == N); - REQUIRE(a.GetDim2() == M); + REQUIRE(a.GetDim<1>() == N); + REQUIRE(a.GetDim<2>() == M); } SECTION("Stride is as set by initialized pointer") { @@ -87,8 +87,7 @@ TEST_CASE("portableCopy works with all portability strategies", "[portableCopy]" Real *a = (Real *)PORTABLE_MALLOC(Nb); // set device values to 0 - portableFor( - "set to 0", 0, N, PORTABLE_LAMBDA(const int &i) { a[i] = 0.0; }); + portableFor("set to 0", 0, N, PORTABLE_LAMBDA(const int &i) { a[i] = 0.0; }); // set host values to reference for (size_t i = 0; i < N; ++i) { From 195fecf157f5caeaddd362b2aae4bde17420ed1d Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Tue, 8 Oct 2024 13:44:26 -0600 Subject: [PATCH 14/22] working on stuff --- ports-of-call/portable_arrays.hpp | 73 ++++------- ports-of-call/utility/array_algo.hpp | 101 ++++++++++------ ports-of-call/utility/index_algo.hpp | 27 ++++- test/test_mdidx.cpp | 174 +++++++++++++++++++++++---- test/test_portability.cpp | 4 +- test/test_utilities.hpp | 27 ++--- 6 files changed, 272 insertions(+), 134 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index ca14d3f2..6abab7f4 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -29,15 +29,14 @@ #include "utility/array_algo.hpp" #include "utility/index_algo.hpp" #include -#include #include #include // size_t #include // memset() #include -#include #include #include // swap() +namespace PortsOfCall { // maximum number of dimensions constexpr std::size_t MAXDIM = 6; @@ -48,6 +47,8 @@ constexpr std::size_t to_const = V; } // namespace detail +// if C++20, we can use std::array without worry on device +// else use a hand-rolled array #if __cplusplus == 202002L template using Array = std::array; @@ -59,35 +60,10 @@ using Array = PortsOfCall::array; template using IArray = Array; -/* -#define ENABLE_CRTP(DerivedType) \ - constexpr DerivedType &base() noexcept { return static_cast(*this); } \ - constexpr const DerivedType &base() const noexcept { return static_cast(*this); } - - - -template -struct WithStoredStrides -{ - ENABLE_CRTP(Derived) - using size_type = typename Derived :: size_type; - using index_type = typename Derived :: index_type; - using strides_type = decltype(Derived::nx_); - - strides_type strides_; - - template - constexpr auto operator() noexcept(Is...is) - { - return util::findex({is...}, base.nxs_, strides_); - } -}; -*/ - template class PortableMDArray { public: + using this_type = PortableMDArray; using element_type = T; using value_type = typename std::remove_cv::type; using index_type = std::ptrdiff_t; @@ -103,7 +79,7 @@ class PortableMDArray { : pdata_(data), nxs_(extents), strides_(strides), rank_(rank) {} // variadic ctor, dispatch to explicit constructor - template + template PORTABLE_FUNCTION PortableMDArray(T *p, NXs... nxs) noexcept { NewPortableMDArray(p, nxs...); } //: PortableMDArray(p, make_nxs_array(nxs...), make_strides_array(), N) { @@ -117,16 +93,13 @@ class PortableMDArray { // copies. // PortableMDArray(const PortableMDArray &t) noexcept; // PortableMDArray &operator=(const PortableMDArray &t) noexcept; - PortableMDArray(const PortableMDArray &src) noexcept { - nxs_ = src.nxs_; - rank_ = src.rank_; - strides_ = src.strides_; - if (src.pdata_) pdata_ = src.pdata_; - } + PortableMDArray(const this_type &src) noexcept + : nxs_(src.nxs_), rank_(src.rank_), strides_(src.strides_), + pdata_(src.pdata_ ? src.pdata_ : nullptr) {} - PortableMDArray &operator=(const PortableMDArray &src) noexcept { + PortableMDArray &operator=(const this_type &src) noexcept { if (this != &src) { - nxs_ = src.nxs; + nxs_ = src.nxs_; rank_ = src.rank_; pdata_ = src.pdata_; } @@ -144,7 +117,7 @@ class PortableMDArray { // (shallow swap) // Does not allocate memory for either array - void SwapPortableMDArray(PortableMDArray &array2) { + void SwapPortableMDArray(this_type &array2) { std::swap(pdata_, array2.pdata_); return; } @@ -235,29 +208,29 @@ class PortableMDArray { // "non-const variants" called for "PortableMDArray()" provide read/write // access via returning by reference, enabling assignment on returned // l-value, e.g.: a(3) = 3.0; - template - PORTABLE_FORCEINLINE_FUNCTION reference operator()(Is... idxs) noexcept { + template //, class = std::enable_if_t<(sizeof...(Is) > 3)>> + PORTABLE_FORCEINLINE_FUNCTION constexpr reference operator()(Is... idxs) noexcept { return pdata_[util::fast_findex({static_cast(idxs)...}, nxs_, strides_)]; } - template - PORTABLE_FORCEINLINE_FUNCTION T &operator()(Is... idxs) const noexcept { + template //, class = std::enable_if_t<(sizeof...(Is) > 3)>> + PORTABLE_FORCEINLINE_FUNCTION constexpr T &operator()(Is... idxs) const noexcept { return pdata_[util::fast_findex({static_cast(idxs)...}, nxs_, strides_)]; } - PortableMDArray &operator*=(T scale) { + this_type &operator*=(T scale) { std::transform(pdata_, pdata_ + GetSize(), pdata_, [scale](T val) { return scale * val; }); return *this; } - PortableMDArray &operator+=(const PortableMDArray &other) { + this_type &operator+=(const this_type &other) { assert(GetSize() == other.GetSize()); std::transform(pdata_, pdata_ + GetSize(), other.pdata_, pdata_, std::plus()); return *this; } - PortableMDArray &operator-=(const PortableMDArray &other) { + this_type &operator-=(const this_type &other) { assert(GetSize() == other.GetSize()); std::transform(pdata_, pdata_ + GetSize(), other.pdata_, pdata_, std::minus()); return *this; @@ -265,11 +238,11 @@ class PortableMDArray { // Checks that arrays point to same data with same shape // note this POINTER equivalence, not data equivalence - bool operator==(const PortableMDArray &rhs) const { + bool operator==(const this_type &rhs) const { return (pdata_ == rhs.pdata_ && nxs_ == rhs.nxs_); // NB rank is implied } - bool operator!=(const PortableMDArray &other) const { return !(*this == other); } + bool operator!=(const this_type &other) const { return !(*this == other); } //---------------------------------------------------------------------------------------- //! \fn PortableMDArray::InitWithShallowSlice() @@ -280,9 +253,8 @@ class PortableMDArray { // entries of the src array for d &src, - const int dim, const int indx, - const int nvar) { + PORTABLE_FUNCTION void InitWithShallowSlice(const this_type &src, const int dim, + const int indx, const int nvar) { pdata_ = src.pdata_; std::size_t offs = indx; nxs_[dim - 1] = nvar; @@ -319,4 +291,5 @@ class PortableMDArray { public: }; +} // namespace PortsOfCall #endif // _PORTABLE_ARRAYS_HPP_ diff --git a/ports-of-call/utility/array_algo.hpp b/ports-of-call/utility/array_algo.hpp index e1d2e287..800f94de 100644 --- a/ports-of-call/utility/array_algo.hpp +++ b/ports-of-call/utility/array_algo.hpp @@ -54,13 +54,16 @@ constexpr auto is(std::integral_constant) { template using value_t = typename A::value_type; +// constexpr helper to get compile-time size of array A template -constexpr auto get_size(const A &a) { +PORTABLE_FORCEINLINE_FUNCTION constexpr auto get_size(const A &a) { return a.size(); } +// wraps a variadic set of variables into an array, +// making an attempt to cast them. template -constexpr auto wrap_vars(B... vv) { +PORTABLE_FORCEINLINE_FUNCTION constexpr auto wrap_vars(B... vv) { return A{static_cast>(vv)...}; } @@ -70,31 +73,39 @@ using reduction_value_t = decltype(std::declval()(std::declval>(), std::declval>())); namespace detail { -template +template PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map_impl(A const &x, F f, std::index_sequence) { return std::array{f(x[Is])...}; } -template +template PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map_impl(A const &x, B const &y, F f, std::index_sequence) { return A{f(x[Is], y[Is])...}; } -template +template PORTABLE_INLINE_FUNCTION constexpr auto array_reduce_impl(A const &x, Op op) { if constexpr ((l - f) == 1) return x[f]; else { - constexpr std::size_t n = l - f; + constexpr auto n = l - f; auto left_sum = array_reduce_impl(x, op); auto right_sum = array_reduce_impl(x, op); return op(left_sum, right_sum); } } +} // namespace detail + +/////////////////////////////////////////// +/// CMM: This verbose code is probably too +/// messy to keep, but I liked it so I wanted +/// to keep a record on a commit somehwere if +/// i ever wanted to lift it. /* +namespace detail{ template constexpr auto NREP = V; @@ -111,11 +122,8 @@ make_underfilled_array_impl(std::index_sequence, std::index_sequence((std::get(std::tie(vv...)))..., NREP...); } -*/ +} // -} // namespace detail - -/* template PORTABLE_FORCEINLINE_FUNCTION constexpr auto make_underfilled_reverse_array(B... vv) { constexpr auto D = get_size(A{}); @@ -123,8 +131,7 @@ PORTABLE_FORCEINLINE_FUNCTION constexpr auto make_underfilled_reverse_array(B... std::index_sequence_for{}, std::make_index_sequence{}, vv...); } -*/ -/* + template PORTABLE_FORCEINLINE_FUNCTION constexpr auto make_underfilled_array(B... vv) { constexpr auto D = get_size(A{}); @@ -133,27 +140,7 @@ PORTABLE_FORCEINLINE_FUNCTION constexpr auto make_underfilled_array(B... vv) { vv...); } */ -template class A, class T, auto O> -PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) -make_underfilled_array(const A &in) { - A out; - for (auto i = 0; i < in.size(); ++i) - out[i] = in[i]; - for (auto i = in.size(); i < out.size(); ++i) - out[i] = Fill; - return out; -} - -template class A, class T, auto O> -PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) -make_underfilled_reversed_array(const A &in) { - A out; - for (auto i = in.size() - 1; i >= 0; i--) - out[i] = in[i]; - for (auto i = in.size(); i < out.size(); ++i) - out[i] = Fill; - return out; -} +/////////////////////////////////////////// // maps an unary function f(x) to each array value, returning an array of results // x = {f(a[0]), f(a[1]),..., f(a[N-1])} @@ -164,28 +151,62 @@ PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, F f) { // maps a binary function to each array value, returning an array of results // x = {f(a[0], b[0]), f(a[1], b[1]),..., f(a[N-1], b[N-1])} - template PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, B const &y, F f) { return detail::array_map_impl(x, y, f, std::make_index_sequence{}); } -template > +// does a reduction on the sub-array A[I..F) +// also used to construct full reduction A[..N) +template > PORTABLE_FORCEINLINE_FUNCTION constexpr T array_partial_reduce(A x, T initial_value, Op op) { - static_assert(N <= x.size()); - if constexpr ((N - I) == 0) + static_assert(F <= get_size(A{})); + if constexpr ((F - I) == 0) return initial_value; else - return detail::array_reduce_impl(x, op); + return detail::array_reduce_impl(x, op); } // performs a reduction on an array of values // e.g. x = sum_i a[i] +// NB Op is evaluated as a magma, i.e. Op(a,b,c,d) = Op(a,b) + Op(c,d) +// it executes O(ln n) operations. It's possible this obfuscates +// potential compiler optimizations for small (2-3) sized arrays template > PORTABLE_FORCEINLINE_FUNCTION constexpr T array_reduce(A x, T initial_value, Op op) { - return array_partial_reduce<0, x.size()>(x, initial_value, op); + return array_partial_reduce<0, get_size(A{})>(x, initial_value, op); +} + +// if input values are less than output array length, +// this function returns an array with "filled" values. +// e.g. +// in: {x, y} +// out: {x, y, 1, 1, 1} +template class A, class T, auto O> +PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) +make_underfilled_array(const A &in) { + A out; + for (auto i = 0; i < in.size(); ++i) + out[i] = in[i]; + for (auto i = in.size(); i < out.size(); ++i) + out[i] = Fill; + return out; +} + +// same as above, but reverses input values +// e.g. +// in: {x, y, z} +// out: {z, y, x, 1, 1} +template class A, class T, auto O> +PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) +make_underfilled_reversed_array(const A &in) { + A out; + for (auto i = in.size() - 1; i >= 0; i--) + out[i] = in[i]; + for (auto i = in.size(); i < out.size(); ++i) + out[i] = Fill; + return out; } } // namespace util diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index ec5fc0bc..fe2796ac 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -16,18 +16,21 @@ #include "../portability.hpp" #include "array_algo.hpp" +#include namespace util { // returns the stride (offset) needed to move one unit in dimension D template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_stride(A const &dim) { - - // NB: column major + // NB: storage order row-major return array_partial_reduce(dim, value_t{1}, std::multiplies{}); } +// I will rejoice when C++20 arrives and we can +// template lambdas instead of having to write +// seperate "detail" and "impls" for index_sequences namespace detail { template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto @@ -36,6 +39,12 @@ get_strides_impl(A const &dim, std::index_sequence) { } } // namespace detail +// return an array of strides +// dim: input array of dimenstions +// returns array of strides +// e.g. +// dim = {NX, NY, NZ} +// return = {1, NX, NX * NY} template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides(A const &dim) { return detail::get_strides_impl(dim, std::make_index_sequence{}); @@ -44,11 +53,19 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides(A const &dim) { // returns the flat (1D) index of the md index set {i,j,k} // NB: fast because the strides are provided and don't need to be recomputed template -PORTABLE_FORCEINLINE_FUNCTION static constexpr auto -fast_findex(A const &ijk, A const &dim, A const &stride) { +PORTABLE_FUNCTION static constexpr auto fast_findex(A const &ijk, A const &dim, + A const &stride) { // TODO: assert ijk in bounds + constexpr auto N = get_size(A{}); + if constexpr (N == 1) { + return ijk[0]; + } else if constexpr (N == 2) { + return ijk[1] + ijk[0] * stride[0]; + } + // return std::inner_product(std::begin(ijk), std::end(ijk), std::begin(stride), + // value_t{0}); return array_reduce(array_map(ijk, stride, [](auto a, auto b) { return a * b; }), - value_t{0}, std::plus{}); + value_t{0}, std::plus>{}); } // same as fast_findex, except the strides are calculated on the fly diff --git a/test/test_mdidx.cpp b/test/test_mdidx.cpp index c7d9b341..a5e504b1 100644 --- a/test/test_mdidx.cpp +++ b/test/test_mdidx.cpp @@ -13,9 +13,10 @@ #include #include -#include +#include #include +#include "catch2/benchmark/catch_chronometer.hpp" #include "test_utilities.hpp" #ifndef CATCH_CONFIG_FAST_COMPILE @@ -26,6 +27,8 @@ #include #endif +using namespace PortsOfCall; + TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { using tape_t = std::vector; @@ -61,9 +64,9 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { return [=](auto k, auto j, auto i) { return i + nx * j + nxny * k; }; }; - constexpr std::size_t NX = 32, NY = 64, NZ = 4; + constexpr std::size_t NX = 4, NY = 12, NZ = 3; constexpr std::size_t NC = NX * NY * NZ; - constexpr Real scale = 0.1; + constexpr Real scale = 1.0; // size in bytes constexpr const std::size_t NCb = NC * sizeof(Real); @@ -74,9 +77,7 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { tape_ref[n] = scale * static_cast(n); } - // device pointer Real *tape_d = (Real *)PORTABLE_MALLOC(NCb); - auto view_d = PortableMDArray(tape_d, NZ, NY, NX); // set device values @@ -87,12 +88,14 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { }); portableCopyToHost(tape_buf.data(), tape_d, NCb); + for (auto n = 0; n < NC; ++n) { + std::cout << "REF=" << tape_ref[n] << " BUF=" << tape_buf[n] << " n=" << n << "\n"; + } for (auto n = 0; n < NC; ++n) { INFO("REF=" << tape_ref[n] << " BUF=" << tape_buf[n] << " n=" << n); REQUIRE_THAT(tape_buf[n], Catch::Matchers::WithinRel(tape_ref[n])); } - PORTABLE_FREE(tape_d); } @@ -102,43 +105,172 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { // (you may need to escape the `!` char depending // on your shell) TEST_CASE("Benchmark Indexing", "[!benchmark]") { - constexpr std::size_t NX = 32, NY = 64, NZ = 16; + constexpr std::size_t NX = 16, NY = 16, NZ = 78; constexpr std::size_t NU = 256, NV = 256, NW = 128; // construct a series of PortableMDArrays // of different rank and sizes, and // iterates through all contiguous indexing. + Real a = 1.0; // clang-format off - SECTION("small") { - BENCHMARK("index calc 1D") + SECTION("small 1D") { + BENCHMARK_ADVANCED("baseline continguous")(Catch::Benchmark::Chronometer meter) + { + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); + + tape_d[0] = a; + meter.measure([=] {portableFor( + "fill 1D", + 1, NX, + PORTABLE_LAMBDA(auto i){tape_d[i]=a+tape_d[i-1];}); return 0;}); + + PORTABLE_FREE(tape_d); + return a; + }; + + + BENCHMARK_ADVANCED("PMD continguous (space optimized)")(Catch::Benchmark::Chronometer meter) { - return testing::idx_contiguous_bm(std::array{NX}); + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); + auto view_d = PortableMDArray(tape_d, NX); + + view_d(0) = a; + meter.measure([=] {portableFor( + "fill 1D", + 1, NX, + PORTABLE_LAMBDA(auto i){view_d(i)=a+view_d(i-1);});return 0;}); + + PORTABLE_FREE(tape_d); }; - BENCHMARK("index calc 2D") + + BENCHMARK_ADVANCED("PMD continguous (default size)")(Catch::Benchmark::Chronometer meter) { - return testing::idx_contiguous_bm(std::array{NX, NY}); + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); + auto view_d = PortableMDArray(tape_d, NX); + + view_d(0) = a; + meter.measure([=] {portableFor( + "fill 1D", + 1, NX, + PORTABLE_LAMBDA(auto i){view_d(i)=a+view_d(i-1);});return 0;}); + + auto b = view_d(0); + PORTABLE_FREE(tape_d); }; - BENCHMARK("index calc 3D") + + BENCHMARK_ADVANCED("baseline shuffle")(Catch::Benchmark::Chronometer meter) { - return testing::idx_contiguous_bm(std::array{NX, NY, NZ}); + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); + std::array offs; + std::iota(std::begin(offs), std::end(offs), 0); + std::shuffle(std::begin(offs), std::end(offs), std::mt19937{std::random_device{}()}); + + meter.measure([=] {portableFor( + "fill 1D", + 0, NX, + PORTABLE_LAMBDA(auto i){tape_d[offs[i]]=i;}); return 0;}); + + PORTABLE_FREE(tape_d); }; + + BENCHMARK_ADVANCED("PMD shuffle (space optimized)")(Catch::Benchmark::Chronometer meter) + { + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); + auto view_d = PortableMDArray(tape_d, NX); + + std::array offs; + std::iota(std::begin(offs), std::end(offs), 0); + std::shuffle(std::begin(offs), std::end(offs), std::mt19937{std::random_device{}()}); + + meter.measure([=] {portableFor( + "fill 1D", + 1, NX, + PORTABLE_LAMBDA(auto i){view_d(offs[i])=i;});return 0;}); + + PORTABLE_FREE(tape_d); + }; + + } + SECTION("small 2D") { + BENCHMARK_ADVANCED("baseline continguous")(Catch::Benchmark::Chronometer meter) + { + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NY); + + portableFor("prep 2d", 0, NY, 0, 1, PORTABLE_LAMBDA(auto j, auto i) { tape_d[i+j*NX] = a;}); + meter.measure([=] {portableFor( + "fill 1D", + 0, NY, 1, NX, + PORTABLE_LAMBDA(auto j, auto i){tape_d[i + j * NX]=a + tape_d[(i-1) + j * NX];}); return 0;}); - SECTION("big") { - BENCHMARK("index calc 1D") + PORTABLE_FREE(tape_d); + }; + BENCHMARK_ADVANCED("PMD contiguous (space optimized)")(Catch::Benchmark::Chronometer meter) { - return testing::idx_contiguous_bm(std::array{NU}); + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NY); + auto view_d = PortableMDArray(tape_d, NY, NX); + + portableFor("prep 2d", 0, NY, 0, 1, PORTABLE_LAMBDA(auto j, auto i) { view_d(j, i) = a;}); + view_d(0,0) = a; + + meter.measure([=] {portableFor( + "fill 2D", + 0, NY, 1, NX, + PORTABLE_LAMBDA(auto j, auto i){view_d(j,i)=a + view_d(j, i-1);});return 0;}); + + PORTABLE_FREE(tape_d); }; - BENCHMARK("index calc 2D") + + BENCHMARK_ADVANCED("PMD continguous (default size)")(Catch::Benchmark::Chronometer meter) { - return testing::idx_contiguous_bm(std::array{NU, NV}); + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NY); + auto view_d = PortableMDArray(tape_d, NY, NX); + + portableFor("prep 2d", 0, NY, 0, 1, PORTABLE_LAMBDA(auto j, auto i) { view_d(j, i) = a;}); + view_d(0,0) = a; + + meter.measure([=] {portableFor( + "fill 2D", + 0, NY, 1, NX, + PORTABLE_LAMBDA(auto j, auto i){view_d(j,i)=a + view_d(j, i-1);});return 0;}); + + PORTABLE_FREE(tape_d); }; - BENCHMARK("index calc 3D") + } +SECTION("small 5D"){ + + BENCHMARK_ADVANCED("baseline continguous")(Catch::Benchmark::Chronometer meter) { - return testing::idx_contiguous_bm(std::array{NU, NV, NW}); + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NX*NY*NY*NZ); + + portableFor( + "prep 5D", + 0, NZ, 0, NY, 0, NY, 0, NX, 0, 1, + PORTABLE_LAMBDA(auto m, auto n, auto k, auto j, auto i){tape_d[i + j * NX + k * NX * NX + n * NX * NX * NY + m * NY * NY * NX * NX ]=a;}); + + meter.measure([=] {portableFor( + "fill 5D", + 0, NZ, 0, NY, 0, NY, 0, NX, 1, NX, + PORTABLE_LAMBDA(auto m, auto n, auto k, auto j, auto i){tape_d[i + j * NX + k * NX * NX + n * NX * NX * NY + m * NY * NY * NX * NX ]=a + tape_d[i + j * NX + k * NX * NX + n * NX * NX * NY + m * NY * NY * NX * NX ];});return 0;}); + + PORTABLE_FREE(tape_d); }; + BENCHMARK_ADVANCED("PMD continguous")(Catch::Benchmark::Chronometer meter) + { + Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NX*NY*NY*NZ); + auto view_d = PortableMDArray(tape_d, NZ, NY, NY, NX, NX); + + portableFor("prep 5d", 0, NZ, 0, NY, 0, NY, 0, NX, 0, 1, PORTABLE_LAMBDA(auto m, auto n, auto k, auto j, auto i) { view_d(m,n,k,j, i) = a;}); + meter.measure([=] {portableFor( + "fill 5D", + 0, NZ, 0, NY, 0, NY, 0, NX, 1, NX, + PORTABLE_LAMBDA(auto m, auto n, auto k, auto j, auto i){view_d(m,n,k,j,i)=a + view_d(m,n,k,j,i-1);});return 0;}); + + PORTABLE_FREE(tape_d); + }; } + // clang-format on } diff --git a/test/test_portability.cpp b/test/test_portability.cpp index 1135426f..19947768 100644 --- a/test/test_portability.cpp +++ b/test/test_portability.cpp @@ -42,7 +42,7 @@ TEST_CASE("PortableMDArrays can be allocated from a pointer", "[PortableMDArray] constexpr int N = 2; constexpr int M = 3; std::vector data(N * M); - PortableMDArray a; + PortsOfCall::PortableMDArray a; int tot = 0; for (int i = 0; i < N * M; i++) { data[i] = tot; @@ -66,7 +66,7 @@ TEST_CASE("PortableMDArrays can be allocated from a pointer", "[PortableMDArray] } SECTION("Identical slices of the same data should compare equal") { - PortableMDArray aslc1, aslc2; + PortsOfCall::PortableMDArray aslc1, aslc2; aslc1.InitWithShallowSlice(a, 1, 0, 2); aslc2.InitWithShallowSlice(a, 1, 0, 2); REQUIRE(aslc1 == aslc2); diff --git a/test/test_utilities.hpp b/test/test_utilities.hpp index 30ba2090..7d2023a1 100644 --- a/test/test_utilities.hpp +++ b/test/test_utilities.hpp @@ -14,10 +14,10 @@ #ifndef _TEST_TEST_UTILITIES_HPP_ #define _TEST_TEST_UTILITIES_HPP_ -#include #include #include #include +#include #include namespace testing { @@ -66,27 +66,22 @@ PORTABLE_INLINE_FUNCTION constexpr auto pf_invoke(View v, const Array &ext_par, template PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) mdview(Ptr *d, const Array &arr, std::index_sequence) { - return PortableMDArray(d, arr[I]...); + return PortsOfCall::PortableMDArray(d, arr[I]...); } -// stand up and strip down a benchmark -// input is a set of sizes -template , - typename I2 = std::make_index_sequence<2 * N>> -PORTABLE_FUNCTION auto idx_contiguous_bm(const std::array &nxa) { - auto nc = std::accumulate(std::begin(nxa), std::end(nxa), 1, std::multiplies{}); +// stand up a benchmark +template , + typename I2 = std::make_index_sequence<2 * N>, + class = std::enable_if_t...>>> +PORTABLE_FUNCTION auto alloc_tape(Ns... ns) { + const auto nc = (ns * ... * std::size_t{1}); Real *tape_d = (Real *)PORTABLE_MALLOC(nc * sizeof(Real)); - auto view_d = mdview(tape_d, nxa, I1{}); + auto view_d = mdview(tape_d, std::array{ns...}, I1{}); - pf_invoke(view_d, nxa, I2{}); - - PORTABLE_FREE(tape_d); - - // return here is arbitrary, Catch2 recommends this to avoid - // optimizing out a call. - return 1; + return std::tuple(tape_d, view_d); } } // namespace testing From f2431e13894243638e49a69e17322ff03c8016f2 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Fri, 15 Nov 2024 09:23:00 -0700 Subject: [PATCH 15/22] add span --- ports-of-call/portable_arrays.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 6abab7f4..c0c3ae0c 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -26,6 +26,7 @@ #include "array.hpp" #include "portability.hpp" +#include "span.hpp" #include "utility/array_algo.hpp" #include "utility/index_algo.hpp" #include @@ -283,12 +284,10 @@ class PortableMDArray { strides_ = util::make_underfilled_array(util::get_strides(nxs_)); } - T *pdata_; + span pdata_; IArray nxs_; IArray strides_; int rank_; - - public: }; } // namespace PortsOfCall From 3c769f2da4760cf46110fbaae7a30cec7ff56cf8 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Tue, 19 Nov 2024 09:00:03 -0700 Subject: [PATCH 16/22] undo span changes --- ports-of-call/portable_arrays.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index c0c3ae0c..ae2837ef 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -284,7 +284,7 @@ class PortableMDArray { strides_ = util::make_underfilled_array(util::get_strides(nxs_)); } - span pdata_; + T *pdata_; IArray nxs_; IArray strides_; int rank_; From 9733f8044ce56308299097346d9bcf0d548f1b21 Mon Sep 17 00:00:00 2001 From: Christopher Michael Mauney Date: Tue, 19 Nov 2024 10:56:19 -0700 Subject: [PATCH 17/22] removed benchmark testing; fix some tests for gpu --- ports-of-call/portable_arrays.hpp | 32 +++--- test/CMakeLists.txt | 1 + test/test_mdidx.cpp | 181 +----------------------------- test/test_utilities.hpp | 2 +- 4 files changed, 21 insertions(+), 195 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index ae2837ef..f3ba64c5 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -130,28 +130,28 @@ class PortableMDArray { } // legacy API, TODO: deprecate - [[deprecated("Use GetDim instead.")]] - PORTABLE_FORCEINLINE_FUNCTION int GetDim1() const { + [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int + GetDim1() const { return GetDim<1>(); } - [[deprecated("Use GetDim instead.")]] - PORTABLE_FORCEINLINE_FUNCTION int GetDim2() const { + [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int + GetDim2() const { return GetDim<2>(); } - [[deprecated("Use GetDim instead.")]] - PORTABLE_FORCEINLINE_FUNCTION int GetDim3() const { + [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int + GetDim3() const { return GetDim<3>(); } - [[deprecated("Use GetDim instead.")]] - PORTABLE_FORCEINLINE_FUNCTION int GetDim4() const { + [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int + GetDim4() const { return GetDim<4>(); } - [[deprecated("Use GetDim instead.")]] - PORTABLE_FORCEINLINE_FUNCTION int GetDim5() const { + [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int + GetDim5() const { return GetDim<5>(); } - [[deprecated("Use GetDim instead.")]] - PORTABLE_FORCEINLINE_FUNCTION int GetDim6() const { + [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int + GetDim6() const { return GetDim<6>(); } PORTABLE_INLINE_FUNCTION int GetDim(size_t i) const { @@ -280,8 +280,12 @@ class PortableMDArray { PORTABLE_INLINE_FUNCTION constexpr auto update_layout(NXs... nxs) noexcept { rank_ = N; - nxs_ = util::make_underfilled_array(util::wrap_vars>(nxs...)); - strides_ = util::make_underfilled_array(util::get_strides(nxs_)); + // NB: the functions here have trouble with template argument deduction + // on some compilers, so they are hand-written. + auto szs = util::wrap_vars>(nxs...); + nxs_ = util::make_underfilled_array(szs); + auto sds = util::get_strides(nxs_); + strides_ = util::make_underfilled_array(sds); } T *pdata_; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 22445319..39dfba58 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -68,5 +68,6 @@ target_sources(test_portsofcall test_span.cpp test_static_vector.cpp test_static_vector_iterator.cpp + test_mdidx.cpp "$<$:test_static_vector_kokkos.cpp>" ) diff --git a/test/test_mdidx.cpp b/test/test_mdidx.cpp index a5e504b1..108f6a20 100644 --- a/test/test_mdidx.cpp +++ b/test/test_mdidx.cpp @@ -60,9 +60,6 @@ TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { TEST_CASE("Correct portable indexing", "[PortableMDArray]") { // layout - auto iflat = [](auto nx, auto nxny) { - return [=](auto k, auto j, auto i) { return i + nx * j + nxny * k; }; - }; constexpr std::size_t NX = 4, NY = 12, NZ = 3; constexpr std::size_t NC = NX * NY * NZ; @@ -84,7 +81,7 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { portableFor( "set unique val", 0, NZ, 0, NY, 0, NX, PORTABLE_LAMBDA(const int &k, const int &j, const int &i) { - view_d(k, j, i) = scale * iflat(NX, NX * NY)(k, j, i); + view_d(k, j, i) = scale * (i + NX * j + NX * NY * k); }); portableCopyToHost(tape_buf.data(), tape_d, NCb); @@ -98,179 +95,3 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { } PORTABLE_FREE(tape_d); } - -// runs benchmarks for indexing. -// note: to run, execute -// ./tests/test_portsofcall "[!benchmark]" -// (you may need to escape the `!` char depending -// on your shell) -TEST_CASE("Benchmark Indexing", "[!benchmark]") { - constexpr std::size_t NX = 16, NY = 16, NZ = 78; - constexpr std::size_t NU = 256, NV = 256, NW = 128; - - // construct a series of PortableMDArrays - // of different rank and sizes, and - // iterates through all contiguous indexing. - - Real a = 1.0; - // clang-format off - SECTION("small 1D") { - BENCHMARK_ADVANCED("baseline continguous")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); - - tape_d[0] = a; - meter.measure([=] {portableFor( - "fill 1D", - 1, NX, - PORTABLE_LAMBDA(auto i){tape_d[i]=a+tape_d[i-1];}); return 0;}); - - PORTABLE_FREE(tape_d); - return a; - }; - - - BENCHMARK_ADVANCED("PMD continguous (space optimized)")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); - auto view_d = PortableMDArray(tape_d, NX); - - view_d(0) = a; - meter.measure([=] {portableFor( - "fill 1D", - 1, NX, - PORTABLE_LAMBDA(auto i){view_d(i)=a+view_d(i-1);});return 0;}); - - PORTABLE_FREE(tape_d); - }; - - BENCHMARK_ADVANCED("PMD continguous (default size)")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); - auto view_d = PortableMDArray(tape_d, NX); - - view_d(0) = a; - meter.measure([=] {portableFor( - "fill 1D", - 1, NX, - PORTABLE_LAMBDA(auto i){view_d(i)=a+view_d(i-1);});return 0;}); - - auto b = view_d(0); - PORTABLE_FREE(tape_d); - }; - - BENCHMARK_ADVANCED("baseline shuffle")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); - std::array offs; - std::iota(std::begin(offs), std::end(offs), 0); - std::shuffle(std::begin(offs), std::end(offs), std::mt19937{std::random_device{}()}); - - meter.measure([=] {portableFor( - "fill 1D", - 0, NX, - PORTABLE_LAMBDA(auto i){tape_d[offs[i]]=i;}); return 0;}); - - PORTABLE_FREE(tape_d); - }; - - BENCHMARK_ADVANCED("PMD shuffle (space optimized)")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX); - auto view_d = PortableMDArray(tape_d, NX); - - std::array offs; - std::iota(std::begin(offs), std::end(offs), 0); - std::shuffle(std::begin(offs), std::end(offs), std::mt19937{std::random_device{}()}); - - meter.measure([=] {portableFor( - "fill 1D", - 1, NX, - PORTABLE_LAMBDA(auto i){view_d(offs[i])=i;});return 0;}); - - PORTABLE_FREE(tape_d); - }; - - - } - SECTION("small 2D") { - BENCHMARK_ADVANCED("baseline continguous")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NY); - - portableFor("prep 2d", 0, NY, 0, 1, PORTABLE_LAMBDA(auto j, auto i) { tape_d[i+j*NX] = a;}); - meter.measure([=] {portableFor( - "fill 1D", - 0, NY, 1, NX, - PORTABLE_LAMBDA(auto j, auto i){tape_d[i + j * NX]=a + tape_d[(i-1) + j * NX];}); return 0;}); - - PORTABLE_FREE(tape_d); - }; - BENCHMARK_ADVANCED("PMD contiguous (space optimized)")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NY); - auto view_d = PortableMDArray(tape_d, NY, NX); - - portableFor("prep 2d", 0, NY, 0, 1, PORTABLE_LAMBDA(auto j, auto i) { view_d(j, i) = a;}); - view_d(0,0) = a; - - meter.measure([=] {portableFor( - "fill 2D", - 0, NY, 1, NX, - PORTABLE_LAMBDA(auto j, auto i){view_d(j,i)=a + view_d(j, i-1);});return 0;}); - - PORTABLE_FREE(tape_d); - }; - - BENCHMARK_ADVANCED("PMD continguous (default size)")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NY); - auto view_d = PortableMDArray(tape_d, NY, NX); - - portableFor("prep 2d", 0, NY, 0, 1, PORTABLE_LAMBDA(auto j, auto i) { view_d(j, i) = a;}); - view_d(0,0) = a; - - meter.measure([=] {portableFor( - "fill 2D", - 0, NY, 1, NX, - PORTABLE_LAMBDA(auto j, auto i){view_d(j,i)=a + view_d(j, i-1);});return 0;}); - - PORTABLE_FREE(tape_d); - }; - } -SECTION("small 5D"){ - - BENCHMARK_ADVANCED("baseline continguous")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NX*NY*NY*NZ); - - portableFor( - "prep 5D", - 0, NZ, 0, NY, 0, NY, 0, NX, 0, 1, - PORTABLE_LAMBDA(auto m, auto n, auto k, auto j, auto i){tape_d[i + j * NX + k * NX * NX + n * NX * NX * NY + m * NY * NY * NX * NX ]=a;}); - - meter.measure([=] {portableFor( - "fill 5D", - 0, NZ, 0, NY, 0, NY, 0, NX, 1, NX, - PORTABLE_LAMBDA(auto m, auto n, auto k, auto j, auto i){tape_d[i + j * NX + k * NX * NX + n * NX * NX * NY + m * NY * NY * NX * NX ]=a + tape_d[i + j * NX + k * NX * NX + n * NX * NX * NY + m * NY * NY * NX * NX ];});return 0;}); - - PORTABLE_FREE(tape_d); - }; - - BENCHMARK_ADVANCED("PMD continguous")(Catch::Benchmark::Chronometer meter) - { - Real *tape_d = (Real *)PORTABLE_MALLOC(sizeof(Real)*NX*NX*NY*NY*NZ); - auto view_d = PortableMDArray(tape_d, NZ, NY, NY, NX, NX); - - portableFor("prep 5d", 0, NZ, 0, NY, 0, NY, 0, NX, 0, 1, PORTABLE_LAMBDA(auto m, auto n, auto k, auto j, auto i) { view_d(m,n,k,j, i) = a;}); - meter.measure([=] {portableFor( - "fill 5D", - 0, NZ, 0, NY, 0, NY, 0, NX, 1, NX, - PORTABLE_LAMBDA(auto m, auto n, auto k, auto j, auto i){view_d(m,n,k,j,i)=a + view_d(m,n,k,j,i-1);});return 0;}); - - PORTABLE_FREE(tape_d); - }; - } - - // clang-format on -} diff --git a/test/test_utilities.hpp b/test/test_utilities.hpp index 7d2023a1..c9e8f1cf 100644 --- a/test/test_utilities.hpp +++ b/test/test_utilities.hpp @@ -55,7 +55,7 @@ PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_fix(const std::array &a // this was written ad-hoc, and could be more general // (e.g. pass label, functor in lambda) template -PORTABLE_INLINE_FUNCTION constexpr auto pf_invoke(View v, const Array &ext_par, +PORTABLE_INLINE_FUNCTION constexpr void pf_invoke(View v, const Array &ext_par, std::index_sequence) { auto stud_arr = array_fix(ext_par); From 68bae3b3b8d740af45eb537071dfeca382ec6bfd Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Fri, 13 Dec 2024 09:36:16 -0700 Subject: [PATCH 18/22] fix compiler errors. --- ports-of-call/portable_arrays.hpp | 18 ++++++++++-------- ports-of-call/utility/array_algo.hpp | 10 ++++++---- ports-of-call/utility/index_algo.hpp | 4 ++-- test/test_mdidx.cpp | 9 +++++---- test/test_portability.cpp | 3 ++- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index f3ba64c5..547b15ce 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -39,7 +39,7 @@ namespace PortsOfCall { // maximum number of dimensions -constexpr std::size_t MAXDIM = 6; +constexpr std::size_t DEFAULT_MAXDIM = 6; namespace detail { // convert `index_sequence` values to constant @@ -61,9 +61,10 @@ using Array = PortsOfCall::array; template using IArray = Array; -template +template class PortableMDArray { public: + static constexpr const std::size_t MAXDIM = D; using this_type = PortableMDArray; using element_type = T; using value_type = typename std::remove_cv::type; @@ -89,14 +90,14 @@ class PortableMDArray { // ctors // default ctor: simply set null PortableMDArray PORTABLE_FUNCTION - PortableMDArray() noexcept : pdata_(nullptr), nxs_{{0}}, rank_{0} {} + PortableMDArray() noexcept : pdata_(nullptr), nxs_{{0}}, strides_{{0}}, rank_{0} {} // define copy constructor and overload assignment operator so both do deep // copies. // PortableMDArray(const PortableMDArray &t) noexcept; // PortableMDArray &operator=(const PortableMDArray &t) noexcept; PortableMDArray(const this_type &src) noexcept - : nxs_(src.nxs_), rank_(src.rank_), strides_(src.strides_), - pdata_(src.pdata_ ? src.pdata_ : nullptr) {} + : pdata_(src.pdata_ ? src.pdata_ : nullptr), nxs_(src.nxs_), strides_(src.strides_), + rank_(src.rank_) {} PortableMDArray &operator=(const this_type &src) noexcept { if (this != &src) { @@ -254,17 +255,18 @@ class PortableMDArray { // entries of the src array for d class A, class T, auto O> PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) make_underfilled_array(const A &in) { + using i_t = typename A::size_type; A out; - for (auto i = 0; i < in.size(); ++i) + for (i_t i = 0; i < in.size(); ++i) out[i] = in[i]; - for (auto i = in.size(); i < out.size(); ++i) + for (i_t i = in.size(); i < out.size(); ++i) out[i] = Fill; return out; } @@ -201,10 +202,11 @@ make_underfilled_array(const A &in) { template class A, class T, auto O> PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) make_underfilled_reversed_array(const A &in) { + using i_t = typename A::size_type; A out; - for (auto i = in.size() - 1; i >= 0; i--) + for (i_t i = in.size() - 1; i >= 0; i--) out[i] = in[i]; - for (auto i = in.size(); i < out.size(); ++i) + for (i_t i = in.size(); i < out.size(); ++i) out[i] = Fill; return out; } diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index fe2796ac..cd5fbd9b 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -53,8 +53,8 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides(A const &dim) { // returns the flat (1D) index of the md index set {i,j,k} // NB: fast because the strides are provided and don't need to be recomputed template -PORTABLE_FUNCTION static constexpr auto fast_findex(A const &ijk, A const &dim, - A const &stride) { +PORTABLE_FUNCTION static constexpr auto +fast_findex(A const &ijk, [[maybe_unused]] A const &dim, A const &stride) { // TODO: assert ijk in bounds constexpr auto N = get_size(A{}); if constexpr (N == 1) { diff --git a/test/test_mdidx.cpp b/test/test_mdidx.cpp index 108f6a20..7ef0d15b 100644 --- a/test/test_mdidx.cpp +++ b/test/test_mdidx.cpp @@ -28,6 +28,7 @@ #endif using namespace PortsOfCall; +constexpr const std::size_t MAXDIM = DEFAULT_MAXDIM; TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { using tape_t = std::vector; @@ -38,7 +39,7 @@ TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { std::vector dats; std::partial_sum(nxs.cbegin(), nxs.cend(), subsz.begin(), std::multiplies()); - for (auto i = 0; i < MAXDIM; ++i) { + for (std::size_t i = 0; i < MAXDIM; ++i) { dats.push_back(std::vector(subsz[i], nxs[i])); } @@ -70,7 +71,7 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { // vector length N on host of Real std::vector tape_ref(NC), tape_buf(NC); - for (auto n = 0; n < NC; ++n) { + for (std::size_t n = 0; n < NC; ++n) { tape_ref[n] = scale * static_cast(n); } @@ -85,11 +86,11 @@ TEST_CASE("Correct portable indexing", "[PortableMDArray]") { }); portableCopyToHost(tape_buf.data(), tape_d, NCb); - for (auto n = 0; n < NC; ++n) { + for (std::size_t n = 0; n < NC; ++n) { std::cout << "REF=" << tape_ref[n] << " BUF=" << tape_buf[n] << " n=" << n << "\n"; } - for (auto n = 0; n < NC; ++n) { + for (std::size_t n = 0; n < NC; ++n) { INFO("REF=" << tape_ref[n] << " BUF=" << tape_buf[n] << " n=" << n); REQUIRE_THAT(tape_buf[n], Catch::Matchers::WithinRel(tape_ref[n])); } diff --git a/test/test_portability.cpp b/test/test_portability.cpp index 811f6322..b2542b4c 100644 --- a/test/test_portability.cpp +++ b/test/test_portability.cpp @@ -87,7 +87,8 @@ TEST_CASE("portableCopy works with all portability strategies", "[portableCopy]" Real *a = (Real *)PORTABLE_MALLOC(Nb); // set device values to 0 - portableFor("set to 0", 0, N, PORTABLE_LAMBDA(const int &i) { a[i] = 0.0; }); + portableFor( + "set to 0", 0, N, PORTABLE_LAMBDA(const int &i) { a[i] = 0.0; }); // set host values to reference for (size_t i = 0; i < N; ++i) { From d47b0756ebe05577b111dc1d54c43da5897ff811 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Fri, 13 Dec 2024 09:41:36 -0700 Subject: [PATCH 19/22] return deprecated GetDim calls. Been long enough and Chris's change is already API breaking --- ports-of-call/portable_arrays.hpp | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 547b15ce..73539608 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -130,31 +130,6 @@ class PortableMDArray { return nxs_[rank_ - I]; } - // legacy API, TODO: deprecate - [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int - GetDim1() const { - return GetDim<1>(); - } - [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int - GetDim2() const { - return GetDim<2>(); - } - [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int - GetDim3() const { - return GetDim<3>(); - } - [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int - GetDim4() const { - return GetDim<4>(); - } - [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int - GetDim5() const { - return GetDim<5>(); - } - [[deprecated("Use GetDim instead.")]] PORTABLE_FORCEINLINE_FUNCTION int - GetDim6() const { - return GetDim<6>(); - } PORTABLE_INLINE_FUNCTION int GetDim(size_t i) const { // TODO: remove if performance cirtical assert(0 < i && i <= 6 && "PortableMDArrays are max 6D"); From e180050b57f27bc38d8e43244df761225565dab0 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Fri, 13 Dec 2024 10:01:24 -0700 Subject: [PATCH 20/22] fast index type punning --- ports-of-call/portable_arrays.hpp | 20 ++++++++++++++++++-- ports-of-call/utility/index_algo.hpp | 7 ++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 73539608..ce9a550c 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -187,12 +187,28 @@ class PortableMDArray { // l-value, e.g.: a(3) = 3.0; template //, class = std::enable_if_t<(sizeof...(Is) > 3)>> PORTABLE_FORCEINLINE_FUNCTION constexpr reference operator()(Is... idxs) noexcept { - return pdata_[util::fast_findex({static_cast(idxs)...}, nxs_, strides_)]; + index_type idx; + if constexpr (sizeof...(Is) == 0) { + idx = 0; + } else if constexpr (sizeof...(Is) == 1) { + idx = get_first(idxs...); + } else { + idx = util::fast_findex({static_cast(idxs)...}, nxs_, strides_); + } + return pdata_[idx]; } template //, class = std::enable_if_t<(sizeof...(Is) > 3)>> PORTABLE_FORCEINLINE_FUNCTION constexpr T &operator()(Is... idxs) const noexcept { - return pdata_[util::fast_findex({static_cast(idxs)...}, nxs_, strides_)]; + index_type idx; + if constexpr (sizeof...(Is) == 0) { + idx = 0; + } else if constexpr (sizeof...(Is) == 1) { + idx = get_first(idxs...); + } else { + idx = util::fast_findex({static_cast(idxs)...}, nxs_, strides_); + } + return pdata_[idx]; } this_type &operator*=(T scale) { diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index cd5fbd9b..74b9b9da 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -50,10 +50,15 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides(A const &dim) { return detail::get_strides_impl(dim, std::make_index_sequence{}); } +template +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_first(I head, Is... tail) { + return head; +} + // returns the flat (1D) index of the md index set {i,j,k} // NB: fast because the strides are provided and don't need to be recomputed template -PORTABLE_FUNCTION static constexpr auto +PORTABLE_FORCEINLINE_FUNCTION static constexpr auto fast_findex(A const &ijk, [[maybe_unused]] A const &dim, A const &stride) { // TODO: assert ijk in bounds constexpr auto N = get_size(A{}); From b3aba8684fde0427ee4e0d53afb48659ab951dec Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Fri, 13 Dec 2024 10:03:39 -0700 Subject: [PATCH 21/22] namespace --- ports-of-call/portable_arrays.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index ce9a550c..1dec9312 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -191,7 +191,7 @@ class PortableMDArray { if constexpr (sizeof...(Is) == 0) { idx = 0; } else if constexpr (sizeof...(Is) == 1) { - idx = get_first(idxs...); + idx = util::get_first(idxs...); } else { idx = util::fast_findex({static_cast(idxs)...}, nxs_, strides_); } @@ -204,7 +204,7 @@ class PortableMDArray { if constexpr (sizeof...(Is) == 0) { idx = 0; } else if constexpr (sizeof...(Is) == 1) { - idx = get_first(idxs...); + idx = util::get_first(idxs...); } else { idx = util::fast_findex({static_cast(idxs)...}, nxs_, strides_); } From 3b5d9bae3a427704d91c8a04f3e6b78f3c9a1db3 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Wed, 18 Dec 2024 07:15:33 -0700 Subject: [PATCH 22/22] null case for index --- ports-of-call/utility/index_algo.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index 74b9b9da..8b6d120d 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -62,7 +62,9 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto fast_findex(A const &ijk, [[maybe_unused]] A const &dim, A const &stride) { // TODO: assert ijk in bounds constexpr auto N = get_size(A{}); - if constexpr (N == 1) { + if constexpr (N == 0) { + return 0; + } else if constexpr (N == 1) { return ijk[0]; } else if constexpr (N == 2) { return ijk[1] + ijk[0] * stride[0];