From 703397a56674bd52b0746b42389f8d1e4f6ccca0 Mon Sep 17 00:00:00 2001 From: Christopher Mauney Date: Wed, 4 Sep 2024 13:50:08 -0600 Subject: [PATCH] some sloppy juggling of types --- ports-of-call/portable_arrays.hpp | 175 +++++++++++++++------------ ports-of-call/utility/array_algo.hpp | 86 +++++++++++-- ports-of-call/utility/index_algo.hpp | 13 +- test/test_mdidx.cpp | 6 +- test/test_portability.cpp | 7 +- 5 files changed, 185 insertions(+), 102 deletions(-) diff --git a/ports-of-call/portable_arrays.hpp b/ports-of-call/portable_arrays.hpp index 0938492a..ca14d3f2 100644 --- a/ports-of-call/portable_arrays.hpp +++ b/ports-of-call/portable_arrays.hpp @@ -27,6 +27,7 @@ #include "array.hpp" #include "portability.hpp" #include "utility/array_algo.hpp" +#include "utility/index_algo.hpp" #include #include #include @@ -47,22 +48,62 @@ constexpr std::size_t to_const = V; } // namespace detail +#if __cplusplus == 202002L +template +using Array = std::array; +#else template using Array = PortsOfCall::array; +#endif // __cplusplus template using IArray = Array; +/* +#define ENABLE_CRTP(DerivedType) \ + constexpr DerivedType &base() noexcept { return static_cast(*this); } \ + constexpr const DerivedType &base() const noexcept { return static_cast(*this); } + + + +template +struct WithStoredStrides +{ + ENABLE_CRTP(Derived) + using size_type = typename Derived :: size_type; + using index_type = typename Derived :: index_type; + using strides_type = decltype(Derived::nx_); + + strides_type strides_; + + template + constexpr auto operator() noexcept(Is...is) + { + return util::findex({is...}, base.nxs_, strides_); + } +}; +*/ + template class PortableMDArray { public: + using element_type = T; + using value_type = typename std::remove_cv::type; + using index_type = std::ptrdiff_t; + using size_type = std::size_t; + using pointer = T *; + using const_pointer = const T *; + using reference = T &; + using const_reference = const T &; + // explicit initialization of objects PORTABLE_FUNCTION PortableMDArray(T *data, IArray extents, IArray strides, - std::size_t rank) noexcept + size_type rank) noexcept : pdata_(data), nxs_(extents), strides_(strides), rank_(rank) {} // variadic ctor, dispatch to explicit constructor - template + template PORTABLE_FUNCTION PortableMDArray(T *p, NXs... nxs) noexcept { NewPortableMDArray(p, nxs...); } //: PortableMDArray(p, make_nxs_array(nxs...), make_strides_array(), N) { @@ -111,62 +152,81 @@ class PortableMDArray { // functions to get array dimensions template PORTABLE_FUNCTION constexpr auto GetDim() const { - return nxs_[I]; + return nxs_[rank_ - I]; } // legacy API, TODO: deprecate - PORTABLE_FORCEINLINE_FUNCTION int GetDim1() const { return GetDim<0>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim2() const { return GetDim<1>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim3() const { return GetDim<2>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim4() const { return GetDim<3>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim5() const { return GetDim<4>(); } - PORTABLE_FORCEINLINE_FUNCTION int GetDim6() const { return GetDim<5>(); } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim1() const { + return GetDim<1>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim2() const { + return GetDim<2>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim3() const { + return GetDim<3>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim4() const { + return GetDim<4>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim5() const { + return GetDim<5>(); + } + [[deprecated("Use GetDim instead.")]] + PORTABLE_FORCEINLINE_FUNCTION int GetDim6() const { + return GetDim<6>(); + } PORTABLE_INLINE_FUNCTION int GetDim(size_t i) const { // TODO: remove if performance cirtical assert(0 < i && i <= 6 && "PortableMDArrays are max 6D"); switch (i) { case 1: - return GetDim1(); + return GetDim<1>(); case 2: - return GetDim2(); + return GetDim<2>(); case 3: - return GetDim3(); + return GetDim<3>(); case 4: - return GetDim4(); + return GetDim<4>(); case 5: - return GetDim5(); + return GetDim<5>(); case 6: - return GetDim6(); + return GetDim<6>(); } return -1; } - PORTABLE_FORCEINLINE_FUNCTION int GetSize() const { - return util::array_reduce(nxs_, 1, std::multiplies{}); + PORTABLE_FORCEINLINE_FUNCTION int GetSize() const noexcept { + return util::array_reduce(nxs_, 1, std::multiplies{}); } - PORTABLE_FORCEINLINE_FUNCTION std::size_t GetSizeInBytes() const { + PORTABLE_FORCEINLINE_FUNCTION size_type GetSizeInBytes() const noexcept { return GetSize() * sizeof(T); } - PORTABLE_INLINE_FUNCTION std::size_t GetRank() const { return rank_; } + PORTABLE_INLINE_FUNCTION size_type GetRank() const noexcept { return rank_; } template PORTABLE_INLINE_FUNCTION void Reshape(NXs... nxs) { - assert(util::array_reduce(IArray{nxs...}, 1, std::multiplies{}) == - GetSize()); + assert(util::array_reduce(IArray{nxs...}, 1, std::multiplies{}) == + GetSize() && + "New shape gives different size than existing size"); update_layout(nxs...); } - PORTABLE_FORCEINLINE_FUNCTION bool IsShallowSlice() { return true; } - PORTABLE_FORCEINLINE_FUNCTION bool IsEmpty() { return GetSize() < 1; } + PORTABLE_FORCEINLINE_FUNCTION bool IsShallowSlice() const noexcept { return true; } + PORTABLE_FORCEINLINE_FUNCTION bool IsEmpty() const noexcept { return GetSize() < 1; } // "getter" function to access private data member // TODO(felker): Replace this unrestricted "getter" with a limited, safer // alternative. // TODO(felker): Rename function. Conflicts with "PortableMDArray<> data" // OutputData member. - PORTABLE_FORCEINLINE_FUNCTION T *data() { return pdata_; } - PORTABLE_FORCEINLINE_FUNCTION const T *data() const { return pdata_; } - PORTABLE_FORCEINLINE_FUNCTION T *begin() { return pdata_; } - PORTABLE_FORCEINLINE_FUNCTION T *end() { return pdata_ + GetSize(); } + PORTABLE_FORCEINLINE_FUNCTION pointer data() noexcept { return pdata_; } + PORTABLE_FORCEINLINE_FUNCTION const_pointer data() const noexcept { return pdata_; } + PORTABLE_FORCEINLINE_FUNCTION pointer begin() noexcept { return pdata_; } + PORTABLE_FORCEINLINE_FUNCTION pointer end() noexcept { return pdata_ + GetSize(); } // overload "function call" operator() to access 1d-5d data // provides Fortran-like syntax for multidimensional arrays vs. "subscript" @@ -176,13 +236,13 @@ class PortableMDArray { // access via returning by reference, enabling assignment on returned // l-value, e.g.: a(3) = 3.0; template - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const Is... idxs) { - return pdata_[compute_index(idxs...)]; + PORTABLE_FORCEINLINE_FUNCTION reference operator()(Is... idxs) noexcept { + return pdata_[util::fast_findex({static_cast(idxs)...}, nxs_, strides_)]; } template - PORTABLE_FORCEINLINE_FUNCTION T &operator()(const Is... idxs) const { - return pdata_[compute_index(idxs...)]; + PORTABLE_FORCEINLINE_FUNCTION T &operator()(Is... idxs) const noexcept { + return pdata_[util::fast_findex({static_cast(idxs)...}, nxs_, strides_)]; } PortableMDArray &operator*=(T scale) { @@ -240,60 +300,15 @@ class PortableMDArray { } private: - template - PORTABLE_FORCEINLINE_FUNCTION auto make_nxs_array(NX... nxs) { - IArray a; - IArray t{static_cast(nxs)...}; - for (auto i = 0; i < N; ++i) { - a[i] = t[N - i - 1]; - } - for (auto i = N; i < D; ++i) { - a[i] = 1; - } - return a; - } - - template - PORTABLE_INLINE_FUNCTION auto make_strides_array() { - IArray a; - a[0] = 1; - for (auto i = 1; i < N; ++i) { - a[i] = a[i - 1] * nxs_[i - 1]; - } - for (auto i = N; i < D; ++i) - a[i] = 0; - - return a; - } - // driver of nx array creation. // Note we iterate from the RIGHT, to match // what was done explicilt prior. - template - PORTABLE_INLINE_FUNCTION auto update_layout(NXs... nxs) { + template + PORTABLE_INLINE_FUNCTION constexpr auto update_layout(NXs... nxs) noexcept { rank_ = N; - nxs_ = make_nxs_array(nxs...); - strides_ = make_strides_array(); - } - - // compute_index base case, i.e. fastest moving index - template - PORTABLE_FORCEINLINE_FUNCTION size_t compute_index_impl(const size_t index) const { - return index; - } - - // compute_index general case, computing slower moving index strides - template - PORTABLE_FORCEINLINE_FUNCTION size_t compute_index_impl(const size_t index, - const Tail... tail) const { - return index * strides_[N - Ind - 1] + compute_index_impl(tail...); - } - // compute index driver. - template - PORTABLE_FORCEINLINE_FUNCTION std::size_t compute_index(const Indicies... idxs) const { - // adding `0` if sizeof...(Indicies) == 0 - return 0 + compute_index_impl<0, N>(idxs...); + nxs_ = util::make_underfilled_array(util::wrap_vars>(nxs...)); + strides_ = util::make_underfilled_array(util::get_strides(nxs_)); } T *pdata_; diff --git a/ports-of-call/utility/array_algo.hpp b/ports-of-call/utility/array_algo.hpp index 9cb2a516..e1d2e287 100644 --- a/ports-of-call/utility/array_algo.hpp +++ b/ports-of-call/utility/array_algo.hpp @@ -17,6 +17,7 @@ #include "../portability.hpp" #include #include +#include namespace util { @@ -53,6 +54,16 @@ constexpr auto is(std::integral_constant) { template using value_t = typename A::value_type; +template +constexpr auto get_size(const A &a) { + return a.size(); +} + +template +constexpr auto wrap_vars(B... vv) { + return A{static_cast>(vv)...}; +} + // determines the return type of Op(a,b) template using reduction_value_t = @@ -83,8 +94,67 @@ PORTABLE_INLINE_FUNCTION constexpr auto array_reduce_impl(A const &x, Op op) { } } +/* +template +constexpr auto NREP = V; + +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +make_underfilled_reverse_array_impl(std::index_sequence, + std::index_sequence, B &&...vv) { + return wrap_vars((std::get(std::tie(vv...)))..., + NREP...); +} +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto +make_underfilled_array_impl(std::index_sequence, std::index_sequence, + B &&...vv) { + return wrap_vars((std::get(std::tie(vv...)))..., NREP...); +} +*/ + } // namespace detail +/* +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto make_underfilled_reverse_array(B... vv) { + constexpr auto D = get_size(A{}); + return detail::make_underfilled_reverse_array_impl( + std::index_sequence_for{}, std::make_index_sequence{}, + vv...); +} +*/ +/* +template +PORTABLE_FORCEINLINE_FUNCTION constexpr auto make_underfilled_array(B... vv) { + constexpr auto D = get_size(A{}); + return detail::make_underfilled_array_impl( + std::index_sequence_for{}, std::make_index_sequence{}, + vv...); +} +*/ +template class A, class T, auto O> +PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) +make_underfilled_array(const A &in) { + A out; + for (auto i = 0; i < in.size(); ++i) + out[i] = in[i]; + for (auto i = in.size(); i < out.size(); ++i) + out[i] = Fill; + return out; +} + +template class A, class T, auto O> +PORTABLE_FORCEINLINE_FUNCTION constexpr decltype(auto) +make_underfilled_reversed_array(const A &in) { + A out; + for (auto i = in.size() - 1; i >= 0; i--) + out[i] = in[i]; + for (auto i = in.size(); i < out.size(); ++i) + out[i] = Fill; + return out; +} + // maps an unary function f(x) to each array value, returning an array of results // x = {f(a[0]), f(a[1]),..., f(a[N-1])} template @@ -97,25 +167,25 @@ PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, F f) { template PORTABLE_FORCEINLINE_FUNCTION constexpr auto array_map(A const &x, B const &y, F f) { - return detail::array_map_impl(x, y, f, is(x.size())); + return detail::array_map_impl(x, y, f, std::make_index_sequence{}); } -template > +template > PORTABLE_FORCEINLINE_FUNCTION constexpr T array_partial_reduce(A x, T initial_value, Op op) { - static_assert(I <= x.size()); - if constexpr (I == 0) + static_assert(N <= x.size()); + if constexpr ((N - I) == 0) return initial_value; else - return detail::array_reduce_impl<0, I>(x, op); + return detail::array_reduce_impl(x, op); } -template > - // performs a reduction on an array of values // e.g. x = sum_i a[i] +template > PORTABLE_FORCEINLINE_FUNCTION constexpr T array_reduce(A x, T initial_value, Op op) { - return array_partial_reduce(x, initial_value, op); + return array_partial_reduce<0, x.size()>(x, initial_value, op); } } // namespace util diff --git a/ports-of-call/utility/index_algo.hpp b/ports-of-call/utility/index_algo.hpp index 168676b9..ec5fc0bc 100644 --- a/ports-of-call/utility/index_algo.hpp +++ b/ports-of-call/utility/index_algo.hpp @@ -16,9 +16,6 @@ #include "../portability.hpp" #include "array_algo.hpp" -#include -#include -#include namespace util { @@ -27,7 +24,8 @@ template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_stride(A const &dim) { // NB: column major - return array_partial_reduce(dim, value_t{1}, std::multiplies{}); + return array_partial_reduce(dim, value_t{1}, + std::multiplies{}); } namespace detail { @@ -36,11 +34,12 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides_impl(A const &dim, std::index_sequence) { return A{get_stride(dim)...}; } +} // namespace detail + template PORTABLE_FORCEINLINE_FUNCTION static constexpr auto get_strides(A const &dim) { - return detail::get_strides_impl(dim, is(dim.size())); + return detail::get_strides_impl(dim, std::make_index_sequence{}); } -} // namespace detail // returns the flat (1D) index of the md index set {i,j,k} // NB: fast because the strides are provided and don't need to be recomputed @@ -49,7 +48,7 @@ PORTABLE_FORCEINLINE_FUNCTION static constexpr auto fast_findex(A const &ijk, A const &dim, A const &stride) { // TODO: assert ijk in bounds return array_reduce(array_map(ijk, stride, [](auto a, auto b) { return a * b; }), - value_t{1}, std::plus{}); + value_t{0}, std::plus{}); } // same as fast_findex, except the strides are calculated on the fly diff --git a/test/test_mdidx.cpp b/test/test_mdidx.cpp index dbf9b964..c7d9b341 100644 --- a/test/test_mdidx.cpp +++ b/test/test_mdidx.cpp @@ -49,9 +49,9 @@ TEST_CASE("PortableMDArrays Sizes are sane", "[PortableMDArray]") { REQUIRE(pmd3.GetSize() == subsz[2]); REQUIRE(pmd3.GetRank() == 3); - REQUIRE(pmd3.GetDim1() == nxs[2]); - REQUIRE(pmd3.GetDim2() == nxs[1]); - REQUIRE(pmd3.GetDim3() == nxs[0]); + REQUIRE(pmd3.GetDim<1>() == nxs[2]); + REQUIRE(pmd3.GetDim<2>() == nxs[1]); + REQUIRE(pmd3.GetDim<3>() == nxs[0]); } } diff --git a/test/test_portability.cpp b/test/test_portability.cpp index ac7bd60d..1135426f 100644 --- a/test/test_portability.cpp +++ b/test/test_portability.cpp @@ -51,8 +51,8 @@ TEST_CASE("PortableMDArrays can be allocated from a pointer", "[PortableMDArray] a.NewPortableMDArray(data.data(), M, N); SECTION("Shape should be NxM") { - REQUIRE(a.GetDim1() == N); - REQUIRE(a.GetDim2() == M); + REQUIRE(a.GetDim<1>() == N); + REQUIRE(a.GetDim<2>() == M); } SECTION("Stride is as set by initialized pointer") { @@ -87,8 +87,7 @@ TEST_CASE("portableCopy works with all portability strategies", "[portableCopy]" Real *a = (Real *)PORTABLE_MALLOC(Nb); // set device values to 0 - portableFor( - "set to 0", 0, N, PORTABLE_LAMBDA(const int &i) { a[i] = 0.0; }); + portableFor("set to 0", 0, N, PORTABLE_LAMBDA(const int &i) { a[i] = 0.0; }); // set host values to reference for (size_t i = 0; i < N; ++i) {