diff --git a/benchmarks/lbm/src/ContainersD3QXX.h b/benchmarks/lbm/src/ContainersD3QXX.h index 9165d5de..0106e5da 100644 --- a/benchmarks/lbm/src/ContainersD3QXX.h +++ b/benchmarks/lbm/src/ContainersD3QXX.h @@ -566,9 +566,9 @@ struct ContainerFactoryD3QXX if (globalIdx.y == domainDim.y - 1) { popVal = -6. * Lattice::Memory::template getT() * ulb * - (Lattice::Memory::template getDirection().v[0] * ulid.v[0] + - Lattice::Memory::template getDirection().v[1] * ulid.v[1] + - Lattice::Memory::template getDirection().v[2] * ulid.v[2]); + (Lattice::Memory::template getDirection().x * ulid.x + + Lattice::Memory::template getDirection().y * ulid.y + + Lattice::Memory::template getDirection().z * ulid.z); } else { popVal = 0; } diff --git a/benchmarks/lbm/src/RunCavityTwoPop.cu b/benchmarks/lbm/src/RunCavityTwoPop.cu index a2c5ae76..a693cd17 100644 --- a/benchmarks/lbm/src/RunCavityTwoPop.cu +++ b/benchmarks/lbm/src/RunCavityTwoPop.cu @@ -77,9 +77,9 @@ auto run(Config& config, using M = typename Lattice::template RegisterMapper; if (globalIdx.y == domainDim.y - 1) { popVal = -6. * Lattice::Registers::template getT() * ulb * - (Lattice::Registers::template getVelocityComponent() * ulid.v[0] + - Lattice::Registers::template getVelocityComponent() * ulid.v[1] + - Lattice::Registers::template getVelocityComponent() * ulid.v[2]); + (Lattice::Registers::template getVelocityComponent() * ulid.x+ + Lattice::Registers::template getVelocityComponent() * ulid.y + + Lattice::Registers::template getVelocityComponent() * ulid.z); } else { popVal = 0; } diff --git a/libNeonCore/include/Neon/core/tools/io/IODense_imp.h b/libNeonCore/include/Neon/core/tools/io/IODense_imp.h index c18790c6..41286b46 100644 --- a/libNeonCore/include/Neon/core/tools/io/IODense_imp.h +++ b/libNeonCore/include/Neon/core/tools/io/IODense_imp.h @@ -172,10 +172,10 @@ auto IODense::operator()(const Integer_3d& xyz, return mImplicitFun(xyz, card); } const size_t pitch = - mPitch.mXpitch * xyz.x + - mPitch.mYpitch * xyz.y + - mPitch.mZpitch * xyz.z + - mPitch.mCpitch * card; + mPitch.x * xyz.x + + mPitch.y * xyz.y + + mPitch.z * xyz.z + + mPitch.w * card; return mMem[pitch]; } @@ -189,10 +189,10 @@ auto IODense::getReference(const Integer_3d& xyz, NEON_THROW_UNSUPPORTED_OPERATION("A IODense configure as IMPLICIT does not support such operation"); } const size_t pitch = - mPitch.mXpitch * xyz.x + - mPitch.mYpitch * xyz.y + - mPitch.mZpitch * xyz.z + - mPitch.mCpitch * card; + mPitch.x * xyz.x + + mPitch.y * xyz.y + + mPitch.z * xyz.z + + mPitch.w * card; return mMem[pitch]; } @@ -328,23 +328,23 @@ template ::initPitch() -> void { if (mOrder == Neon::MemoryLayout::structOfArrays) { - mPitch.mXpitch = 1; - mPitch.mYpitch = static_cast(mSpace.x); + mPitch.x = 1; + mPitch.y = static_cast(mSpace.x); - mPitch.mZpitch = static_cast(mSpace.x) * + mPitch.z = static_cast(mSpace.x) * static_cast(mSpace.y); - mPitch.mCpitch = static_cast(mSpace.x) * + mPitch.w = static_cast(mSpace.x) * static_cast(mSpace.y) * static_cast(mSpace.z); } else { - mPitch.mXpitch = mCardinality; - mPitch.mYpitch = mCardinality * + mPitch.x = mCardinality; + mPitch.y = mCardinality * static_cast(mSpace.x); - mPitch.mZpitch = mCardinality * + mPitch.z = mCardinality * static_cast(mSpace.x) * static_cast(mSpace.y); - mPitch.mCpitch = 1; + mPitch.w = 1; } } template diff --git a/libNeonCore/include/Neon/core/types/vec.h b/libNeonCore/include/Neon/core/types/vec.h index 38d597e7..b79a7a27 100644 --- a/libNeonCore/include/Neon/core/types/vec.h +++ b/libNeonCore/include/Neon/core/types/vec.h @@ -14,9 +14,7 @@ #include "Neon/core/types/vec/vec3d_generic.h" -#if !defined(NEON_WARP_COMPILATION) #include "Neon/core/types/vec/vec4d_generic.h" -#endif #include "Neon/core/types/vec/vecAlias.h" @@ -28,8 +26,10 @@ #include "Neon/core/types/vec/vec3d_integer.tdecl.h" #include "Neon/core/types/vec/vec3d_real.tdecl.h" -#if !defined(NEON_WARP_COMPILATION) + + #include "Neon/core/types/vec/vec4d_integer.tdecl.h" +#if !defined(NEON_WARP_COMPILATION) #include "Neon/core/types/vec/vec4d_real.tdecl.h" #endif diff --git a/libNeonCore/include/Neon/core/types/vec/vec4d_generic.h b/libNeonCore/include/Neon/core/types/vec/vec4d_generic.h index 4a4e747e..2faa5cbc 100644 --- a/libNeonCore/include/Neon/core/types/vec/vec4d_generic.h +++ b/libNeonCore/include/Neon/core/types/vec/vec4d_generic.h @@ -26,9 +26,10 @@ class Vec_4d w_axis = 2, num_axis = 4 }; - +#if !defined(NEON_WARP_COMPILATION) static_assert(!IsBaseTypeInteger, ""); static_assert(!IsBaseTypeReal, ""); +#endif }; diff --git a/libNeonCore/include/Neon/core/types/vec/vec4d_integer.tdecl.h b/libNeonCore/include/Neon/core/types/vec/vec4d_integer.tdecl.h index 940c6d2c..d99bd213 100644 --- a/libNeonCore/include/Neon/core/types/vec/vec4d_integer.tdecl.h +++ b/libNeonCore/include/Neon/core/types/vec/vec4d_integer.tdecl.h @@ -37,8 +37,8 @@ #include #include -//#include -//#include +// #include +// #include #include "Neon/core/types/BasicTypes.h" #include "Neon/core/types/Exceptions.h" @@ -52,8 +52,8 @@ namespace Neon { /** -* Partial specialization for integer types (int32_t, int64_t, size_t,...) -*/ + * Partial specialization for integer types (int32_t, int64_t, size_t,...) + */ template class Vec_4d { @@ -75,39 +75,7 @@ class Vec_4d c_axis = 3, num_axis = 4 }; - - union - { - element_t v[axis_e::num_axis]{0, 0, 0, 0}; - struct - { - union - { - element_t x; - element_t r; - element_t mXpitch; - }; - union - { - element_t y; - element_t s; - element_t mYpitch; - }; - union - { - element_t z; - element_t t; - element_t mZpitch; - }; - union - { - element_t w; - element_t u; - element_t mCpitch; - element_t c; - }; - }; - }; + Integer x, y, z, w; /** * Empty constructor. @@ -119,9 +87,9 @@ class Vec_4d ~Vec_4d() = default; /** - * All component of the 4d tuple are set to the same scalar value. - * @param[in] other the vector - */ + * All component of the 4d tuple are set to the same scalar value. + * @param[in] other the vector + */ NEON_CUDA_HOST_DEVICE inline Vec_4d(const self_t& other); /** @@ -132,13 +100,15 @@ class Vec_4d NEON_CUDA_HOST_DEVICE inline Vec_4d(const element_t other[self_t::num_axis]); +#if !defined(NEON_WARP_COMPILATION) NEON_CUDA_HOST_ONLY inline Vec_4d(std::initializer_list other); +#endif /** * Creates a 4d tuple with specific values for each component. * @param[in] px: value for the x component. * @param[in] py: value for the y component. * @param[in] pz: value for the z component. - * @param[in] pw: value for the w component. + * @param[in] pw: value for the w component. */ NEON_CUDA_HOST_DEVICE inline Vec_4d(element_t px, element_t py, element_t pz, element_t pw); @@ -152,6 +122,9 @@ class Vec_4d NEON_CUDA_HOST_DEVICE inline void set(const element_t& xyzw); + NEON_CUDA_HOST_DEVICE inline auto constexpr getVectorView() -> Integer*; + + NEON_CUDA_HOST_DEVICE inline auto constexpr getVectorView() const -> const Integer*; //---- [REDUCE SECTION] -------------------------------------------------------------------------------------------- //---- [REDUCE SECTION] -------------------------------------------------------------------------------------------- @@ -169,7 +142,7 @@ class Vec_4d /** * Extracts the max absolute value stored by the 4d tuple. - * @return max absolute value + * @return max absolute value */ inline element_t rAbsMax() const; @@ -319,10 +292,10 @@ class Vec_4d */ NEON_CUDA_HOST_DEVICE inline self_t operator-(const self_t& B) const; /** - * Compute the mod between two points A and B, component by component (A.x%B.x, A.y%B.y, A.z%B.z). - * @param[in] B: second point for the diff. - * @return Resulting point is C =(A.x % B.x, A.y % B.y, A.z % B.z) - */ + * Compute the mod between two points A and B, component by component (A.x%B.x, A.y%B.y, A.z%B.z). + * @param[in] B: second point for the diff. + * @return Resulting point is C =(A.x % B.x, A.y % B.y, A.z % B.z) + */ NEON_CUDA_HOST_DEVICE inline self_t operator%(const self_t& B) const; /** * Compute the multiplication between two points A and B, component by component (A.x*B.x, A.y*B.y, A.z*B.z, A.w.*B.w). @@ -356,15 +329,15 @@ class Vec_4d NEON_CUDA_HOST_DEVICE inline bool operator<(const self_t& B) const; /** Returns true if A.x >= B.x && A.y >= B.y && A.z >= B.z - * @param[in] B: second point for the operation. - * @return Resulting point is C as C.v[i] = A.v[i] > B.v[i] ? A.v[i] : B.v[i] - */ + * @param[in] B: second point for the operation. + * @return Resulting point is C as C.v[i] = A.v[i] > B.v[i] ? A.v[i] : B.v[i] + */ NEON_CUDA_HOST_DEVICE inline bool operator>=(const self_t& B) const; /** Returns true if A.x <= B.x && A.y <= B.y && A.z <= B.z - * @param[in] B: second point for the operation. - * @return True if A.x <= B.x && A.y <= B.y && A.z <= B.z - */ + * @param[in] B: second point for the operation. + * @return True if A.x <= B.x && A.y <= B.y && A.z <= B.z + */ NEON_CUDA_HOST_DEVICE inline bool operator<=(const self_t& B) const; /** Returns true if A.x <= B.x && A.y <= B.y && A.z <= B.z @@ -423,11 +396,13 @@ class Vec_4d //---- [ForEach SECTION] ---------------------------------------------------------------------------------------------- //---- [ForEach SECTION] ---------------------------------------------------------------------------------------------- +#if !defined(NEON_WARP_COMPILATION) template static void forEach(const self_t& len, std::function lambda); template static void forEach(const self_t& len, std::function lambda); +#endif }; diff --git a/libNeonCore/include/Neon/core/types/vec/vec4d_integer.timp.h b/libNeonCore/include/Neon/core/types/vec/vec4d_integer.timp.h index bb350e08..e382da2a 100644 --- a/libNeonCore/include/Neon/core/types/vec/vec4d_integer.timp.h +++ b/libNeonCore/include/Neon/core/types/vec/vec4d_integer.timp.h @@ -50,6 +50,9 @@ NEON_CUDA_HOST_DEVICE inline Vec_4d::Vec_4d(const I { set(other); } + +#if !defined(NEON_WARP_COMPILATION) + template NEON_CUDA_HOST_ONLY inline Vec_4d::Vec_4d(std::initializer_list other) { @@ -73,6 +76,7 @@ NEON_CUDA_HOST_ONLY inline Vec_4d::Vec_4d(std::init w = begin[3]; return; } +#endif template NEON_CUDA_HOST_DEVICE inline Vec_4d::Vec_4d(IntegerType_ta px, IntegerType_ta py, IntegerType_ta pz, IntegerType_ta pw) @@ -124,6 +128,21 @@ NEON_CUDA_HOST_DEVICE inline void Vec_4d::set(const w = xyzw; } +template +NEON_CUDA_HOST_DEVICE inline constexpr auto Vec_4d:: + getVectorView() + const -> const IntegerType_ta* +{ + return &x; +} + +template +NEON_CUDA_HOST_DEVICE inline constexpr auto Vec_4d:: + getVectorView() + -> IntegerType_ta* +{ + return &x; +} //---- [REDUCE SECTION] -------------------------------------------------------------------------------------------- //---- [REDUCE SECTION] -------------------------------------------------------------------------------------------- @@ -210,16 +229,16 @@ NEON_CUDA_HOST_DEVICE inline size_t Vec_4d::mCardDe { switch (order_ta) { case Neon::MemoryLayout::structOfArrays: { - return size_t(c) + - size_t(x) * size_t(dimGrid.c) + - size_t(y) * size_t(dimGrid.c) * size_t(dimGrid.x) + - size_t(z) * size_t(dimGrid.c) * size_t(dimGrid.x) * size_t(dimGrid.y); + return size_t(w) + + size_t(x) * size_t(dimGrid.w) + + size_t(y) * size_t(dimGrid.w) * size_t(dimGrid.x) + + size_t(z) * size_t(dimGrid.w) * size_t(dimGrid.x) * size_t(dimGrid.y); } case Neon::MemoryLayout::arrayOfStructs: { return size_t(x) + size_t(y) * size_t(dimGrid.x) + size_t(z) * size_t(dimGrid.x) * size_t(dimGrid.y) + - size_t(c) * size_t(dimGrid.x) * size_t(dimGrid.y) * size_t(dimGrid.z); + size_t(w) * size_t(dimGrid.x) * size_t(dimGrid.y) * size_t(dimGrid.z); ; } } @@ -292,8 +311,8 @@ NEON_CUDA_HOST_DEVICE inline index_t Vec_4d::idxOfM element_t themax = x; index_t indexMax = 0; for (int index = 1; index < 4; index++) { - if (themax < v[index]) { - themax = v[index]; + if (themax < getVectorView()[index]) { + themax = getVectorView()[index]; indexMax = index; } } @@ -307,8 +326,8 @@ NEON_CUDA_HOST_DEVICE inline index_t Vec_4d::idxOfM element_t themin = x; index_t indexMin = 0; for (int index = 1; index < 4; index++) { - if (themin > v[index]) { - themin = v[index]; + if (themin > getVectorView()[index]) { + themin = getVectorView()[index]; indexMin = index; } } @@ -321,7 +340,7 @@ NEON_CUDA_HOST_DEVICE inline Vec_4d Vec_4d { Vec_4d mask(0); const index_t index = this->iOfMin(); - mask.v[index] = 1; + mask.getVectorView()[index] = 1; return mask; } @@ -330,19 +349,19 @@ template NEON_CUDA_HOST_DEVICE inline Vec_4d Vec_4d::idxOrderByMax() const { Vec_4d ordered(0, 1, 2, 3); - if (v[0] < v[1]) { - ordered.v[0] = 1; - ordered.v[1] = 0; + if (getVectorView()[0] < getVectorView()[1]) { + ordered.getVectorView()[0] = 1; + ordered.getVectorView()[1] = 0; } - if (v[ordered.v[1]] < v[ordered.v[2]]) { - int32_t tmp = ordered.v[1]; - ordered.v[1] = ordered.v[2]; - ordered.v[2] = tmp; + if (getVectorView()[ordered.getVectorView()[1]] < getVectorView()[ordered.getVectorView()[2]]) { + int32_t tmp = ordered.getVectorView()[1]; + ordered.getVectorView()[1] = ordered.getVectorView()[2]; + ordered.getVectorView()[2] = tmp; } - if (v[ordered.v[2]] < v[ordered.v[3]]) { - int32_t tmp = ordered.v[2]; - ordered.v[2] = ordered.v[3]; - ordered.v[3] = tmp; + if (getVectorView()[ordered.getVectorView()[2]] < getVectorView()[ordered.getVectorView()[3]]) { + int32_t tmp = ordered.getVectorView()[2]; + ordered.getVectorView()[2] = ordered.getVectorView()[3]; + ordered.getVectorView()[3] = tmp; } return ordered; } @@ -673,6 +692,7 @@ std::string Vec_4d::to_stringForComposedNames() con msg += std::to_string(w); return msg; } +#if !defined(NEON_WARP_COMPILATION) template template @@ -743,5 +763,6 @@ std::ostream& operator<<(std::ostream& out, const Vec_4d { public: using element_t = RealType_ta; + using Real = RealType_ta; using self_t = Vec_4d; static_assert(!std::is_integral::value, ""); @@ -65,34 +66,8 @@ class Vec_4d num_axis = 4 }; - union - { - element_t v[axis_e::num_axis]{ - static_cast(.0), static_cast(.0), static_cast(.0), static_cast(.0)}; - struct - { - union - { - element_t x; - element_t r; - }; - union - { - element_t y; - element_t s; - }; - union - { - element_t z; - element_t t; - }; - union - { - element_t w; - element_t u; - }; - }; - }; + Real x, y, z, w; + /** @@ -146,6 +121,10 @@ class Vec_4d NEON_CUDA_HOST_DEVICE inline void set(const self_t& xyzw); + NEON_CUDA_HOST_DEVICE inline auto constexpr getVectorView() -> Real*; + + NEON_CUDA_HOST_DEVICE inline auto constexpr getVectorView() const -> const Real*; + //---- [REDUCE SECTION] -------------------------------------------------------------------------------------------- //---- [REDUCE SECTION] -------------------------------------------------------------------------------------------- //---- [REDUCE SECTION] -------------------------------------------------------------------------------------------- diff --git a/libNeonCore/include/Neon/core/types/vec/vec4d_real.timp.h b/libNeonCore/include/Neon/core/types/vec/vec4d_real.timp.h index 31f1e484..db5d803d 100644 --- a/libNeonCore/include/Neon/core/types/vec/vec4d_real.timp.h +++ b/libNeonCore/include/Neon/core/types/vec/vec4d_real.timp.h @@ -251,8 +251,8 @@ NEON_CUDA_HOST_DEVICE inline index_t Vec_4d::idxOfMax( element_t themax = x; index_t indexMax = 0; for (int index = 1; index < 4; index++) { - if (themax < v[index]) { - themax = v[index]; + if (themax < getVectorView()[index]) { + themax = getVectorView()[index]; indexMax = index; } } @@ -265,8 +265,8 @@ NEON_CUDA_HOST_DEVICE inline index_t Vec_4d::idxOfMin( element_t themin = x; index_t indexMin = 0; for (int index = 1; index < 4; index++) { - if (themin > v[index]) { - themin = v[index]; + if (themin > getVectorView()[index]) { + themin = getVectorView()[index]; indexMin = index; } } @@ -278,7 +278,7 @@ NEON_CUDA_HOST_DEVICE inline Vec_4d Vec_4d::i { Vec_4d mask(0); const index_t index = this->iOfMin(); - mask.v[index] = 1; + mask.getVectorView()[index] = 1; return mask; } @@ -286,20 +286,20 @@ template NEON_CUDA_HOST_DEVICE inline Vec_4d Vec_4d::idxOrderByMax() const { Vec_4d ordered(0, 1, 2, 3); - if (v[0] < v[1]) { - ordered.v[0] = 1; - ordered.v[1] = 0; + if (getVectorView()[0] < getVectorView()[1]) { + ordered.getVectorView()[0] = 1; + ordered.getVectorView()[1] = 0; } - if (v[ordered.v[1]] < v[ordered.v[2]]) { - int32_t tmp = ordered.v[1]; - ordered.v[1] = ordered.v[2]; - ordered.v[2] = tmp; + if (getVectorView()[ordered.getVectorView()[1]] < getVectorView()[ordered.getVectorView()[2]]) { + int32_t tmp = ordered.getVectorView()[1]; + ordered.getVectorView()[1] = ordered.getVectorView()[2]; + ordered.getVectorView()[2] = tmp; } - if (v[ordered.v[2]] < v[ordered.v[3]]) { - int32_t tmp = ordered.v[2]; - ordered.v[2] = ordered.v[3]; - ordered.v[3] = tmp; + if (getVectorView()[ordered.getVectorView()[2]] < getVectorView()[ordered.getVectorView()[3]]) { + int32_t tmp = ordered.getVectorView()[2]; + ordered.getVectorView()[2] = ordered.getVectorView()[3]; + ordered.getVectorView()[3] = tmp; } return ordered; } diff --git a/libNeonCore/include/Neon/core/types/vec/vecAlias.h b/libNeonCore/include/Neon/core/types/vec/vecAlias.h index 619d04a2..3cce74ab 100644 --- a/libNeonCore/include/Neon/core/types/vec/vecAlias.h +++ b/libNeonCore/include/Neon/core/types/vec/vecAlias.h @@ -86,7 +86,7 @@ using Real_4d = Vec_4d; using double_4d = Vec_4d; using float_4d = Vec_4d; - +#endif //---- [Integer 4D SECTION] ---------------------------------------------------------------------------------------------- //---- [Integer 4D SECTION] ---------------------------------------------------------------------------------------------- //---- [Integer 4D SECTION] ---------------------------------------------------------------------------------------------- @@ -103,5 +103,4 @@ using uint64_4d = Vec_4d; using size_4d = Vec_4d; using index_4d = Vec_4d; using char_4d = Vec_4d; -#endif } // End of namespace Neon diff --git a/libNeonDomain/include/Neon/domain/details/dGrid/dPartition.h b/libNeonDomain/include/Neon/domain/details/dGrid/dPartition.h index 789e7a69..b324cddb 100644 --- a/libNeonDomain/include/Neon/domain/details/dGrid/dPartition.h +++ b/libNeonDomain/include/Neon/domain/details/dGrid/dPartition.h @@ -5,7 +5,9 @@ #include "Neon/domain/interface/NghData.h" #include "Neon/set/DevSet.h" #include "Neon/sys/memory/CudaIntrinsics.h" +#if !defined(NEON_WARP_COMPILATION) #include "cuda_fp16.h" +#endif #include "dIndex.h" namespace Neon::domain::details::dGrid { @@ -59,12 +61,14 @@ class dPartition { } +#if !defined(NEON_WARP_COMPILATION) inline NEON_CUDA_HOST_ONLY auto enablePeriodicAlongZ() -> void { mPeriodicZ = true; } +#endif inline NEON_CUDA_HOST_DEVICE auto prtID() @@ -150,6 +154,7 @@ class dPartition return NghData(val, isValidNeighbour); } +#if !defined(NEON_WARP_COMPILATION) template NEON_CUDA_HOST_DEVICE inline auto getNghData(const Idx& gidx, @@ -170,6 +175,7 @@ class dPartition } } } +#endif template