Skip to content

Commit

Permalink
libNeonPy working - issue with the rest of the compilation (#50)
Browse files Browse the repository at this point in the history
* Fixing layout issue with python bindings

* libNeonPy working - issue with the rest of the compilation
  • Loading branch information
massimim authored May 25, 2024
1 parent efc91fc commit 25536af
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 145 deletions.
6 changes: 3 additions & 3 deletions benchmarks/lbm/src/ContainersD3QXX.h
Original file line number Diff line number Diff line change
Expand Up @@ -566,9 +566,9 @@ struct ContainerFactoryD3QXX

if (globalIdx.y == domainDim.y - 1) {
popVal = -6. * Lattice::Memory::template getT<M::fwdMemQ>() * ulb *
(Lattice::Memory::template getDirection<M::fwdMemQ>().v[0] * ulid.v[0] +
Lattice::Memory::template getDirection<M::fwdMemQ>().v[1] * ulid.v[1] +
Lattice::Memory::template getDirection<M::fwdMemQ>().v[2] * ulid.v[2]);
(Lattice::Memory::template getDirection<M::fwdMemQ>().x * ulid.x +
Lattice::Memory::template getDirection<M::fwdMemQ>().y * ulid.y +
Lattice::Memory::template getDirection<M::fwdMemQ>().z * ulid.z);
} else {
popVal = 0;
}
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/lbm/src/RunCavityTwoPop.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ auto run(Config& config,
using M = typename Lattice::template RegisterMapper<q>;
if (globalIdx.y == domainDim.y - 1) {
popVal = -6. * Lattice::Registers::template getT<M::fwdRegQ>() * ulb *
(Lattice::Registers::template getVelocityComponent<M::fwdRegQ, 0>() * ulid.v[0] +
Lattice::Registers::template getVelocityComponent<M::fwdRegQ, 1>() * ulid.v[1] +
Lattice::Registers::template getVelocityComponent<M::fwdRegQ, 2>() * ulid.v[2]);
(Lattice::Registers::template getVelocityComponent<M::fwdRegQ, 0>() * ulid.x+
Lattice::Registers::template getVelocityComponent<M::fwdRegQ, 1>() * ulid.y +
Lattice::Registers::template getVelocityComponent<M::fwdRegQ, 2>() * ulid.z);
} else {
popVal = 0;
}
Expand Down
32 changes: 16 additions & 16 deletions libNeonCore/include/Neon/core/tools/io/IODense_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,10 @@ auto IODense<ExportType, IntType>::operator()(const Integer_3d<IntType>& xyz,
return mImplicitFun(xyz, card);
}
const size_t pitch =
mPitch.mXpitch * xyz.x +
mPitch.mYpitch * xyz.y +
mPitch.mZpitch * xyz.z +
mPitch.mCpitch * card;
mPitch.x * xyz.x +
mPitch.y * xyz.y +
mPitch.z * xyz.z +
mPitch.w * card;
return mMem[pitch];
}

Expand All @@ -189,10 +189,10 @@ auto IODense<ExportType, IntType>::getReference(const Integer_3d<IntType>& xyz,
NEON_THROW_UNSUPPORTED_OPERATION("A IODense configure as IMPLICIT does not support such operation");
}
const size_t pitch =
mPitch.mXpitch * xyz.x +
mPitch.mYpitch * xyz.y +
mPitch.mZpitch * xyz.z +
mPitch.mCpitch * card;
mPitch.x * xyz.x +
mPitch.y * xyz.y +
mPitch.z * xyz.z +
mPitch.w * card;
return mMem[pitch];
}

Expand Down Expand Up @@ -328,23 +328,23 @@ template <typename ExportType,
auto IODense<ExportType, IntType>::initPitch() -> void
{
if (mOrder == Neon::MemoryLayout::structOfArrays) {
mPitch.mXpitch = 1;
mPitch.mYpitch = static_cast<size_t>(mSpace.x);
mPitch.x = 1;
mPitch.y = static_cast<size_t>(mSpace.x);

mPitch.mZpitch = static_cast<size_t>(mSpace.x) *
mPitch.z = static_cast<size_t>(mSpace.x) *
static_cast<size_t>(mSpace.y);

mPitch.mCpitch = static_cast<size_t>(mSpace.x) *
mPitch.w = static_cast<size_t>(mSpace.x) *
static_cast<size_t>(mSpace.y) *
static_cast<size_t>(mSpace.z);
} else {
mPitch.mXpitch = mCardinality;
mPitch.mYpitch = mCardinality *
mPitch.x = mCardinality;
mPitch.y = mCardinality *
static_cast<size_t>(mSpace.x);
mPitch.mZpitch = mCardinality *
mPitch.z = mCardinality *
static_cast<size_t>(mSpace.x) *
static_cast<size_t>(mSpace.y);
mPitch.mCpitch = 1;
mPitch.w = 1;
}
}
template <typename ExportType, typename IntType>
Expand Down
6 changes: 3 additions & 3 deletions libNeonCore/include/Neon/core/types/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@

#include "Neon/core/types/vec/vec3d_generic.h"

#if !defined(NEON_WARP_COMPILATION)
#include "Neon/core/types/vec/vec4d_generic.h"
#endif


#include "Neon/core/types/vec/vecAlias.h"
Expand All @@ -28,8 +26,10 @@

#include "Neon/core/types/vec/vec3d_integer.tdecl.h"
#include "Neon/core/types/vec/vec3d_real.tdecl.h"
#if !defined(NEON_WARP_COMPILATION)


#include "Neon/core/types/vec/vec4d_integer.tdecl.h"
#if !defined(NEON_WARP_COMPILATION)
#include "Neon/core/types/vec/vec4d_real.tdecl.h"
#endif

Expand Down
3 changes: 2 additions & 1 deletion libNeonCore/include/Neon/core/types/vec/vec4d_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ class Vec_4d
w_axis = 2,
num_axis = 4
};

#if !defined(NEON_WARP_COMPILATION)
static_assert(!IsBaseTypeInteger, "");
static_assert(!IsBaseTypeReal, "");
#endif
};


Expand Down
79 changes: 27 additions & 52 deletions libNeonCore/include/Neon/core/types/vec/vec4d_integer.tdecl.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
#include <string>
#include <type_traits>

//#include <cuda.h>
//#include <cuda_runtime_api.h>
// #include <cuda.h>
// #include <cuda_runtime_api.h>

#include "Neon/core/types/BasicTypes.h"
#include "Neon/core/types/Exceptions.h"
Expand All @@ -52,8 +52,8 @@ namespace Neon {


/**
* Partial specialization for integer types (int32_t, int64_t, size_t,...)
*/
* Partial specialization for integer types (int32_t, int64_t, size_t,...)
*/
template <typename IntegerType_ta>
class Vec_4d<IntegerType_ta, true, false>
{
Expand All @@ -75,39 +75,7 @@ class Vec_4d<IntegerType_ta, true, false>
c_axis = 3,
num_axis = 4
};

union
{
element_t v[axis_e::num_axis]{0, 0, 0, 0};
struct
{
union
{
element_t x;
element_t r;
element_t mXpitch;
};
union
{
element_t y;
element_t s;
element_t mYpitch;
};
union
{
element_t z;
element_t t;
element_t mZpitch;
};
union
{
element_t w;
element_t u;
element_t mCpitch;
element_t c;
};
};
};
Integer x, y, z, w;

/**
* Empty constructor.
Expand All @@ -119,9 +87,9 @@ class Vec_4d<IntegerType_ta, true, false>
~Vec_4d() = default;

/**
* All component of the 4d tuple are set to the same scalar value.
* @param[in] other the vector
*/
* Copy constructor: initializes this 4d tuple from another one.
* @param[in] other: the vector to copy from.
*/
NEON_CUDA_HOST_DEVICE inline Vec_4d(const self_t& other);

/**
Expand All @@ -132,13 +100,15 @@ class Vec_4d<IntegerType_ta, true, false>

NEON_CUDA_HOST_DEVICE inline Vec_4d(const element_t other[self_t::num_axis]);

#if !defined(NEON_WARP_COMPILATION)
NEON_CUDA_HOST_ONLY inline Vec_4d(std::initializer_list<element_t> other);
#endif
/**
* Creates a 4d tuple with specific values for each component.
* @param[in] px: value for the x component.
* @param[in] py: value for the y component.
* @param[in] pz: value for the z component.
* @param[in] pw: value for the w component.
* @param[in] pw: value for the w component.
*/
NEON_CUDA_HOST_DEVICE inline Vec_4d(element_t px, element_t py, element_t pz, element_t pw);

Expand All @@ -152,6 +122,9 @@ class Vec_4d<IntegerType_ta, true, false>

NEON_CUDA_HOST_DEVICE inline void set(const element_t& xyzw);

NEON_CUDA_HOST_DEVICE inline auto constexpr getVectorView() -> Integer*;

NEON_CUDA_HOST_DEVICE inline auto constexpr getVectorView() const -> const Integer*;

//---- [REDUCE SECTION] --------------------------------------------------------------------------------------------
//---- [REDUCE SECTION] --------------------------------------------------------------------------------------------
Expand All @@ -169,7 +142,7 @@ class Vec_4d<IntegerType_ta, true, false>

/**
* Extracts the max absolute value stored by the 4d tuple.
* @return max absolute value
* @return max absolute value
*/
inline element_t rAbsMax() const;

Expand Down Expand Up @@ -319,10 +292,10 @@ class Vec_4d<IntegerType_ta, true, false>
*/
NEON_CUDA_HOST_DEVICE inline self_t operator-(const self_t& B) const;
/**
* Compute the mod between two points A and B, component by component (A.x%B.x, A.y%B.y, A.z%B.z).
* @param[in] B: second point for the diff.
* @return Resulting point is C =(A.x % B.x, A.y % B.y, A.z % B.z)
*/
* Compute the mod between two points A and B, component by component (A.x%B.x, A.y%B.y, A.z%B.z).
* @param[in] B: second point for the mod (right-hand operand).
* @return Resulting point is C =(A.x % B.x, A.y % B.y, A.z % B.z)
*/
NEON_CUDA_HOST_DEVICE inline self_t operator%(const self_t& B) const;
/**
* Compute the multiplication between two points A and B, component by component (A.x*B.x, A.y*B.y, A.z*B.z, A.w.*B.w).
Expand Down Expand Up @@ -356,15 +329,15 @@ class Vec_4d<IntegerType_ta, true, false>
NEON_CUDA_HOST_DEVICE inline bool operator<(const self_t& B) const;

/** Returns true if A.x >= B.x && A.y >= B.y && A.z >= B.z
* @param[in] B: second point for the operation.
* @return Resulting point is C as C.v[i] = A.v[i] > B.v[i] ? A.v[i] : B.v[i]
*/
* @param[in] B: second point for the operation.
* @return True if A.x >= B.x && A.y >= B.y && A.z >= B.z
*/
NEON_CUDA_HOST_DEVICE inline bool operator>=(const self_t& B) const;

/** Returns true if A.x <= B.x && A.y <= B.y && A.z <= B.z
* @param[in] B: second point for the operation.
* @return True if A.x <= B.x && A.y <= B.y && A.z <= B.z
*/
* @param[in] B: second point for the operation.
* @return True if A.x <= B.x && A.y <= B.y && A.z <= B.z
*/
NEON_CUDA_HOST_DEVICE inline bool operator<=(const self_t& B) const;

/** Returns true if A.x <= B.x && A.y <= B.y && A.z <= B.z
Expand Down Expand Up @@ -423,11 +396,13 @@ class Vec_4d<IntegerType_ta, true, false>
//---- [ForEach SECTION] ----------------------------------------------------------------------------------------------
//---- [ForEach SECTION] ----------------------------------------------------------------------------------------------

#if !defined(NEON_WARP_COMPILATION)
template <Neon::computeMode_t::computeMode_e computeMode_ta = Neon::computeMode_t::seq>
static void forEach(const self_t& len, std::function<void(const self_t& idx)> lambda);

template <Neon::computeMode_t::computeMode_e computeMode_ta = Neon::computeMode_t::seq>
static void forEach(const self_t& len, std::function<void(element_t idxX, element_t idxY, element_t idxZ, element_t idxW)> lambda);
#endif
};


Expand Down
Loading

0 comments on commit 25536af

Please sign in to comment.