Skip to content

Commit

Permalink
Some fixes for PowerPC 64 Big Endian mode (some unit tests are still …
Browse files Browse the repository at this point in the history
…failing) (#1409)
  • Loading branch information
jrouwe authored Dec 21, 2024
1 parent 053d9ef commit 997c67e
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 21 deletions.
10 changes: 10 additions & 0 deletions Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,15 @@ class NodeCodecQuadTreeHalfFloat
const Node *node = reinterpret_cast<const Node *>(inBufferStart + (node_properties << OFFSET_NON_SIGNIFICANT_BITS));

// Unpack bounds
#ifdef JPH_CPU_BIG_ENDIAN
Vec4 bounds_minx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinX[0] + (node->mBoundsMinX[1] << 16), node->mBoundsMinX[2] + (node->mBoundsMinX[3] << 16), 0, 0));
Vec4 bounds_miny = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinY[0] + (node->mBoundsMinY[1] << 16), node->mBoundsMinY[2] + (node->mBoundsMinY[3] << 16), 0, 0));
Vec4 bounds_minz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinZ[0] + (node->mBoundsMinZ[1] << 16), node->mBoundsMinZ[2] + (node->mBoundsMinZ[3] << 16), 0, 0));

Vec4 bounds_maxx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxX[0] + (node->mBoundsMaxX[1] << 16), node->mBoundsMaxX[2] + (node->mBoundsMaxX[3] << 16), 0, 0));
Vec4 bounds_maxy = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxY[0] + (node->mBoundsMaxY[1] << 16), node->mBoundsMaxY[2] + (node->mBoundsMaxY[3] << 16), 0, 0));
Vec4 bounds_maxz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxZ[0] + (node->mBoundsMaxZ[1] << 16), node->mBoundsMaxZ[2] + (node->mBoundsMaxZ[3] << 16), 0, 0));
#else
UVec4 bounds_minxy = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMinX[0]));
Vec4 bounds_minx = HalfFloatConversion::ToFloat(bounds_minxy);
Vec4 bounds_miny = HalfFloatConversion::ToFloat(bounds_minxy.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
Expand All @@ -265,6 +274,7 @@ class NodeCodecQuadTreeHalfFloat
UVec4 bounds_maxyz = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMaxY[0]));
Vec4 bounds_maxy = HalfFloatConversion::ToFloat(bounds_maxyz);
Vec4 bounds_maxz = HalfFloatConversion::ToFloat(bounds_maxyz.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
#endif

// Load properties for 4 children
UVec4 properties = UVec4::sLoadInt4(&node->mNodeProperties[0]);
Expand Down
48 changes: 27 additions & 21 deletions Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ class TriangleCodecIndexed8BitPackSOA4Flags
class DecodingContext
{
private:
/// Private helper functions to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
/// Private helper function to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
JPH_INLINE void Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const
{
// Get compressed data
Expand All @@ -356,6 +356,28 @@ class TriangleCodecIndexed8BitPackSOA4Flags
outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
}

/// Private helper that decodes a whole triangle block: fetches the vertex indices stored in inBlock and unpacks the
/// three vertices of its 4 triangles (outXn / outYn / outZn receive the coordinates of vertex n for triangle 0 .. 3)
JPH_INLINE void Unpack(const TriangleBlock *inBlock, const VertexData *inVertices, Vec4 &outX1, Vec4 &outY1, Vec4 &outZ1, Vec4 &outX2, Vec4 &outY2, Vec4 &outZ2, Vec4 &outX3, Vec4 &outY3, Vec4 &outZ3) const
{
	// Fetch the indices of all three vertices with a single load (this reads 4 bytes extra, but these are the flags so that's ok)
	const UVec4 packed_indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&inBlock->mIndices[0]));

#ifdef JPH_CPU_BIG_ENDIAN
	// On big endian systems the bytes come out reversed, so flip the component order of every expanded index vector
	const UVec4 vertex1_indices = packed_indices.Expand4Byte0().Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
	const UVec4 vertex2_indices = packed_indices.Expand4Byte4().Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
	const UVec4 vertex3_indices = packed_indices.Expand4Byte8().Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
#else
	// Expand each group of 4 index bytes into its own vector
	const UVec4 vertex1_indices = packed_indices.Expand4Byte0();
	const UVec4 vertex2_indices = packed_indices.Expand4Byte4();
	const UVec4 vertex3_indices = packed_indices.Expand4Byte8();
#endif

	// Decompress vertex 1, 2 and 3 of the 4 triangles
	Unpack(inVertices, vertex1_indices, outX1, outY1, outZ1);
	Unpack(inVertices, vertex2_indices, outX2, outY2, outZ2);
	Unpack(inVertices, vertex3_indices, outX3, outY3, outZ3);
}

public:
JPH_INLINE explicit DecodingContext(const TriangleHeader *inHeader) :
mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)),
Expand All @@ -380,17 +402,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags

do
{
// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
UVec4 iv1 = indices.Expand4Byte0();
UVec4 iv2 = indices.Expand4Byte4();
UVec4 iv3 = indices.Expand4Byte8();

// Decompress the triangle data
// Unpack the vertices for 4 triangles
Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
Unpack(vertices, iv1, v1x, v1y, v1z);
Unpack(vertices, iv2, v2x, v2y, v2z);
Unpack(vertices, iv3, v3x, v3y, v3z);
Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);

// Transpose it so we get normal vectors
Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed();
Expand Down Expand Up @@ -425,17 +439,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags
UVec4 start_triangle_idx = UVec4::sZero();
do
{
// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
UVec4 iv1 = indices.Expand4Byte0();
UVec4 iv2 = indices.Expand4Byte4();
UVec4 iv3 = indices.Expand4Byte8();

// Decompress the triangle data
// Unpack the vertices for 4 triangles
Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
Unpack(vertices, iv1, v1x, v1y, v1z);
Unpack(vertices, iv2, v2x, v2y, v2z);
Unpack(vertices, iv3, v3x, v3y, v3z);
Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);

// Perform ray vs triangle test
Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
Expand Down

0 comments on commit 997c67e

Please sign in to comment.