Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some fixes for PowerPC 64 Big Endian mode #1409

Merged
merged 1 commit into from
Dec 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,15 @@ class NodeCodecQuadTreeHalfFloat
const Node *node = reinterpret_cast<const Node *>(inBufferStart + (node_properties << OFFSET_NON_SIGNIFICANT_BITS));

// Unpack bounds
#ifdef JPH_CPU_BIG_ENDIAN
Vec4 bounds_minx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinX[0] + (node->mBoundsMinX[1] << 16), node->mBoundsMinX[2] + (node->mBoundsMinX[3] << 16), 0, 0));
Vec4 bounds_miny = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinY[0] + (node->mBoundsMinY[1] << 16), node->mBoundsMinY[2] + (node->mBoundsMinY[3] << 16), 0, 0));
Vec4 bounds_minz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinZ[0] + (node->mBoundsMinZ[1] << 16), node->mBoundsMinZ[2] + (node->mBoundsMinZ[3] << 16), 0, 0));

Vec4 bounds_maxx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxX[0] + (node->mBoundsMaxX[1] << 16), node->mBoundsMaxX[2] + (node->mBoundsMaxX[3] << 16), 0, 0));
Vec4 bounds_maxy = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxY[0] + (node->mBoundsMaxY[1] << 16), node->mBoundsMaxY[2] + (node->mBoundsMaxY[3] << 16), 0, 0));
Vec4 bounds_maxz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxZ[0] + (node->mBoundsMaxZ[1] << 16), node->mBoundsMaxZ[2] + (node->mBoundsMaxZ[3] << 16), 0, 0));
#else
UVec4 bounds_minxy = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMinX[0]));
Vec4 bounds_minx = HalfFloatConversion::ToFloat(bounds_minxy);
Vec4 bounds_miny = HalfFloatConversion::ToFloat(bounds_minxy.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
Expand All @@ -265,6 +274,7 @@ class NodeCodecQuadTreeHalfFloat
UVec4 bounds_maxyz = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMaxY[0]));
Vec4 bounds_maxy = HalfFloatConversion::ToFloat(bounds_maxyz);
Vec4 bounds_maxz = HalfFloatConversion::ToFloat(bounds_maxyz.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
#endif

// Load properties for 4 children
UVec4 properties = UVec4::sLoadInt4(&node->mNodeProperties[0]);
Expand Down
48 changes: 27 additions & 21 deletions Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ class TriangleCodecIndexed8BitPackSOA4Flags
class DecodingContext
{
private:
/// Private helper functions to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
/// Private helper function to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
JPH_INLINE void Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const
{
// Get compressed data
Expand All @@ -356,6 +356,28 @@ class TriangleCodecIndexed8BitPackSOA4Flags
outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
}

/// Private helper function to unpack 4 triangles from a triangle block
JPH_INLINE void Unpack(const TriangleBlock *inBlock, const VertexData *inVertices, Vec4 &outX1, Vec4 &outY1, Vec4 &outZ1, Vec4 &outX2, Vec4 &outY2, Vec4 &outZ2, Vec4 &outX3, Vec4 &outY3, Vec4 &outZ3) const
{
// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&inBlock->mIndices[0]));
UVec4 iv1 = indices.Expand4Byte0();
UVec4 iv2 = indices.Expand4Byte4();
UVec4 iv3 = indices.Expand4Byte8();

#ifdef JPH_CPU_BIG_ENDIAN
// On big endian systems we need to reverse the bytes
iv1 = iv1.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
iv2 = iv2.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
iv3 = iv3.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
#endif

// Decompress the triangle data
Unpack(inVertices, iv1, outX1, outY1, outZ1);
Unpack(inVertices, iv2, outX2, outY2, outZ2);
Unpack(inVertices, iv3, outX3, outY3, outZ3);
}

public:
JPH_INLINE explicit DecodingContext(const TriangleHeader *inHeader) :
mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)),
Expand All @@ -380,17 +402,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags

do
{
// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
UVec4 iv1 = indices.Expand4Byte0();
UVec4 iv2 = indices.Expand4Byte4();
UVec4 iv3 = indices.Expand4Byte8();

// Decompress the triangle data
// Unpack the vertices for 4 triangles
Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
Unpack(vertices, iv1, v1x, v1y, v1z);
Unpack(vertices, iv2, v2x, v2y, v2z);
Unpack(vertices, iv3, v3x, v3y, v3z);
Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);

// Transpose it so we get normal vectors
Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed();
Expand Down Expand Up @@ -425,17 +439,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags
UVec4 start_triangle_idx = UVec4::sZero();
do
{
// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
UVec4 iv1 = indices.Expand4Byte0();
UVec4 iv2 = indices.Expand4Byte4();
UVec4 iv3 = indices.Expand4Byte8();

// Decompress the triangle data
// Unpack the vertices for 4 triangles
Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
Unpack(vertices, iv1, v1x, v1y, v1z);
Unpack(vertices, iv2, v2x, v2y, v2z);
Unpack(vertices, iv3, v3x, v3y, v3z);
Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);

// Perform ray vs triangle test
Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
Expand Down
Loading