From 997c67e808dc94a5f4e098fae9308a4789eab6fa Mon Sep 17 00:00:00 2001
From: Jorrit Rouwe <jrouwe@gmail.com>
Date: Sat, 21 Dec 2024 10:11:08 +0100
Subject: [PATCH] Some fixes for PowerPC 64 Big Endian mode (some unit tests
 are still failing) (#1409)

---
 .../NodeCodec/NodeCodecQuadTreeHalfFloat.h    | 10 ++++
 .../TriangleCodecIndexed8BitPackSOA4Flags.h   | 48 +++++++++++--------
 2 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h b/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h
index 15717a2da..c0feea7fe 100644
--- a/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h
+++ b/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h
@@ -254,6 +254,15 @@ class NodeCodecQuadTreeHalfFloat
 					const Node *node = reinterpret_cast<const Node *>(inBufferStart + (node_properties << OFFSET_NON_SIGNIFICANT_BITS));
 
 					// Unpack bounds
+				#ifdef JPH_CPU_BIG_ENDIAN
+					Vec4 bounds_minx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinX[0] + (node->mBoundsMinX[1] << 16), node->mBoundsMinX[2] + (node->mBoundsMinX[3] << 16), 0, 0));
+					Vec4 bounds_miny = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinY[0] + (node->mBoundsMinY[1] << 16), node->mBoundsMinY[2] + (node->mBoundsMinY[3] << 16), 0, 0));
+					Vec4 bounds_minz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinZ[0] + (node->mBoundsMinZ[1] << 16), node->mBoundsMinZ[2] + (node->mBoundsMinZ[3] << 16), 0, 0));
+
+					Vec4 bounds_maxx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxX[0] + (node->mBoundsMaxX[1] << 16), node->mBoundsMaxX[2] + (node->mBoundsMaxX[3] << 16), 0, 0));
+					Vec4 bounds_maxy = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxY[0] + (node->mBoundsMaxY[1] << 16), node->mBoundsMaxY[2] + (node->mBoundsMaxY[3] << 16), 0, 0));
+					Vec4 bounds_maxz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxZ[0] + (node->mBoundsMaxZ[1] << 16), node->mBoundsMaxZ[2] + (node->mBoundsMaxZ[3] << 16), 0, 0));
+				#else
 					UVec4 bounds_minxy = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMinX[0]));
 					Vec4 bounds_minx = HalfFloatConversion::ToFloat(bounds_minxy);
 					Vec4 bounds_miny = HalfFloatConversion::ToFloat(bounds_minxy.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
@@ -265,6 +274,7 @@ class NodeCodecQuadTreeHalfFloat
 					UVec4 bounds_maxyz = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMaxY[0]));
 					Vec4 bounds_maxy = HalfFloatConversion::ToFloat(bounds_maxyz);
 					Vec4 bounds_maxz = HalfFloatConversion::ToFloat(bounds_maxyz.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
+				#endif
 
 					// Load properties for 4 children
 					UVec4 properties = UVec4::sLoadInt4(&node->mNodeProperties[0]);
diff --git a/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h b/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h
index 2168a5e43..b3c33c515 100644
--- a/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h
+++ b/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h
@@ -338,7 +338,7 @@ class TriangleCodecIndexed8BitPackSOA4Flags
 	class DecodingContext
 	{
 	private:
-		/// Private helper functions to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
+		/// Private helper function to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
 		JPH_INLINE void				Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const
 		{
 			// Get compressed data
@@ -356,6 +356,28 @@ class TriangleCodecIndexed8BitPackSOA4Flags
 			outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
 		}
 
+		/// Private helper function to unpack 4 triangles from a triangle block (outXn / outYn / outZn receive the coordinates of vertex n, n = 1 .. 3, for triangle 0 .. 3)
+		JPH_INLINE void				Unpack(const TriangleBlock *inBlock, const VertexData *inVertices, Vec4 &outX1, Vec4 &outY1, Vec4 &outZ1, Vec4 &outX2, Vec4 &outY2, Vec4 &outZ2, Vec4 &outX3, Vec4 &outY3, Vec4 &outZ3) const
+		{
+			// Get the indices for the three vertices of the triangles in one load (reads 4 bytes extra, but these are the flags so that's ok)
+			UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&inBlock->mIndices[0]));
+			UVec4 iv1 = indices.Expand4Byte0();
+			UVec4 iv2 = indices.Expand4Byte4();
+			UVec4 iv3 = indices.Expand4Byte8();
+
+		#ifdef JPH_CPU_BIG_ENDIAN
+			// On big endian systems we need to reverse the bytes: the swizzle flips the component order of each expanded index vector (X <-> W, Y <-> Z)
+			iv1 = iv1.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
+			iv2 = iv2.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
+			iv3 = iv3.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
+		#endif
+
+			// Decompress the triangle data (each call unpacks one vertex of the 4 triangles)
+			Unpack(inVertices, iv1, outX1, outY1, outZ1);
+			Unpack(inVertices, iv2, outX2, outY2, outZ2);
+			Unpack(inVertices, iv3, outX3, outY3, outZ3);
+		}
+
 	public:
 		JPH_INLINE explicit			DecodingContext(const TriangleHeader *inHeader) :
 			mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)),
@@ -380,17 +402,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags
 
 			do
 			{
-				// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
-				UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
-				UVec4 iv1 = indices.Expand4Byte0();
-				UVec4 iv2 = indices.Expand4Byte4();
-				UVec4 iv3 = indices.Expand4Byte8();
-
-				// Decompress the triangle data
+				// Unpack the vertices for 4 triangles
 				Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
-				Unpack(vertices, iv1, v1x, v1y, v1z);
-				Unpack(vertices, iv2, v2x, v2y, v2z);
-				Unpack(vertices, iv3, v3x, v3y, v3z);
+				Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
 
 				// Transpose it so we get normal vectors
 				Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed();
@@ -425,17 +439,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags
 			UVec4 start_triangle_idx = UVec4::sZero();
 			do
 			{
-				// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
-				UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
-				UVec4 iv1 = indices.Expand4Byte0();
-				UVec4 iv2 = indices.Expand4Byte4();
-				UVec4 iv3 = indices.Expand4Byte8();
-
-				// Decompress the triangle data
+				// Unpack the vertices for 4 triangles
 				Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
-				Unpack(vertices, iv1, v1x, v1y, v1z);
-				Unpack(vertices, iv2, v2x, v2y, v2z);
-				Unpack(vertices, iv3, v3x, v3y, v3z);
+				Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
 
 				// Perform ray vs triangle test
 				Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);