diff --git a/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h b/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h index 15717a2da..c0feea7fe 100644 --- a/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h +++ b/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h @@ -254,6 +254,15 @@ class NodeCodecQuadTreeHalfFloat const Node *node = reinterpret_cast(inBufferStart + (node_properties << OFFSET_NON_SIGNIFICANT_BITS)); // Unpack bounds + #ifdef JPH_CPU_BIG_ENDIAN + Vec4 bounds_minx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinX[0] + (node->mBoundsMinX[1] << 16), node->mBoundsMinX[2] + (node->mBoundsMinX[3] << 16), 0, 0)); + Vec4 bounds_miny = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinY[0] + (node->mBoundsMinY[1] << 16), node->mBoundsMinY[2] + (node->mBoundsMinY[3] << 16), 0, 0)); + Vec4 bounds_minz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinZ[0] + (node->mBoundsMinZ[1] << 16), node->mBoundsMinZ[2] + (node->mBoundsMinZ[3] << 16), 0, 0)); + + Vec4 bounds_maxx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxX[0] + (node->mBoundsMaxX[1] << 16), node->mBoundsMaxX[2] + (node->mBoundsMaxX[3] << 16), 0, 0)); + Vec4 bounds_maxy = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxY[0] + (node->mBoundsMaxY[1] << 16), node->mBoundsMaxY[2] + (node->mBoundsMaxY[3] << 16), 0, 0)); + Vec4 bounds_maxz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxZ[0] + (node->mBoundsMaxZ[1] << 16), node->mBoundsMaxZ[2] + (node->mBoundsMaxZ[3] << 16), 0, 0)); + #else UVec4 bounds_minxy = UVec4::sLoadInt4(reinterpret_cast(&node->mBoundsMinX[0])); Vec4 bounds_minx = HalfFloatConversion::ToFloat(bounds_minxy); Vec4 bounds_miny = HalfFloatConversion::ToFloat(bounds_minxy.Swizzle()); @@ -265,6 +274,7 @@ class NodeCodecQuadTreeHalfFloat UVec4 bounds_maxyz = UVec4::sLoadInt4(reinterpret_cast(&node->mBoundsMaxY[0])); Vec4 bounds_maxy = HalfFloatConversion::ToFloat(bounds_maxyz); Vec4 bounds_maxz = HalfFloatConversion::ToFloat(bounds_maxyz.Swizzle()); + #endif // Load properties for 4 children UVec4 properties = UVec4::sLoadInt4(&node->mNodeProperties[0]); diff --git a/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h b/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h index 2168a5e43..b3c33c515 100644 --- a/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h +++ b/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h @@ -338,7 +338,7 @@ class TriangleCodecIndexed8BitPackSOA4Flags class DecodingContext { private: - /// Private helper functions to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.) + /// Private helper function to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.) JPH_INLINE void Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const { // Get compressed data @@ -356,6 +356,28 @@ class TriangleCodecIndexed8BitPackSOA4Flags outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ); } + /// Private helper function to unpack 4 triangles from a triangle block + JPH_INLINE void Unpack(const TriangleBlock *inBlock, const VertexData *inVertices, Vec4 &outX1, Vec4 &outY1, Vec4 &outZ1, Vec4 &outX2, Vec4 &outY2, Vec4 &outZ2, Vec4 &outX3, Vec4 &outY3, Vec4 &outZ3) const + { + // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok) + UVec4 indices = UVec4::sLoadInt4(reinterpret_cast(&inBlock->mIndices[0])); + UVec4 iv1 = indices.Expand4Byte0(); + UVec4 iv2 = indices.Expand4Byte4(); + UVec4 iv3 = indices.Expand4Byte8(); + + #ifdef JPH_CPU_BIG_ENDIAN + // On big endian systems we need to reverse the bytes + iv1 = iv1.Swizzle(); + iv2 = iv2.Swizzle(); + iv3 = iv3.Swizzle(); + #endif + + // Decompress the triangle data + Unpack(inVertices, iv1, outX1, outY1, outZ1); + Unpack(inVertices, iv2, outX2, outY2, outZ2); + Unpack(inVertices, iv3, outX3, outY3, outZ3); + } + public: JPH_INLINE explicit DecodingContext(const TriangleHeader *inHeader) : mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)), @@ -380,17 +402,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags do { - // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok) - UVec4 indices = UVec4::sLoadInt4(reinterpret_cast(&t->mIndices[0])); - UVec4 iv1 = indices.Expand4Byte0(); - UVec4 iv2 = indices.Expand4Byte4(); - UVec4 iv3 = indices.Expand4Byte8(); - - // Decompress the triangle data + // Unpack the vertices for 4 triangles Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z; - Unpack(vertices, iv1, v1x, v1y, v1z); - Unpack(vertices, iv2, v2x, v2y, v2z); - Unpack(vertices, iv3, v3x, v3y, v3z); + Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z); // Transpose it so we get normal vectors Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed(); @@ -425,17 +439,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags UVec4 start_triangle_idx = UVec4::sZero(); do { - // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok) - UVec4 indices = UVec4::sLoadInt4(reinterpret_cast(&t->mIndices[0])); - UVec4 iv1 = indices.Expand4Byte0(); - UVec4 iv2 = indices.Expand4Byte4(); - UVec4 iv3 = indices.Expand4Byte8(); - - // Decompress the triangle data + // Unpack the vertices for 4 triangles Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z; - Unpack(vertices, iv1, v1x, v1y, v1z); - Unpack(vertices, iv2, v2x, v2y, v2z); - Unpack(vertices, iv3, v3x, v3y, v3z); + Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z); // Perform ray vs triangle test Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);