From f094082aa2bbfcbebc725dbe8b8f65c7d5152886 Mon Sep 17 00:00:00 2001
From: Jorrit Rouwe <jrouwe@gmail.com>
Date: Sat, 21 Dec 2024 14:13:44 +0100
Subject: [PATCH] LoongArch support (#1411)

* Fixed -Wuninitialized warnings when not using SSE or NEON
---
 .github/workflows/determinism_check.yml | 30 +++++++++++++++++++++++++
 Docs/Architecture.md                    |  1 +
 Docs/ReleaseNotes.md                    |  2 +-
 Jolt/ConfigurationString.h              |  2 ++
 Jolt/Core/Core.h                        | 26 +++++++++------------
 Jolt/Core/FPControlWord.h               |  2 +-
 Jolt/Core/FPFlushDenormals.h            |  2 +-
 Jolt/Core/TickCounter.h                 |  2 +-
 Jolt/Math/Math.h                        |  4 ++--
 Jolt/Math/Vec3.inl                      |  8 ++-----
 README.md                               |  2 +-
 UnitTests/Core/FPFlushDenormalsTest.cpp |  2 +-
 12 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/determinism_check.yml b/.github/workflows/determinism_check.yml
index 1b0db0725..95b997e63 100644
--- a/.github/workflows/determinism_check.yml
+++ b/.github/workflows/determinism_check.yml
@@ -10,6 +10,7 @@ env:
   UBUNTU_GCC_AARCH64_VERSION: aarch64-linux-gnu-g++-12
   UBUNTU_GCC_RISCV_VERSION: riscv64-linux-gnu-g++-12
   UBUNTU_GCC_POWERPC_VERSION: powerpc64le-linux-gnu-g++-12
+  UBUNTU_GCC_LOONGARCH_VERSION: loongarch64-linux-gnu-g++-14
 
 on:
   push:
@@ -294,6 +295,35 @@ jobs:
 #      working-directory: ${{github.workspace}}/Build/Linux_Distribution
 #      run: qemu-ppc64le -L /usr/powerpc64le-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Pyramid -validate_hash=${PYRAMID_HASH}
 
+  loongarch_gcc:
+    runs-on: ubuntu-24.04
+    name: LoongArch GCC Determinism Check
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Update index
+      run: sudo apt-get update
+    - name: Install Cross Compiler
+      run: sudo apt-get install g++-14-loongarch64-linux-gnu gcc-12-multilib g++-12-multilib qemu-user -y
+    - name: Configure CMake
+      working-directory: ${{github.workspace}}/Build
+      run: ./cmake_linux_clang_gcc.sh Distribution ${{env.UBUNTU_GCC_LOONGARCH_VERSION}} -DCROSS_COMPILE_ARM=ON -DCROSS_PLATFORM_DETERMINISTIC=ON -DCROSS_COMPILE_ARM_TARGET="" -DTARGET_VIEWER=OFF -DTARGET_SAMPLES=OFF -DTARGET_HELLO_WORLD=OFF -DTARGET_UNIT_TESTS=ON -DTARGET_PERFORMANCE_TEST=ON
+    - name: Build
+      run: cmake --build ${{github.workspace}}/Build/Linux_Distribution -j $(nproc)
+    - name: Unit Tests
+      working-directory: ${{github.workspace}}/Build/Linux_Distribution
+      run: qemu-loongarch64 -L /usr/loongarch64-linux-gnu/ ./UnitTests
+    - name: Test ConvexVsMesh
+      working-directory: ${{github.workspace}}/Build/Linux_Distribution
+      run: qemu-loongarch64 -L /usr/loongarch64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=ConvexVsMesh -validate_hash=${CONVEX_VS_MESH_HASH}
+    - name: Test Ragdoll
+      working-directory: ${{github.workspace}}/Build/Linux_Distribution
+      run: qemu-loongarch64 -L /usr/loongarch64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Ragdoll -validate_hash=${RAGDOLL_HASH}
+# The Pyramid test is slow so it is disabled for the moment
+#    - name: Test Pyramid
+#      working-directory: ${{github.workspace}}/Build/Linux_Distribution
+#      run: qemu-loongarch64 -L /usr/loongarch64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Pyramid -validate_hash=${PYRAMID_HASH}
+
   emscripten:
     runs-on: ubuntu-latest
     name: Emscripten Determinism Check
diff --git a/Docs/Architecture.md b/Docs/Architecture.md
index 60ac01d13..49c12dede 100644
--- a/Docs/Architecture.md
+++ b/Docs/Architecture.md
@@ -634,6 +634,7 @@ It is quite difficult to verify cross platform determinism, so this feature is l
 * Linux gcc ARM 64-bit with NEON
 * Linux gcc RISC-V 64-bit
 * Linux gcc PowerPC (Little Endian) 64-bit
+* Linux gcc LoongArch 64-bit
 * WASM emscripten running in nodejs
 
 The most important things to look out for in your own application:
diff --git a/Docs/ReleaseNotes.md b/Docs/ReleaseNotes.md
index da887164e..cfda9964f 100644
--- a/Docs/ReleaseNotes.md
+++ b/Docs/ReleaseNotes.md
@@ -14,7 +14,7 @@ For breaking API changes see [this document](https://github.com/jrouwe/JoltPhysi
 * Added `PhysicsSystem::SetSimShapeFilter`. This allows filtering out collisions between sub shapes within a body and can for example be used to have a single body that contains a low detail simulation shape an a high detail collision query shape.
 * Added an example of a body that's both a sensor and a rigid body in `ContactListenerTest`.
 * Added binary serialization to `SkeletalAnimation`.
-* Added support for RISC-V and PowerPC (Little Endian) CPUs.
+* Added support for RISC-V, LoongArch and PowerPC (Little Endian) CPUs.
 
 ### Bug fixes
 
diff --git a/Jolt/ConfigurationString.h b/Jolt/ConfigurationString.h
index 861e7f75c..1ff1969b2 100644
--- a/Jolt/ConfigurationString.h
+++ b/Jolt/ConfigurationString.h
@@ -23,6 +23,8 @@ inline const char *GetConfigurationString()
 	#else
 		"(Little Endian) "
 	#endif
+#elif defined(JPH_CPU_LOONGARCH)
+		"LoongArch "
 #elif defined(JPH_CPU_E2K)
 		"E2K "
 #elif defined(JPH_CPU_WASM)
diff --git a/Jolt/Core/Core.h b/Jolt/Core/Core.h
index f0f0bd72d..01b6afbf1 100644
--- a/Jolt/Core/Core.h
+++ b/Jolt/Core/Core.h
@@ -216,6 +216,16 @@
 	#endif
 	#define JPH_VECTOR_ALIGNMENT 16
 	#define JPH_DVECTOR_ALIGNMENT 8
+#elif defined(__loongarch__)
+	// LoongArch CPU architecture
+	#define JPH_CPU_LOONGARCH
+	#if defined(__loongarch64)
+		#define JPH_CPU_ADDRESS_BITS 64
+	#else
+		#define JPH_CPU_ADDRESS_BITS 32
+	#endif
+	#define JPH_VECTOR_ALIGNMENT 16
+	#define JPH_DVECTOR_ALIGNMENT 8
 #elif defined(__e2k__)
 	// E2K CPU architecture (MCST Elbrus 2000)
 	#define JPH_CPU_E2K
@@ -231,18 +241,6 @@
 	#error Unsupported CPU architecture
 #endif
 
-// CPU helper macros
-#ifdef JPH_CPU_RISCV
-	#define JPH_IF_RISCV(x) x
-#else
-	#define JPH_IF_RISCV(x)
-#endif
-#ifdef JPH_CPU_PPC
-	#define JPH_IF_PPC(x) x
-#else
-	#define JPH_IF_PPC(x)
-#endif
-
 // If this define is set, Jolt is compiled as a shared library
 #ifdef JPH_SHARED_LIBRARY
 	#ifdef JPH_BUILD_SHARED_LIBRARY
@@ -357,8 +355,6 @@
 	JPH_GCC_SUPPRESS_WARNING("-Wpedantic")														\
 	JPH_GCC_SUPPRESS_WARNING("-Wunused-parameter")												\
 	JPH_GCC_SUPPRESS_WARNING("-Wmaybe-uninitialized")											\
-	JPH_IF_RISCV(JPH_GCC_SUPPRESS_WARNING("-Wuninitialized"))									\
-	JPH_IF_PPC(JPH_GCC_SUPPRESS_WARNING("-Wuninitialized"))										\
 																								\
 	JPH_MSVC_SUPPRESS_WARNING(4619) /* #pragma warning: there is no warning number 'XXXX' */	\
 	JPH_MSVC_SUPPRESS_WARNING(4514) /* 'X' : unreferenced inline function has been removed */	\
@@ -397,7 +393,7 @@
 #elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS) || defined(JPH_PLATFORM_FREEBSD)
 	#if defined(JPH_CPU_X86)
 		#define JPH_BREAKPOINT	__asm volatile ("int $0x3")
-	#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_E2K) || defined(JPH_CPU_PPC)
+	#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_E2K) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
 		#define JPH_BREAKPOINT	__builtin_trap()
 	#else
 		#error Unknown CPU architecture
diff --git a/Jolt/Core/FPControlWord.h b/Jolt/Core/FPControlWord.h
index 58f621fc4..9fceee463 100644
--- a/Jolt/Core/FPControlWord.h
+++ b/Jolt/Core/FPControlWord.h
@@ -130,7 +130,7 @@ class FPControlWord : public NonCopyable
 
 // RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions.
 
-#elif defined(JPH_CPU_PPC)
+#elif defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
 
 // Not implemented right now
 
diff --git a/Jolt/Core/FPFlushDenormals.h b/Jolt/Core/FPFlushDenormals.h
index ce32e2bc1..74a2c10b3 100644
--- a/Jolt/Core/FPFlushDenormals.h
+++ b/Jolt/Core/FPFlushDenormals.h
@@ -8,7 +8,7 @@
 
 JPH_NAMESPACE_BEGIN
 
-#if defined(JPH_CPU_WASM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC)
+#if defined(JPH_CPU_WASM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
 
 // Not supported
 class FPFlushDenormals { };
diff --git a/Jolt/Core/TickCounter.h b/Jolt/Core/TickCounter.h
index dbbf777a2..7701bd2dc 100644
--- a/Jolt/Core/TickCounter.h
+++ b/Jolt/Core/TickCounter.h
@@ -35,7 +35,7 @@ JPH_INLINE uint64 GetProcessorTickCount()
 	uint64 val;
 	asm volatile("mrs %0, cntvct_el0" : "=r" (val));
 	return val;
-#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_WASM) || defined(JPH_CPU_PPC)
+#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_WASM) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
 	return 0; // Not supported
 #else
 	#error Undefined
diff --git a/Jolt/Math/Math.h b/Jolt/Math/Math.h
index 097d01c1a..221176eea 100644
--- a/Jolt/Math/Math.h
+++ b/Jolt/Math/Math.h
@@ -120,7 +120,7 @@ inline uint CountTrailingZeros(uint32 inValue)
 			return 32;
 		return __builtin_ctz(inValue);
 	#endif
-#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC)
+#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
 	return inValue ? __builtin_ctz(inValue) : 32;
 #else
 	#error Undefined
@@ -150,7 +150,7 @@ inline uint CountLeadingZeros(uint32 inValue)
 	#else
 		return __builtin_clz(inValue);
 	#endif
-#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC)
+#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
 	return inValue ? __builtin_clz(inValue) : 32;
 #else
 	#error Undefined
diff --git a/Jolt/Math/Vec3.inl b/Jolt/Math/Vec3.inl
index 5a47f40ce..dba99360d 100644
--- a/Jolt/Math/Vec3.inl
+++ b/Jolt/Math/Vec3.inl
@@ -64,9 +64,7 @@ Vec3::Vec3(const Float3 &inV)
 	mF32[0] = inV[0];
 	mF32[1] = inV[1];
 	mF32[2] = inV[2];
-	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
-		mF32[3] = inV[2];
-	#endif
+	mF32[3] = inV[2]; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables
 #endif
 }
 
@@ -82,9 +80,7 @@ Vec3::Vec3(float inX, float inY, float inZ)
 	mF32[0] = inX;
 	mF32[1] = inY;
 	mF32[2] = inZ;
-	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
-		mF32[3] = inZ;
-	#endif
+	mF32[3] = inZ; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables
 #endif
 }
 
diff --git a/README.md b/README.md
index da6fbeda1..9fd012d0c 100644
--- a/README.md
+++ b/README.md
@@ -90,7 +90,7 @@ Why create yet another physics engine? Firstly, it has been a personal learning
 ## Supported platforms
 
 * Windows (Desktop or UWP) x86/x64/ARM32/ARM64
-* Linux (tested on Ubuntu) x86/x64/ARM32/ARM64/RISC-V64/PowerPC64LE
+* Linux (tested on Ubuntu) x86/x64/ARM32/ARM64/RISC-V64/LoongArch64/PowerPC64LE
 * FreeBSD
 * Android x86/x64/ARM32/ARM64
 * Platform Blue (a popular game console) x64
diff --git a/UnitTests/Core/FPFlushDenormalsTest.cpp b/UnitTests/Core/FPFlushDenormalsTest.cpp
index 98639451a..6203f4bca 100644
--- a/UnitTests/Core/FPFlushDenormalsTest.cpp
+++ b/UnitTests/Core/FPFlushDenormalsTest.cpp
@@ -6,7 +6,7 @@
 #include <Jolt/Core/FPFlushDenormals.h>
 #include <atomic>
 
-#if !defined(JPH_CPU_WASM) && !defined(JPH_CPU_RISCV) && !defined(JPH_CPU_PPC)
+#if !defined(JPH_CPU_WASM) && !defined(JPH_CPU_RISCV) && !defined(JPH_CPU_PPC) && !defined(JPH_CPU_LOONGARCH)
 
 // Implemented as a global atomic so the compiler can't optimize it to a constant
 extern atomic<float> TestFltMin;