From f094082aa2bbfcbebc725dbe8b8f65c7d5152886 Mon Sep 17 00:00:00 2001 From: Jorrit Rouwe Date: Sat, 21 Dec 2024 14:13:44 +0100 Subject: [PATCH] LoongArch support (#1411) * Fixed -Wuninitialized warnings when not using SSE or NEON --- .github/workflows/determinism_check.yml | 30 +++++++++++++++++++++++++ Docs/Architecture.md | 1 + Docs/ReleaseNotes.md | 2 +- Jolt/ConfigurationString.h | 2 ++ Jolt/Core/Core.h | 26 +++++++++------------ Jolt/Core/FPControlWord.h | 2 +- Jolt/Core/FPFlushDenormals.h | 2 +- Jolt/Core/TickCounter.h | 2 +- Jolt/Math/Math.h | 4 ++-- Jolt/Math/Vec3.inl | 8 ++----- README.md | 2 +- UnitTests/Core/FPFlushDenormalsTest.cpp | 2 +- 12 files changed, 54 insertions(+), 29 deletions(-) diff --git a/.github/workflows/determinism_check.yml b/.github/workflows/determinism_check.yml index 1b0db0725..95b997e63 100644 --- a/.github/workflows/determinism_check.yml +++ b/.github/workflows/determinism_check.yml @@ -10,6 +10,7 @@ env: UBUNTU_GCC_AARCH64_VERSION: aarch64-linux-gnu-g++-12 UBUNTU_GCC_RISCV_VERSION: riscv64-linux-gnu-g++-12 UBUNTU_GCC_POWERPC_VERSION: powerpc64le-linux-gnu-g++-12 + UBUNTU_GCC_LOONGARCH_VERSION: loongarch64-linux-gnu-g++-14 on: push: @@ -294,6 +295,35 @@ jobs: # working-directory: ${{github.workspace}}/Build/Linux_Distribution # run: qemu-ppc64le -L /usr/powerpc64le-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Pyramid -validate_hash=${PYRAMID_HASH} + loongarch_gcc: + runs-on: ubuntu-24.04 + name: LoongArch GCC Determinism Check + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Update index + run: sudo apt-get update + - name: Install Cross Compiler + run: sudo apt-get install g++-14-loongarch64-linux-gnu gcc-12-multilib g++-12-multilib qemu-user -y + - name: Configure CMake + working-directory: ${{github.workspace}}/Build + run: ./cmake_linux_clang_gcc.sh Distribution ${{env.UBUNTU_GCC_LOONGARCH_VERSION}} -DCROSS_COMPILE_ARM=ON -DCROSS_PLATFORM_DETERMINISTIC=ON -DCROSS_COMPILE_ARM_TARGET="" 
-DTARGET_VIEWER=OFF -DTARGET_SAMPLES=OFF -DTARGET_HELLO_WORLD=OFF -DTARGET_UNIT_TESTS=ON -DTARGET_PERFORMANCE_TEST=ON + - name: Build + run: cmake --build ${{github.workspace}}/Build/Linux_Distribution -j $(nproc) + - name: Unit Tests + working-directory: ${{github.workspace}}/Build/Linux_Distribution + run: qemu-loongarch64 -L /usr/loongarch64-linux-gnu/ ./UnitTests + - name: Test ConvexVsMesh + working-directory: ${{github.workspace}}/Build/Linux_Distribution + run: qemu-loongarch64 -L /usr/loongarch64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=ConvexVsMesh -validate_hash=${CONVEX_VS_MESH_HASH} + - name: Test Ragdoll + working-directory: ${{github.workspace}}/Build/Linux_Distribution + run: qemu-loongarch64 -L /usr/loongarch64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Ragdoll -validate_hash=${RAGDOLL_HASH} +# This is slow so disabled for the moment +# - name: Test Pyramid +# working-directory: ${{github.workspace}}/Build/Linux_Distribution +# run: qemu-loongarch64 -L /usr/loongarch64-linux-gnu/ ./PerformanceTest -q=LinearCast -t=max -s=Pyramid -validate_hash=${PYRAMID_HASH} + emscripten: runs-on: ubuntu-latest name: Emscripten Determinism Check diff --git a/Docs/Architecture.md b/Docs/Architecture.md index 60ac01d13..49c12dede 100644 --- a/Docs/Architecture.md +++ b/Docs/Architecture.md @@ -634,6 +634,7 @@ It is quite difficult to verify cross platform determinism, so this feature is l * Linux gcc ARM 64-bit with NEON * Linux gcc RISC-V 64-bit * Linux gcc PowerPC (Little Endian) 64-bit +* Linux gcc LoongArch 64-bit * WASM emscripten running in nodejs The most important things to look out for in your own application: diff --git a/Docs/ReleaseNotes.md b/Docs/ReleaseNotes.md index da887164e..cfda9964f 100644 --- a/Docs/ReleaseNotes.md +++ b/Docs/ReleaseNotes.md @@ -14,7 +14,7 @@ For breaking API changes see [this document](https://github.com/jrouwe/JoltPhysi * Added `PhysicsSystem::SetSimShapeFilter`. 
This allows filtering out collisions between sub shapes within a body and can for example be used to have a single body that contains a low detail simulation shape an a high detail collision query shape. * Added an example of a body that's both a sensor and a rigid body in `ContactListenerTest`. * Added binary serialization to `SkeletalAnimation`. -* Added support for RISC-V and PowerPC (Little Endian) CPUs. +* Added support for RISC-V, LoongArch and PowerPC (Little Endian) CPUs. ### Bug fixes diff --git a/Jolt/ConfigurationString.h b/Jolt/ConfigurationString.h index 861e7f75c..1ff1969b2 100644 --- a/Jolt/ConfigurationString.h +++ b/Jolt/ConfigurationString.h @@ -23,6 +23,8 @@ inline const char *GetConfigurationString() #else "(Little Endian) " #endif +#elif defined(JPH_CPU_LOONGARCH) + "LoongArch " #elif defined(JPH_CPU_E2K) "E2K " #elif defined(JPH_CPU_WASM) diff --git a/Jolt/Core/Core.h b/Jolt/Core/Core.h index f0f0bd72d..01b6afbf1 100644 --- a/Jolt/Core/Core.h +++ b/Jolt/Core/Core.h @@ -216,6 +216,16 @@ #endif #define JPH_VECTOR_ALIGNMENT 16 #define JPH_DVECTOR_ALIGNMENT 8 +#elif defined(__loongarch__) + // LoongArch CPU architecture + #define JPH_CPU_LOONGARCH + #if defined(__loongarch64) + #define JPH_CPU_ADDRESS_BITS 64 + #else + #define JPH_CPU_ADDRESS_BITS 32 + #endif + #define JPH_VECTOR_ALIGNMENT 16 + #define JPH_DVECTOR_ALIGNMENT 8 #elif defined(__e2k__) // E2K CPU architecture (MCST Elbrus 2000) #define JPH_CPU_E2K @@ -231,18 +241,6 @@ #error Unsupported CPU architecture #endif -// CPU helper macros -#ifdef JPH_CPU_RISCV - #define JPH_IF_RISCV(x) x -#else - #define JPH_IF_RISCV(x) -#endif -#ifdef JPH_CPU_PPC - #define JPH_IF_PPC(x) x -#else - #define JPH_IF_PPC(x) -#endif - // If this define is set, Jolt is compiled as a shared library #ifdef JPH_SHARED_LIBRARY #ifdef JPH_BUILD_SHARED_LIBRARY @@ -357,8 +355,6 @@ JPH_GCC_SUPPRESS_WARNING("-Wpedantic") \ JPH_GCC_SUPPRESS_WARNING("-Wunused-parameter") \ JPH_GCC_SUPPRESS_WARNING("-Wmaybe-uninitialized") \ 
- JPH_IF_RISCV(JPH_GCC_SUPPRESS_WARNING("-Wuninitialized")) \ - JPH_IF_PPC(JPH_GCC_SUPPRESS_WARNING("-Wuninitialized")) \ \ JPH_MSVC_SUPPRESS_WARNING(4619) /* #pragma warning: there is no warning number 'XXXX' */ \ JPH_MSVC_SUPPRESS_WARNING(4514) /* 'X' : unreferenced inline function has been removed */ \ @@ -397,7 +393,7 @@ #elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS) || defined(JPH_PLATFORM_FREEBSD) #if defined(JPH_CPU_X86) #define JPH_BREAKPOINT __asm volatile ("int $0x3") - #elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_E2K) || defined(JPH_CPU_PPC) + #elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_E2K) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) #define JPH_BREAKPOINT __builtin_trap() #else #error Unknown CPU architecture diff --git a/Jolt/Core/FPControlWord.h b/Jolt/Core/FPControlWord.h index 58f621fc4..9fceee463 100644 --- a/Jolt/Core/FPControlWord.h +++ b/Jolt/Core/FPControlWord.h @@ -130,7 +130,7 @@ class FPControlWord : public NonCopyable // RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions. 
-#elif defined(JPH_CPU_PPC) +#elif defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) // Not implemented right now diff --git a/Jolt/Core/FPFlushDenormals.h b/Jolt/Core/FPFlushDenormals.h index ce32e2bc1..74a2c10b3 100644 --- a/Jolt/Core/FPFlushDenormals.h +++ b/Jolt/Core/FPFlushDenormals.h @@ -8,7 +8,7 @@ JPH_NAMESPACE_BEGIN -#if defined(JPH_CPU_WASM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) +#if defined(JPH_CPU_WASM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) // Not supported class FPFlushDenormals { }; diff --git a/Jolt/Core/TickCounter.h b/Jolt/Core/TickCounter.h index dbbf777a2..7701bd2dc 100644 --- a/Jolt/Core/TickCounter.h +++ b/Jolt/Core/TickCounter.h @@ -35,7 +35,7 @@ JPH_INLINE uint64 GetProcessorTickCount() uint64 val; asm volatile("mrs %0, cntvct_el0" : "=r" (val)); return val; -#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_WASM) || defined(JPH_CPU_PPC) +#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_WASM) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) return 0; // Not supported #else #error Undefined diff --git a/Jolt/Math/Math.h b/Jolt/Math/Math.h index 097d01c1a..221176eea 100644 --- a/Jolt/Math/Math.h +++ b/Jolt/Math/Math.h @@ -120,7 +120,7 @@ inline uint CountTrailingZeros(uint32 inValue) return 32; return __builtin_ctz(inValue); #endif -#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) +#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) return inValue ? __builtin_ctz(inValue) : 32; #else #error Undefined @@ -150,7 +150,7 @@ inline uint CountLeadingZeros(uint32 inValue) #else return __builtin_clz(inValue); #endif -#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) +#elif defined(JPH_CPU_E2K) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH) return inValue ? 
__builtin_clz(inValue) : 32; #else #error Undefined diff --git a/Jolt/Math/Vec3.inl b/Jolt/Math/Vec3.inl index 5a47f40ce..dba99360d 100644 --- a/Jolt/Math/Vec3.inl +++ b/Jolt/Math/Vec3.inl @@ -64,9 +64,7 @@ Vec3::Vec3(const Float3 &inV) mF32[0] = inV[0]; mF32[1] = inV[1]; mF32[2] = inV[2]; - #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED - mF32[3] = inV[2]; - #endif + mF32[3] = inV[2]; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables #endif } @@ -82,9 +80,7 @@ Vec3::Vec3(float inX, float inY, float inZ) mF32[0] = inX; mF32[1] = inY; mF32[2] = inZ; - #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED - mF32[3] = inZ; - #endif + mF32[3] = inZ; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables #endif } diff --git a/README.md b/README.md index da6fbeda1..9fd012d0c 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ Why create yet another physics engine? Firstly, it has been a personal learning ## Supported platforms * Windows (Desktop or UWP) x86/x64/ARM32/ARM64 -* Linux (tested on Ubuntu) x86/x64/ARM32/ARM64/RISC-V64/PowerPC64LE +* Linux (tested on Ubuntu) x86/x64/ARM32/ARM64/RISC-V64/LoongArch64/PowerPC64LE * FreeBSD * Android x86/x64/ARM32/ARM64 * Platform Blue (a popular game console) x64 diff --git a/UnitTests/Core/FPFlushDenormalsTest.cpp b/UnitTests/Core/FPFlushDenormalsTest.cpp index 98639451a..6203f4bca 100644 --- a/UnitTests/Core/FPFlushDenormalsTest.cpp +++ b/UnitTests/Core/FPFlushDenormalsTest.cpp @@ -6,7 +6,7 @@ #include <Jolt/Core/FPFlushDenormals.h> #include <atomic> -#if !defined(JPH_CPU_WASM) && !defined(JPH_CPU_RISCV) && !defined(JPH_CPU_PPC) +#if !defined(JPH_CPU_WASM) && !defined(JPH_CPU_RISCV) && !defined(JPH_CPU_PPC) && !defined(JPH_CPU_LOONGARCH) // Implemented as a global atomic so the compiler can't optimize it to a constant extern atomic<float> TestFltMin;