Merge pull request #4007 from Sonicadvance1/ensure_no_256bit_operations

Arm64: Ensure 256-bit operations always assert without 256-bit SVE
FEX-Emu · Aug 28, 2024 · 90f7cc9 · 90f7cc9
2 parents 2039950 + b19440c
commit 90f7cc9
Show file tree

Hide file tree

Showing 4 changed files with 273 additions and 163 deletions.
diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
@@ -1491,6 +1491,7 @@ DEF_OP(VExtractToGPR) {
 
   const auto Offset = ElementSizeBits * Op->Index;
   const auto Is256Bit = Offset >= SSERegBitSize;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -1511,7 +1512,6 @@ DEF_OP(VExtractToGPR) {
     // when acting on larger register sizes.
     PerformMove(Vector, Op->Index);
   } else {
-    LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host doesn't support SVE. Cannot perform 256-bit operation.");
     LOGMAN_THROW_AA_FMT(Is256Bit, "Can't perform 256-bit extraction with op side: {}", OpSize);
     LOGMAN_THROW_AA_FMT(Offset < AVXRegBitSize, "Trying to extract element outside bounds of register. Offset={}, Index={}", Offset, Op->Index);
 

diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp
@@ -17,6 +17,7 @@ DEF_OP(VInsGPR) {
   const auto DestIdx = Op->DestIdx;
   const auto ElementSize = Op->Header.ElementSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto SubEmitSize = ConvertSubRegSize8(IROp);
   const auto ElementsPer128Bit = 16 / ElementSize;
@@ -112,6 +113,8 @@ DEF_OP(VDupFromGPR) {
   const auto Src = GetReg(Op->Src.ID());
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubEmitSize = ConvertSubRegSize8(IROp);
 
   if (HostSupportsSVE256 && Is256Bit) {
@@ -204,6 +207,7 @@ DEF_OP(Vector_SToF) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -236,6 +240,7 @@ DEF_OP(Vector_FToZS) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -266,6 +271,8 @@ DEF_OP(Vector_FToS) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -295,6 +302,8 @@ DEF_OP(Vector_FToF) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto Conv = (ElementSize << 8) | Op->SrcElementSize;
 
   const auto Dst = GetVReg(Node);
@@ -396,6 +405,7 @@ DEF_OP(Vector_FToI) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -456,6 +466,7 @@ DEF_OP(Vector_F64ToI32) {
   const auto Round = Op->Round;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());

diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
@@ -639,6 +639,7 @@ DEF_OP(LoadMem) {
     case 8: ldr(Dst.D(), MemSrc); break;
     case 16: ldr(Dst.Q(), MemSrc); break;
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
       ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), Operand);
       break;
@@ -747,6 +748,7 @@ DEF_OP(LoadMemTSO) {
     case 8: ldr(Dst.D(), MemSrc); break;
     case 16: ldr(Dst.Q(), MemSrc); break;
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
       ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), MemSrc);
       break;
@@ -766,9 +768,7 @@ DEF_OP(VLoadVectorMasked) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
-  if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VLoadVectorMasked with 256-bit operation");
-  }
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
   const auto CMPPredicate = ARMEmitter::PReg::p0;
@@ -863,9 +863,7 @@ DEF_OP(VStoreVectorMasked) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
-  if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreVectorMasked with 256-bit operation");
-  }
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
   const auto CMPPredicate = ARMEmitter::PReg::p0;
@@ -1082,9 +1080,7 @@ DEF_OP(VLoadVectorGatherMasked) {
   ///  - AddrBase also doesn't need to exist
   ///     - If the instruction is using 64-bit vector indexing or 32-bit addresses where the top-bit isn't set then this is valid!
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
-  if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreVectorMasked with 256-bit operation");
-  }
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto IncomingDst = GetVReg(Op->Incoming.ID());
@@ -1310,6 +1306,7 @@ DEF_OP(VBroadcastFromMem) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto ElementSize = IROp->ElementSize;
 
   const auto Dst = GetVReg(Node);
@@ -1318,11 +1315,6 @@ DEF_OP(VBroadcastFromMem) {
   LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid element "
                                                                                                                          "size");
 
-  if (Is256Bit && !HostSupportsSVE256) {
-    LOGMAN_MSG_A_FMT("{}: 256-bit vectors must support SVE256", __func__);
-    return;
-  }
-
   if (Is256Bit && HostSupportsSVE256) {
     const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
 
@@ -1511,6 +1503,7 @@ DEF_OP(StoreMem) {
       break;
     }
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
       st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, MemSrc);
       break;
@@ -1608,6 +1601,7 @@ DEF_OP(StoreMemTSO) {
     case 8: str(Src.D(), MemSrc); break;
     case 16: str(Src.Q(), MemSrc); break;
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
       st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, Operand);
       break;
@@ -2158,6 +2152,7 @@ DEF_OP(ParanoidLoadMemTSO) {
       ins(ARMEmitter::SubRegSize::i64Bit, Dst, 1, TMP2);
       break;
     case 32:
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       dmb(ARMEmitter::BarrierScope::ISH);
       ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), MemReg);
       dmb(ARMEmitter::BarrierScope::ISH);
@@ -2234,6 +2229,7 @@ DEF_OP(ParanoidStoreMemTSO) {
       break;
     }
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       dmb(ARMEmitter::BarrierScope::ISH);
       st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, MemReg, 0);
       dmb(ARMEmitter::BarrierScope::ISH);
@@ -2360,14 +2356,14 @@ DEF_OP(VStoreNonTemporal) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Value = GetVReg(Op->Value.ID());
   const auto MemReg = GetReg(Op->Addr.ID());
   const auto Offset = Op->Offset;
 
   if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreNonTemporal with 256-bit operation");
     const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
     const auto OffsetScaled = Offset / 32;
     stnt1b(Value.Z(), GoverningPredicate, MemReg, OffsetScaled);
@@ -2402,14 +2398,14 @@ DEF_OP(VLoadNonTemporal) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Dst = GetVReg(Node);
   const auto MemReg = GetReg(Op->Addr.ID());
   const auto Offset = Op->Offset;
 
   if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreNonTemporal with 256-bit operation");
     const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
     const auto OffsetScaled = Offset / 32;
     ldnt1b(Dst.Z(), GoverningPredicate, MemReg, OffsetScaled);