From b19440c78fa5bf44a67cce4f30468892c9dd7a13 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 25 Aug 2024 06:22:45 -0700 Subject: [PATCH] Arm64: Ensure 256-bit operations always assert without 256-bit SVE Our JIT will happily consume incorrectly formed 256-bit vector operations in a lot of cases when the host CPU doesn't support 256-bit SVE. This is what caused the bug in #4006. For every vector operation that can consume a 256-bit size, add an assert that always checks if 256-bit SVE is supported in those cases. This will ensure that #4006 doesn't happen again. --- .../Interface/Core/JIT/Arm64/ALUOps.cpp | 2 +- .../Core/JIT/Arm64/ConversionOps.cpp | 11 + .../Interface/Core/JIT/Arm64/MemoryOps.cpp | 28 +- .../Interface/Core/JIT/Arm64/VectorOps.cpp | 395 +++++++++++------- 4 files changed, 273 insertions(+), 163 deletions(-) diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index 69bcd95106..30268973c3 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -1491,6 +1491,7 @@ DEF_OP(VExtractToGPR) { const auto Offset = ElementSizeBits * Op->Index; const auto Is256Bit = Offset >= SSERegBitSize; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -1511,7 +1512,6 @@ DEF_OP(VExtractToGPR) { // when acting on larger register sizes. PerformMove(Vector, Op->Index); } else { - LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host doesn't support SVE. Cannot perform 256-bit operation."); LOGMAN_THROW_AA_FMT(Is256Bit, "Can't perform 256-bit extraction with op side: {}", OpSize); LOGMAN_THROW_AA_FMT(Offset < AVXRegBitSize, "Trying to extract element outside bounds of register. Offset={}, Index={}", Offset, Op->Index); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp index b633ead4e5..f062cc6dcf 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp @@ -17,6 +17,7 @@ DEF_OP(VInsGPR) { const auto DestIdx = Op->DestIdx; const auto ElementSize = Op->Header.ElementSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto SubEmitSize = ConvertSubRegSize8(IROp); const auto ElementsPer128Bit = 16 / ElementSize; @@ -112,6 +113,8 @@ DEF_OP(VDupFromGPR) { const auto Src = GetReg(Op->Src.ID()); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto SubEmitSize = ConvertSubRegSize8(IROp); if (HostSupportsSVE256 && Is256Bit) { @@ -204,6 +207,7 @@ DEF_OP(Vector_SToF) { const auto ElementSize = Op->Header.ElementSize; const auto SubEmitSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -236,6 +240,7 @@ DEF_OP(Vector_FToZS) { const auto ElementSize = Op->Header.ElementSize; const auto SubEmitSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -266,6 +271,8 @@ DEF_OP(Vector_FToS) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto SubEmitSize = ConvertSubRegSize248(IROp); const auto Dst = GetVReg(Node); @@ -295,6 +302,8 @@ DEF_OP(Vector_FToF) { const auto ElementSize = Op->Header.ElementSize; const auto SubEmitSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto Conv = (ElementSize << 8) | Op->SrcElementSize; const auto Dst = GetVReg(Node); @@ -396,6 +405,7 @@ DEF_OP(Vector_FToI) { const auto ElementSize = Op->Header.ElementSize; const auto SubEmitSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -456,6 +466,7 @@ DEF_OP(Vector_F64ToI32) { const auto Round = Op->Round; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp index 323b41d02e..3560ce7411 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp @@ -639,6 +639,7 @@ DEF_OP(LoadMem) { case 8: ldr(Dst.D(), MemSrc); break; case 16: ldr(Dst.Q(), MemSrc); break; case 32: { + LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), Operand); break; @@ -747,6 +748,7 @@ DEF_OP(LoadMemTSO) { case 8: ldr(Dst.D(), MemSrc); break; case 16: ldr(Dst.Q(), MemSrc); break; case 32: { + LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), MemSrc); break; @@ -766,9 +768,7 @@ DEF_OP(VLoadVectorMasked) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; - if (Is256Bit) { - LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VLoadVectorMasked with 256-bit operation"); - } + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto SubRegSize = ConvertSubRegSize8(IROp); const auto CMPPredicate = ARMEmitter::PReg::p0; @@ -863,9 +863,7 @@ DEF_OP(VStoreVectorMasked) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; - if (Is256Bit) { - LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreVectorMasked with 256-bit operation"); - } + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto SubRegSize = ConvertSubRegSize8(IROp); const auto CMPPredicate = ARMEmitter::PReg::p0; @@ -1082,9 +1080,7 @@ DEF_OP(VLoadVectorGatherMasked) { /// - AddrBase also doesn't need to exist /// - If the instruction is using 64-bit vector indexing or 32-bit addresses where the top-bit isn't set then this is valid! const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; - if (Is256Bit) { - LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreVectorMasked with 256-bit operation"); - } + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto IncomingDst = GetVReg(Op->Incoming.ID()); @@ -1310,6 +1306,7 @@ DEF_OP(VBroadcastFromMem) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto ElementSize = IROp->ElementSize; const auto Dst = GetVReg(Node); @@ -1318,11 +1315,6 @@ DEF_OP(VBroadcastFromMem) { LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid element " "size"); - if (Is256Bit && !HostSupportsSVE256) { - LOGMAN_MSG_A_FMT("{}: 256-bit vectors must support SVE256", __func__); - return; - } - if (Is256Bit && HostSupportsSVE256) { const auto GoverningPredicate = PRED_TMP_32B.Zeroing(); @@ -1511,6 +1503,7 @@ DEF_OP(StoreMem) { break; } case 32: { + LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); st1b(Src.Z(), PRED_TMP_32B, MemSrc); break; @@ -1608,6 +1601,7 @@ DEF_OP(StoreMemTSO) { case 8: str(Src.D(), MemSrc); break; case 16: str(Src.Q(), MemSrc); break; case 32: { + LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); st1b(Src.Z(), PRED_TMP_32B, Operand); break; @@ -2158,6 +2152,7 @@ DEF_OP(ParanoidLoadMemTSO) { ins(ARMEmitter::SubRegSize::i64Bit, Dst, 1, TMP2); break; case 32: + LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); dmb(ARMEmitter::BarrierScope::ISH); ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), MemReg); dmb(ARMEmitter::BarrierScope::ISH); @@ -2234,6 +2229,7 @@ DEF_OP(ParanoidStoreMemTSO) { break; } case 32: { + LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); dmb(ARMEmitter::BarrierScope::ISH); st1b(Src.Z(), PRED_TMP_32B, MemReg, 0); dmb(ARMEmitter::BarrierScope::ISH); @@ -2360,6 +2356,7 @@ DEF_OP(VStoreNonTemporal) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Value = GetVReg(Op->Value.ID()); @@ -2367,7 +2364,6 @@ DEF_OP(VStoreNonTemporal) { const auto Offset = Op->Offset; if (Is256Bit) { - LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreNonTemporal with 256-bit operation"); const auto GoverningPredicate = PRED_TMP_32B.Zeroing(); const auto OffsetScaled = Offset / 32; stnt1b(Value.Z(), GoverningPredicate, MemReg, OffsetScaled); @@ -2402,6 +2398,7 @@ DEF_OP(VLoadNonTemporal) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Dst = GetVReg(Node); @@ -2409,7 +2406,6 @@ DEF_OP(VLoadNonTemporal) { const auto Offset = Op->Offset; if (Is256Bit) { - LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreNonTemporal with 256-bit operation"); const auto GoverningPredicate = PRED_TMP_32B.Zeroing(); const auto OffsetScaled = Offset / 32; ldnt1b(Dst.Z(), GoverningPredicate, MemReg, OffsetScaled); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp index 944d59818f..8e3d8d5229 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp @@ -13,162 +13,168 @@ tags: backend|arm64 namespace FEXCore::CPU { #define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) -#define DEF_UNOP(FEXOp, ARMOp, ScalarCase) \ - DEF_OP(FEXOp) { \ - const auto Op = IROp->C(); \ - const auto OpSize = IROp->Size; \ - \ - const auto ElementSize = Op->Header.ElementSize; \ - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ - const auto SubRegSize = ConvertSubRegSize8(IROp); \ - \ - const auto Dst = GetVReg(Node); \ - const auto Src = GetVReg(Op->Vector.ID()); \ - \ - if (HostSupportsSVE256 && Is256Bit) { \ - ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z()); \ - } else { \ - if (ElementSize == OpSize && ScalarCase) { \ - ARMOp(SubRegSize, Dst.D(), Src.D()); \ - } else { \ - ARMOp(SubRegSize, Dst.Q(), Src.Q()); \ - } \ - } \ +#define DEF_UNOP(FEXOp, ARMOp, ScalarCase) \ + DEF_OP(FEXOp) { \ + const auto Op = IROp->C(); \ + const auto OpSize = IROp->Size; \ + \ + const auto ElementSize = Op->Header.ElementSize; \ + const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \ + const auto SubRegSize = ConvertSubRegSize8(IROp); \ + \ + const auto Dst = GetVReg(Node); \ + const auto Src = GetVReg(Op->Vector.ID()); \ + \ + if (HostSupportsSVE256 && Is256Bit) { \ + ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z()); \ + } else { \ + if (ElementSize == OpSize && ScalarCase) { \ + ARMOp(SubRegSize, Dst.D(), Src.D()); \ + } else { \ + ARMOp(SubRegSize, Dst.Q(), Src.Q()); \ + } \ + } \ } -#define DEF_BITOP(FEXOp, ARMOp) \ - DEF_OP(FEXOp) { \ - const auto Op = IROp->C(); \ - const auto OpSize = IROp->Size; \ - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ - \ - const auto Dst = GetVReg(Node); \ - const auto Vector1 = GetVReg(Op->Vector1.ID()); \ - const auto Vector2 = GetVReg(Op->Vector2.ID()); \ - \ - if (HostSupportsSVE256 && Is256Bit) { \ - ARMOp(Dst.Z(), Vector1.Z(), Vector2.Z()); \ - } else { \ - ARMOp(Dst.Q(), Vector1.Q(), Vector2.Q()); \ - } \ +#define DEF_BITOP(FEXOp, ARMOp) \ + DEF_OP(FEXOp) { \ + const auto Op = IROp->C(); \ + const auto OpSize = IROp->Size; \ + const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \ + \ + const auto Dst = GetVReg(Node); \ + const auto Vector1 = GetVReg(Op->Vector1.ID()); \ + const auto Vector2 = GetVReg(Op->Vector2.ID()); \ + \ + if (HostSupportsSVE256 && Is256Bit) { \ + ARMOp(Dst.Z(), Vector1.Z(), Vector2.Z()); \ + } else { \ + ARMOp(Dst.Q(), Vector1.Q(), Vector2.Q()); \ + } \ } -#define DEF_BINOP(FEXOp, ARMOp) \ - DEF_OP(FEXOp) { \ - const auto Op = IROp->C(); \ - const auto OpSize = IROp->Size; \ - \ - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ - const auto SubRegSize = ConvertSubRegSize8(IROp); \ - \ - const auto Dst = GetVReg(Node); \ - const auto Vector1 = GetVReg(Op->Vector1.ID()); \ - const auto Vector2 = GetVReg(Op->Vector2.ID()); \ - \ - if (HostSupportsSVE256 && Is256Bit) { \ - ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); \ - } else { \ - ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); \ - } \ +#define DEF_BINOP(FEXOp, ARMOp) \ + DEF_OP(FEXOp) { \ + const auto Op = IROp->C(); \ + const auto OpSize = IROp->Size; \ + \ + const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \ + const auto SubRegSize = ConvertSubRegSize8(IROp); \ + \ + const auto Dst = GetVReg(Node); \ + const auto Vector1 = GetVReg(Op->Vector1.ID()); \ + const auto Vector2 = GetVReg(Op->Vector2.ID()); \ + \ + if (HostSupportsSVE256 && Is256Bit) { \ + ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); \ + } else { \ + ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); \ + } \ } -#define DEF_ZIPOP(FEXOp, ARMOp) \ - DEF_OP(FEXOp) { \ - const auto Op = IROp->C(); \ - const auto OpSize = IROp->Size; \ - \ - const auto SubRegSize = ConvertSubRegSize8(IROp); \ - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ - \ - const auto Dst = GetVReg(Node); \ - const auto VectorLower = GetVReg(Op->VectorLower.ID()); \ - const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); \ - \ - if (HostSupportsSVE256 && Is256Bit) { \ - ARMOp(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z()); \ - } else { \ - if (OpSize == 8) { \ - ARMOp(SubRegSize, Dst.D(), VectorLower.D(), VectorUpper.D()); \ - } else { \ - ARMOp(SubRegSize, Dst.Q(), VectorLower.Q(), VectorUpper.Q()); \ - } \ - } \ +#define DEF_ZIPOP(FEXOp, ARMOp) \ + DEF_OP(FEXOp) { \ + const auto Op = IROp->C(); \ + const auto OpSize = IROp->Size; \ + \ + const auto SubRegSize = ConvertSubRegSize8(IROp); \ + const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \ + \ + const auto Dst = GetVReg(Node); \ + const auto VectorLower = GetVReg(Op->VectorLower.ID()); \ + const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); \ + \ + if (HostSupportsSVE256 && Is256Bit) { \ + ARMOp(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z()); \ + } else { \ + if (OpSize == 8) { \ + ARMOp(SubRegSize, Dst.D(), VectorLower.D(), VectorUpper.D()); \ + } else { \ + ARMOp(SubRegSize, Dst.Q(), VectorLower.Q(), VectorUpper.Q()); \ + } \ + } \ } -#define DEF_FUNOP(FEXOp, ARMOp) \ - DEF_OP(FEXOp) { \ - const auto Op = IROp->C(); \ - const auto OpSize = IROp->Size; \ - \ - const auto ElementSize = Op->Header.ElementSize; \ - const auto SubRegSize = ConvertSubRegSize248(IROp); \ - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ - \ - const auto Dst = GetVReg(Node); \ - const auto Src = GetVReg(Op->Vector.ID()); \ - \ - if (HostSupportsSVE256 && Is256Bit) { \ - ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z()); \ - } else { \ - if (ElementSize == OpSize) { \ - switch (ElementSize) { \ - case 2: { \ - ARMOp(Dst.H(), Src.H()); \ - break; \ - } \ - case 4: { \ - ARMOp(Dst.S(), Src.S()); \ - break; \ - } \ - case 8: { \ - ARMOp(Dst.D(), Src.D()); \ - break; \ - } \ - default: break; \ - } \ - } else { \ - ARMOp(SubRegSize, Dst.Q(), Src.Q()); \ - } \ - } \ +#define DEF_FUNOP(FEXOp, ARMOp) \ + DEF_OP(FEXOp) { \ + const auto Op = IROp->C(); \ + const auto OpSize = IROp->Size; \ + \ + const auto ElementSize = Op->Header.ElementSize; \ + const auto SubRegSize = ConvertSubRegSize248(IROp); \ + const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \ + \ + const auto Dst = GetVReg(Node); \ + const auto Src = GetVReg(Op->Vector.ID()); \ + \ + if (HostSupportsSVE256 && Is256Bit) { \ + ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z()); \ + } else { \ + if (ElementSize == OpSize) { \ + switch (ElementSize) { \ + case 2: { \ + ARMOp(Dst.H(), Src.H()); \ + break; \ + } \ + case 4: { \ + ARMOp(Dst.S(), Src.S()); \ + break; \ + } \ + case 8: { \ + ARMOp(Dst.D(), Src.D()); \ + break; \ + } \ + default: break; \ + } \ + } else { \ + ARMOp(SubRegSize, Dst.Q(), Src.Q()); \ + } \ + } \ } -#define DEF_FBINOP(FEXOp, ARMOp) \ - DEF_OP(FEXOp) { \ - const auto Op = IROp->C(); \ - const auto OpSize = IROp->Size; \ - \ - const auto ElementSize = Op->Header.ElementSize; \ - const auto SubRegSize = ConvertSubRegSize248(IROp); \ - const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ - const auto IsScalar = ElementSize == OpSize; \ - \ - const auto Dst = GetVReg(Node); \ - const auto Vector1 = GetVReg(Op->Vector1.ID()); \ - const auto Vector2 = GetVReg(Op->Vector2.ID()); \ - \ - if (HostSupportsSVE256 && Is256Bit) { \ - ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); \ - } else { \ - if (IsScalar) { \ - switch (ElementSize) { \ - case 2: { \ - ARMOp(Dst.H(), Vector1.H(), Vector2.H()); \ - break; \ - } \ - case 4: { \ - ARMOp(Dst.S(), Vector1.S(), Vector2.S()); \ - break; \ - } \ - case 8: { \ - ARMOp(Dst.D(), Vector1.D(), Vector2.D()); \ - break; \ - } \ - default: break; \ - } \ - } else { \ - ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); \ - } \ - } \ +#define DEF_FBINOP(FEXOp, ARMOp) \ + DEF_OP(FEXOp) { \ + const auto Op = IROp->C(); \ + const auto OpSize = IROp->Size; \ + \ + const auto ElementSize = Op->Header.ElementSize; \ + const auto SubRegSize = ConvertSubRegSize248(IROp); \ + const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \ + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \ + const auto IsScalar = ElementSize == OpSize; \ + \ + const auto Dst = GetVReg(Node); \ + const auto Vector1 = GetVReg(Op->Vector1.ID()); \ + const auto Vector2 = GetVReg(Op->Vector2.ID()); \ + \ + if (HostSupportsSVE256 && Is256Bit) { \ + ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); \ + } else { \ + if (IsScalar) { \ + switch (ElementSize) { \ + case 2: { \ + ARMOp(Dst.H(), Vector1.H(), Vector2.H()); \ + break; \ + } \ + case 4: { \ + ARMOp(Dst.S(), Vector1.S(), Vector2.S()); \ + break; \ + } \ + case 8: { \ + ARMOp(Dst.D(), Vector1.D(), Vector2.D()); \ + break; \ + } \ + default: break; \ + } \ + } else { \ + ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); \ + } \ + } \ } #define DEF_FBINOP_SCALAR_INSERT(FEXOp, ARMOp) \ @@ -285,6 +291,7 @@ void Arm64JITCore::VFScalarFMAOperation(uint8_t OpSize, uint8_t ElementSize, Sca void Arm64JITCore::VFScalarOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit, ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2) { const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); LOGMAN_THROW_A_FMT(Is256Bit || !ZeroUpperBits, "128-bit operation doesn't support ZeroUpperBits in {}", __func__); // Bit of a tricky detail. @@ -357,6 +364,7 @@ void Arm64JITCore::VFScalarUnaryOperation(uint8_t OpSize, uint8_t ElementSize, b ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, std::variant Vector2) { const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); LOGMAN_THROW_A_FMT(Is256Bit || !ZeroUpperBits, "128-bit operation doesn't support ZeroUpperBits in {}", __func__); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); @@ -647,6 +655,7 @@ DEF_OP(VSToFVectorInsert) { // Dealing with the odd case of this being actually a vector operation rather than scalar. const auto Is256Bit = IROp->Size == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); constexpr auto Predicate = ARMEmitter::PReg::p0; ScalarEmit(VTMP1, Vector2); @@ -746,6 +755,7 @@ DEF_OP(VFCMPScalarInsert) { const auto ZeroUpperBits = Op->ZeroUpperBits; const auto Is256Bit = IROp->Size == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); auto ScalarEmitEQ = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { switch (SubRegSize.Scalar) { @@ -903,6 +913,7 @@ DEF_OP(VectorImm) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); @@ -1057,6 +1068,7 @@ DEF_OP(VAddP) { const auto OpSize = IROp->Size; const auto IsScalar = OpSize == 8; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Dst = GetVReg(Node); @@ -1092,6 +1104,7 @@ DEF_OP(VFAddV) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSizePair248(IROp); @@ -1126,6 +1139,7 @@ DEF_OP(VAddV) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSizePair8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -1159,6 +1173,8 @@ DEF_OP(VUMinV) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Dst = GetVReg(Node); @@ -1178,6 +1194,8 @@ DEF_OP(VUMaxV) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Dst = GetVReg(Node); @@ -1198,6 +1216,7 @@ DEF_OP(VURAvg) { const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1229,6 +1248,8 @@ DEF_OP(VFAddP) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Dst = GetVReg(Node); @@ -1264,6 +1285,7 @@ DEF_OP(VFDiv) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1319,6 +1341,7 @@ DEF_OP(VFMin) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1409,6 +1432,7 @@ DEF_OP(VFMax) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1485,6 +1509,7 @@ DEF_OP(VFRecp) { const auto SubRegSize = ConvertSubRegSizePair16(IROp); const auto IsScalar = Op->Header.ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -1550,6 +1575,7 @@ DEF_OP(VFRSqrt) { const auto SubRegSize = ConvertSubRegSizePair16(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -1614,6 +1640,7 @@ DEF_OP(VNot) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -1632,6 +1659,7 @@ DEF_OP(VUMin) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1680,6 +1708,7 @@ DEF_OP(VSMin) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1728,6 +1757,7 @@ DEF_OP(VUMax) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1776,6 +1806,7 @@ DEF_OP(VSMax) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1821,6 +1852,8 @@ DEF_OP(VBSL) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Dst = GetVReg(Node); @@ -1890,6 +1923,7 @@ DEF_OP(VCMPEQ) { const auto SubRegSize = ConvertSubRegSizePair16(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -1929,6 +1963,7 @@ DEF_OP(VCMPEQZ) { const auto SubRegSize = ConvertSubRegSizePair16(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -1970,6 +2005,7 @@ DEF_OP(VCMPGT) { const auto SubRegSize = ConvertSubRegSizePair16(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -2009,6 +2045,7 @@ DEF_OP(VCMPGTZ) { const auto SubRegSize = ConvertSubRegSizePair16(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -2047,6 +2084,7 @@ DEF_OP(VCMPLTZ) { const auto SubRegSize = ConvertSubRegSizePair16(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -2085,6 +2123,7 @@ DEF_OP(VFCMPEQ) { const auto SubRegSize = ConvertSubRegSizePair248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -2123,6 +2162,7 @@ DEF_OP(VFCMPNEQ) { const auto SubRegSize = ConvertSubRegSizePair248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -2163,6 +2203,7 @@ DEF_OP(VFCMPLT) { const auto SubRegSize = ConvertSubRegSizePair248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -2201,6 +2242,7 @@ DEF_OP(VFCMPGT) { const auto SubRegSize = ConvertSubRegSizePair248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -2239,6 +2281,7 @@ DEF_OP(VFCMPLE) { const auto SubRegSize = ConvertSubRegSizePair248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -2277,6 +2320,7 @@ DEF_OP(VFCMPORD) { const auto SubRegSize = ConvertSubRegSizePair248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -2327,6 +2371,7 @@ DEF_OP(VFCMPUNO) { const auto SubRegSize = ConvertSubRegSizePair248(IROp); const auto IsScalar = ElementSize == OpSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -2375,6 +2420,8 @@ DEF_OP(VUShl) { const auto ElementSize = IROp->ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto MaxShift = ElementSize * 8; const auto Dst = GetVReg(Node); @@ -2429,6 +2476,8 @@ DEF_OP(VUShr) { const auto ElementSize = IROp->ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto MaxShift = ElementSize * 8; const auto Dst = GetVReg(Node); @@ -2486,6 +2535,8 @@ DEF_OP(VSShr) { const auto ElementSize = IROp->ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto MaxShift = (ElementSize * 8) - 1; const auto RangeCheck = Op->RangeCheck; @@ -2542,6 +2593,7 @@ DEF_OP(VUShlS) { const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID()); @@ -2570,6 +2622,7 @@ DEF_OP(VUShrS) { const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID()); @@ -2600,6 +2653,7 @@ DEF_OP(VUShrSWide) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID()); @@ -2665,6 +2719,7 @@ DEF_OP(VSShrSWide) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID()); @@ -2730,6 +2785,7 @@ DEF_OP(VUShlSWide) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID()); @@ -2793,6 +2849,7 @@ DEF_OP(VSShrS) { const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID()); @@ -2820,6 +2877,7 @@ DEF_OP(VInsElement) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const uint32_t ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize16(IROp); @@ -2902,6 +2960,8 @@ DEF_OP(VDupElement) { const auto Index = Op->Index; const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Dst = GetVReg(Node); @@ -2922,6 +2982,7 @@ DEF_OP(VExtr) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); // AArch64 ext op has bit arrangement as [Vm:Vn] so arguments need to be swapped const auto Dst = GetVReg(Node); @@ -2973,6 +3034,7 @@ DEF_OP(VUShrI) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3014,6 +3076,7 @@ DEF_OP(VUShraI) { const auto BitShift = Op->BitShift; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto DestVector = GetVReg(Op->DestVector.ID()); @@ -3056,6 +3119,7 @@ DEF_OP(VSShrI) { const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Shift = std::min(uint8_t(ElementSize * 8 - 1), Op->BitShift); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3094,6 +3158,7 @@ DEF_OP(VShlI) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3135,6 +3200,7 @@ DEF_OP(VUShrNI) { const auto BitShift = Op->BitShift; const auto SubRegSize = ConvertSubRegSize4(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3154,6 +3220,7 @@ DEF_OP(VUShrNI2) { const auto BitShift = Op->BitShift; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto VectorLower = GetVReg(Op->VectorLower.ID()); @@ -3190,6 +3257,7 @@ DEF_OP(VSXTL) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3207,6 +3275,7 @@ DEF_OP(VSXTL2) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3224,6 +3293,7 @@ DEF_OP(VSSHLL) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3245,6 +3315,7 @@ DEF_OP(VSSHLL2) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3266,6 +3337,7 @@ DEF_OP(VUXTL) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3283,6 +3355,7 @@ DEF_OP(VUXTL2) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3300,6 +3373,7 @@ DEF_OP(VSQXTN) { const auto SubRegSize = ConvertSubRegSize4(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3351,6 +3425,7 @@ DEF_OP(VSQXTN2) { const auto SubRegSize = ConvertSubRegSize4(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto VectorLower = GetVReg(Op->VectorLower.ID()); @@ -3394,6 +3469,7 @@ DEF_OP(VSQXTNPair) { const auto SubRegSize = ConvertSubRegSize4(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto VectorLower = GetVReg(Op->VectorLower.ID()); @@ -3437,6 +3513,7 @@ DEF_OP(VSQXTUN) { const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -3455,6 +3532,7 @@ DEF_OP(VSQXTUN2) { const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto VectorLower = GetVReg(Op->VectorLower.ID()); @@ -3500,6 +3578,7 @@ DEF_OP(VSQXTUNPair) { const auto SubRegSize = ConvertSubRegSize4(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto VectorLower = GetVReg(Op->VectorLower.ID()); @@ -3542,6 +3621,8 @@ DEF_OP(VSRSHR) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Dst = GetVReg(Node); @@ -3570,6 +3651,8 @@ DEF_OP(VSQSHL) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Dst = GetVReg(Node); @@ -3598,6 +3681,8 @@ DEF_OP(VMul) { const auto OpSize = IROp->Size; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto SubRegSize = ConvertSubRegSize16(IROp); const auto Dst = GetVReg(Node); @@ -3617,6 +3702,7 @@ DEF_OP(VUMull) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -3637,6 +3723,7 @@ DEF_OP(VSMull) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -3657,6 +3744,7 @@ DEF_OP(VUMull2) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -3677,6 +3765,7 @@ DEF_OP(VSMull2) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -3698,6 +3787,8 @@ DEF_OP(VUMulH) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Dst = GetVReg(Node); @@ -3747,6 +3838,8 @@ DEF_OP(VSMulH) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize8(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Dst = GetVReg(Node); @@ -3795,6 +3888,7 @@ DEF_OP(VUABDL) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -3820,6 +3914,7 @@ DEF_OP(VUABDL2) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -3969,6 +4064,7 @@ DEF_OP(VRev32) { const auto ElementSize = Op->Header.ElementSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -4007,6 +4103,7 @@ DEF_OP(VRev64) { const auto ElementSize = Op->Header.ElementSize; const auto SubRegSize = ConvertSubRegSize4(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -4044,6 +4141,7 @@ DEF_OP(VFCADD) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -4090,6 +4188,7 @@ DEF_OP(VFMLA) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -4156,6 +4255,7 @@ DEF_OP(VFMLS) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -4245,6 +4345,7 @@ DEF_OP(VFNMLA) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); const auto Dst = GetVReg(Node); const auto Vector1 = GetVReg(Op->Vector1.ID()); @@ -4312,6 +4413,8 @@ DEF_OP(VFNMLS) { const auto SubRegSize = ConvertSubRegSize248(IROp); const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); + const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Dst = GetVReg(Node);