From b19440c78fa5bf44a67cce4f30468892c9dd7a13 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Sun, 25 Aug 2024 06:22:45 -0700
Subject: [PATCH] Arm64: Ensure 256-bit operations always assert without
 256-bit SVE

Our JIT will happily consume incorrectly formed 256-bit vector operations
in a lot of cases when the host CPU doesn't support 256-bit SVE.
This is what caused the bug in #4006. For every vector operation that
can consume a 256-bit size, add an assert that 256-bit SVE is supported
whenever the operation is 256 bits wide.

This will ensure that #4006 doesn't happen again.
---
 .../Interface/Core/JIT/Arm64/ALUOps.cpp       |   2 +-
 .../Core/JIT/Arm64/ConversionOps.cpp          |  11 +
 .../Interface/Core/JIT/Arm64/MemoryOps.cpp    |  28 +-
 .../Interface/Core/JIT/Arm64/VectorOps.cpp    | 395 +++++++++++-------
 4 files changed, 273 insertions(+), 163 deletions(-)

diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
index 69bcd95106..30268973c3 100644
--- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
@@ -1491,6 +1491,7 @@ DEF_OP(VExtractToGPR) {
 
   const auto Offset = ElementSizeBits * Op->Index;
   const auto Is256Bit = Offset >= SSERegBitSize;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -1511,7 +1512,6 @@ DEF_OP(VExtractToGPR) {
     // when acting on larger register sizes.
     PerformMove(Vector, Op->Index);
   } else {
-    LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host doesn't support SVE. Cannot perform 256-bit operation.");
     LOGMAN_THROW_AA_FMT(Is256Bit, "Can't perform 256-bit extraction with op side: {}", OpSize);
     LOGMAN_THROW_AA_FMT(Offset < AVXRegBitSize, "Trying to extract element outside bounds of register. Offset={}, Index={}", Offset, Op->Index);
 
diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp
index b633ead4e5..f062cc6dcf 100644
--- a/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp
@@ -17,6 +17,7 @@ DEF_OP(VInsGPR) {
   const auto DestIdx = Op->DestIdx;
   const auto ElementSize = Op->Header.ElementSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto SubEmitSize = ConvertSubRegSize8(IROp);
   const auto ElementsPer128Bit = 16 / ElementSize;
@@ -112,6 +113,8 @@ DEF_OP(VDupFromGPR) {
   const auto Src = GetReg(Op->Src.ID());
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubEmitSize = ConvertSubRegSize8(IROp);
 
   if (HostSupportsSVE256 && Is256Bit) {
@@ -204,6 +207,7 @@ DEF_OP(Vector_SToF) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -236,6 +240,7 @@ DEF_OP(Vector_FToZS) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -266,6 +271,8 @@ DEF_OP(Vector_FToS) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -295,6 +302,8 @@ DEF_OP(Vector_FToF) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto Conv = (ElementSize << 8) | Op->SrcElementSize;
 
   const auto Dst = GetVReg(Node);
@@ -396,6 +405,7 @@ DEF_OP(Vector_FToI) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubEmitSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -456,6 +466,7 @@ DEF_OP(Vector_F64ToI32) {
   const auto Round = Op->Round;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
index 323b41d02e..3560ce7411 100644
--- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
@@ -639,6 +639,7 @@ DEF_OP(LoadMem) {
     case 8: ldr(Dst.D(), MemSrc); break;
     case 16: ldr(Dst.Q(), MemSrc); break;
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
       ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), Operand);
       break;
@@ -747,6 +748,7 @@ DEF_OP(LoadMemTSO) {
     case 8: ldr(Dst.D(), MemSrc); break;
     case 16: ldr(Dst.Q(), MemSrc); break;
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
       ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), MemSrc);
       break;
@@ -766,9 +768,7 @@ DEF_OP(VLoadVectorMasked) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
-  if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VLoadVectorMasked with 256-bit operation");
-  }
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
   const auto CMPPredicate = ARMEmitter::PReg::p0;
@@ -863,9 +863,7 @@ DEF_OP(VStoreVectorMasked) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
-  if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreVectorMasked with 256-bit operation");
-  }
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
   const auto CMPPredicate = ARMEmitter::PReg::p0;
@@ -1082,9 +1080,7 @@ DEF_OP(VLoadVectorGatherMasked) {
   ///  - AddrBase also doesn't need to exist
   ///     - If the instruction is using 64-bit vector indexing or 32-bit addresses where the top-bit isn't set then this is valid!
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
-  if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreVectorMasked with 256-bit operation");
-  }
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto IncomingDst = GetVReg(Op->Incoming.ID());
@@ -1310,6 +1306,7 @@ DEF_OP(VBroadcastFromMem) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto ElementSize = IROp->ElementSize;
 
   const auto Dst = GetVReg(Node);
@@ -1318,11 +1315,6 @@ DEF_OP(VBroadcastFromMem) {
   LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid element "
                                                                                                                          "size");
 
-  if (Is256Bit && !HostSupportsSVE256) {
-    LOGMAN_MSG_A_FMT("{}: 256-bit vectors must support SVE256", __func__);
-    return;
-  }
-
   if (Is256Bit && HostSupportsSVE256) {
     const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
 
@@ -1511,6 +1503,7 @@ DEF_OP(StoreMem) {
       break;
     }
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
       st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, MemSrc);
       break;
@@ -1608,6 +1601,7 @@ DEF_OP(StoreMemTSO) {
     case 8: str(Src.D(), MemSrc); break;
     case 16: str(Src.Q(), MemSrc); break;
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
       st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, Operand);
       break;
@@ -2158,6 +2152,7 @@ DEF_OP(ParanoidLoadMemTSO) {
       ins(ARMEmitter::SubRegSize::i64Bit, Dst, 1, TMP2);
       break;
     case 32:
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       dmb(ARMEmitter::BarrierScope::ISH);
       ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), MemReg);
       dmb(ARMEmitter::BarrierScope::ISH);
@@ -2234,6 +2229,7 @@ DEF_OP(ParanoidStoreMemTSO) {
       break;
     }
     case 32: {
+      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
       dmb(ARMEmitter::BarrierScope::ISH);
       st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, MemReg, 0);
       dmb(ARMEmitter::BarrierScope::ISH);
@@ -2360,6 +2356,7 @@ DEF_OP(VStoreNonTemporal) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Value = GetVReg(Op->Value.ID());
@@ -2367,7 +2364,6 @@ DEF_OP(VStoreNonTemporal) {
   const auto Offset = Op->Offset;
 
   if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreNonTemporal with 256-bit operation");
     const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
     const auto OffsetScaled = Offset / 32;
     stnt1b(Value.Z(), GoverningPredicate, MemReg, OffsetScaled);
@@ -2402,6 +2398,7 @@ DEF_OP(VLoadNonTemporal) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Dst = GetVReg(Node);
@@ -2409,7 +2406,6 @@ DEF_OP(VLoadNonTemporal) {
   const auto Offset = Op->Offset;
 
   if (Is256Bit) {
-    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreNonTemporal with 256-bit operation");
     const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
     const auto OffsetScaled = Offset / 32;
     ldnt1b(Dst.Z(), GoverningPredicate, MemReg, OffsetScaled);
diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp
index 944d59818f..8e3d8d5229 100644
--- a/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp
@@ -13,162 +13,168 @@ tags: backend|arm64
 namespace FEXCore::CPU {
 #define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node)
 
-#define DEF_UNOP(FEXOp, ARMOp, ScalarCase)                            \
-  DEF_OP(FEXOp) {                                                     \
-    const auto Op = IROp->C<IR::IROp_##FEXOp>();                      \
-    const auto OpSize = IROp->Size;                                   \
-                                                                      \
-    const auto ElementSize = Op->Header.ElementSize;                  \
-    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \
-    const auto SubRegSize = ConvertSubRegSize8(IROp);                 \
-                                                                      \
-    const auto Dst = GetVReg(Node);                                   \
-    const auto Src = GetVReg(Op->Vector.ID());                        \
-                                                                      \
-    if (HostSupportsSVE256 && Is256Bit) {                             \
-      ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z());    \
-    } else {                                                          \
-      if (ElementSize == OpSize && ScalarCase) {                      \
-        ARMOp(SubRegSize, Dst.D(), Src.D());                          \
-      } else {                                                        \
-        ARMOp(SubRegSize, Dst.Q(), Src.Q());                          \
-      }                                                               \
-    }                                                                 \
+#define DEF_UNOP(FEXOp, ARMOp, ScalarCase)                                                                                                        \
+  DEF_OP(FEXOp) {                                                                                                                                 \
+    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                                  \
+    const auto OpSize = IROp->Size;                                                                                                               \
+                                                                                                                                                  \
+    const auto ElementSize = Op->Header.ElementSize;                                                                                              \
+    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;                                                                             \
+    LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
+    const auto SubRegSize = ConvertSubRegSize8(IROp);                                                                                             \
+                                                                                                                                                  \
+    const auto Dst = GetVReg(Node);                                                                                                               \
+    const auto Src = GetVReg(Op->Vector.ID());                                                                                                    \
+                                                                                                                                                  \
+    if (HostSupportsSVE256 && Is256Bit) {                                                                                                         \
+      ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z());                                                                                \
+    } else {                                                                                                                                      \
+      if (ElementSize == OpSize && ScalarCase) {                                                                                                  \
+        ARMOp(SubRegSize, Dst.D(), Src.D());                                                                                                      \
+      } else {                                                                                                                                    \
+        ARMOp(SubRegSize, Dst.Q(), Src.Q());                                                                                                      \
+      }                                                                                                                                           \
+    }                                                                                                                                             \
   }
 
-#define DEF_BITOP(FEXOp, ARMOp)                                       \
-  DEF_OP(FEXOp) {                                                     \
-    const auto Op = IROp->C<IR::IROp_##FEXOp>();                      \
-    const auto OpSize = IROp->Size;                                   \
-    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \
-                                                                      \
-    const auto Dst = GetVReg(Node);                                   \
-    const auto Vector1 = GetVReg(Op->Vector1.ID());                   \
-    const auto Vector2 = GetVReg(Op->Vector2.ID());                   \
-                                                                      \
-    if (HostSupportsSVE256 && Is256Bit) {                             \
-      ARMOp(Dst.Z(), Vector1.Z(), Vector2.Z());                       \
-    } else {                                                          \
-      ARMOp(Dst.Q(), Vector1.Q(), Vector2.Q());                       \
-    }                                                                 \
+#define DEF_BITOP(FEXOp, ARMOp)                                                                                                                   \
+  DEF_OP(FEXOp) {                                                                                                                                 \
+    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                                  \
+    const auto OpSize = IROp->Size;                                                                                                               \
+    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;                                                                             \
+    LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
+                                                                                                                                                  \
+    const auto Dst = GetVReg(Node);                                                                                                               \
+    const auto Vector1 = GetVReg(Op->Vector1.ID());                                                                                               \
+    const auto Vector2 = GetVReg(Op->Vector2.ID());                                                                                               \
+                                                                                                                                                  \
+    if (HostSupportsSVE256 && Is256Bit) {                                                                                                         \
+      ARMOp(Dst.Z(), Vector1.Z(), Vector2.Z());                                                                                                   \
+    } else {                                                                                                                                      \
+      ARMOp(Dst.Q(), Vector1.Q(), Vector2.Q());                                                                                                   \
+    }                                                                                                                                             \
   }
 
-#define DEF_BINOP(FEXOp, ARMOp)                                       \
-  DEF_OP(FEXOp) {                                                     \
-    const auto Op = IROp->C<IR::IROp_##FEXOp>();                      \
-    const auto OpSize = IROp->Size;                                   \
-                                                                      \
-    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \
-    const auto SubRegSize = ConvertSubRegSize8(IROp);                 \
-                                                                      \
-    const auto Dst = GetVReg(Node);                                   \
-    const auto Vector1 = GetVReg(Op->Vector1.ID());                   \
-    const auto Vector2 = GetVReg(Op->Vector2.ID());                   \
-                                                                      \
-    if (HostSupportsSVE256 && Is256Bit) {                             \
-      ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());           \
-    } else {                                                          \
-      ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());           \
-    }                                                                 \
+#define DEF_BINOP(FEXOp, ARMOp)                                                                                                                   \
+  DEF_OP(FEXOp) {                                                                                                                                 \
+    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                                  \
+    const auto OpSize = IROp->Size;                                                                                                               \
+                                                                                                                                                  \
+    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;                                                                             \
+    LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
+    const auto SubRegSize = ConvertSubRegSize8(IROp);                                                                                             \
+                                                                                                                                                  \
+    const auto Dst = GetVReg(Node);                                                                                                               \
+    const auto Vector1 = GetVReg(Op->Vector1.ID());                                                                                               \
+    const auto Vector2 = GetVReg(Op->Vector2.ID());                                                                                               \
+                                                                                                                                                  \
+    if (HostSupportsSVE256 && Is256Bit) {                                                                                                         \
+      ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());                                                                                       \
+    } else {                                                                                                                                      \
+      ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());                                                                                       \
+    }                                                                                                                                             \
   }
 
-#define DEF_ZIPOP(FEXOp, ARMOp)                                       \
-  DEF_OP(FEXOp) {                                                     \
-    const auto Op = IROp->C<IR::IROp_##FEXOp>();                      \
-    const auto OpSize = IROp->Size;                                   \
-                                                                      \
-    const auto SubRegSize = ConvertSubRegSize8(IROp);                 \
-    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \
-                                                                      \
-    const auto Dst = GetVReg(Node);                                   \
-    const auto VectorLower = GetVReg(Op->VectorLower.ID());           \
-    const auto VectorUpper = GetVReg(Op->VectorUpper.ID());           \
-                                                                      \
-    if (HostSupportsSVE256 && Is256Bit) {                             \
-      ARMOp(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z());   \
-    } else {                                                          \
-      if (OpSize == 8) {                                              \
-        ARMOp(SubRegSize, Dst.D(), VectorLower.D(), VectorUpper.D()); \
-      } else {                                                        \
-        ARMOp(SubRegSize, Dst.Q(), VectorLower.Q(), VectorUpper.Q()); \
-      }                                                               \
-    }                                                                 \
+#define DEF_ZIPOP(FEXOp, ARMOp)                                                                                                                   \
+  DEF_OP(FEXOp) {                                                                                                                                 \
+    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                                  \
+    const auto OpSize = IROp->Size;                                                                                                               \
+                                                                                                                                                  \
+    const auto SubRegSize = ConvertSubRegSize8(IROp);                                                                                             \
+    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;                                                                             \
+    LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
+                                                                                                                                                  \
+    const auto Dst = GetVReg(Node);                                                                                                               \
+    const auto VectorLower = GetVReg(Op->VectorLower.ID());                                                                                       \
+    const auto VectorUpper = GetVReg(Op->VectorUpper.ID());                                                                                       \
+                                                                                                                                                  \
+    if (HostSupportsSVE256 && Is256Bit) {                                                                                                         \
+      ARMOp(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z());                                                                               \
+    } else {                                                                                                                                      \
+      if (OpSize == 8) {                                                                                                                          \
+        ARMOp(SubRegSize, Dst.D(), VectorLower.D(), VectorUpper.D());                                                                             \
+      } else {                                                                                                                                    \
+        ARMOp(SubRegSize, Dst.Q(), VectorLower.Q(), VectorUpper.Q());                                                                             \
+      }                                                                                                                                           \
+    }                                                                                                                                             \
   }
 
-#define DEF_FUNOP(FEXOp, ARMOp)                                       \
-  DEF_OP(FEXOp) {                                                     \
-    const auto Op = IROp->C<IR::IROp_##FEXOp>();                      \
-    const auto OpSize = IROp->Size;                                   \
-                                                                      \
-    const auto ElementSize = Op->Header.ElementSize;                  \
-    const auto SubRegSize = ConvertSubRegSize248(IROp);               \
-    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \
-                                                                      \
-    const auto Dst = GetVReg(Node);                                   \
-    const auto Src = GetVReg(Op->Vector.ID());                        \
-                                                                      \
-    if (HostSupportsSVE256 && Is256Bit) {                             \
-      ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z());    \
-    } else {                                                          \
-      if (ElementSize == OpSize) {                                    \
-        switch (ElementSize) {                                        \
-        case 2: {                                                     \
-          ARMOp(Dst.H(), Src.H());                                    \
-          break;                                                      \
-        }                                                             \
-        case 4: {                                                     \
-          ARMOp(Dst.S(), Src.S());                                    \
-          break;                                                      \
-        }                                                             \
-        case 8: {                                                     \
-          ARMOp(Dst.D(), Src.D());                                    \
-          break;                                                      \
-        }                                                             \
-        default: break;                                               \
-        }                                                             \
-      } else {                                                        \
-        ARMOp(SubRegSize, Dst.Q(), Src.Q());                          \
-      }                                                               \
-    }                                                                 \
+#define DEF_FUNOP(FEXOp, ARMOp) /* Expands to a DEF_OP(FEXOp) body that emits ARMOp for a single-source vector operation. */                      \
+  DEF_OP(FEXOp) {                                                                                                                                 \
+    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                                  \
+    const auto OpSize = IROp->Size;                                                                                                               \
+                                                                                                                                                  \
+    const auto ElementSize = Op->Header.ElementSize;                                                                                              \
+    const auto SubRegSize = ConvertSubRegSize248(IROp);                                                                                           \
+    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;                                                                             \
+    LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
+    /* Dst: vector register looked up for Node. Src: vector register looked up for Op->Vector. */                                                 \
+    const auto Dst = GetVReg(Node);                                                                                                               \
+    const auto Src = GetVReg(Op->Vector.ID());                                                                                                    \
+    /* 256-bit on an SVE256 host: predicated Z-register form. Scalar (ElementSize == OpSize): H/S/D. Otherwise: Q. */                             \
+    if (HostSupportsSVE256 && Is256Bit) {                                                                                                         \
+      ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z());                                                                                \
+    } else {                                                                                                                                      \
+      if (ElementSize == OpSize) {                                                                                                                \
+        switch (ElementSize) {                                                                                                                    \
+        case 2: {                                                                                                                                 \
+          ARMOp(Dst.H(), Src.H());                                                                                                                \
+          break;                                                                                                                                  \
+        }                                                                                                                                         \
+        case 4: {                                                                                                                                 \
+          ARMOp(Dst.S(), Src.S());                                                                                                                \
+          break;                                                                                                                                  \
+        }                                                                                                                                         \
+        case 8: {                                                                                                                                 \
+          ARMOp(Dst.D(), Src.D());                                                                                                                \
+          break;                                                                                                                                  \
+        }                                                                                                                                         \
+        default: break; /* any other ElementSize emits no instruction */                                                                          \
+        }                                                                                                                                         \
+      } else {                                                                                                                                    \
+        ARMOp(SubRegSize, Dst.Q(), Src.Q());                                                                                                      \
+      }                                                                                                                                           \
+    }                                                                                                                                             \
   }
 
-#define DEF_FBINOP(FEXOp, ARMOp)                                      \
-  DEF_OP(FEXOp) {                                                     \
-    const auto Op = IROp->C<IR::IROp_##FEXOp>();                      \
-    const auto OpSize = IROp->Size;                                   \
-                                                                      \
-    const auto ElementSize = Op->Header.ElementSize;                  \
-    const auto SubRegSize = ConvertSubRegSize248(IROp);               \
-    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; \
-    const auto IsScalar = ElementSize == OpSize;                      \
-                                                                      \
-    const auto Dst = GetVReg(Node);                                   \
-    const auto Vector1 = GetVReg(Op->Vector1.ID());                   \
-    const auto Vector2 = GetVReg(Op->Vector2.ID());                   \
-                                                                      \
-    if (HostSupportsSVE256 && Is256Bit) {                             \
-      ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());           \
-    } else {                                                          \
-      if (IsScalar) {                                                 \
-        switch (ElementSize) {                                        \
-        case 2: {                                                     \
-          ARMOp(Dst.H(), Vector1.H(), Vector2.H());                   \
-          break;                                                      \
-        }                                                             \
-        case 4: {                                                     \
-          ARMOp(Dst.S(), Vector1.S(), Vector2.S());                   \
-          break;                                                      \
-        }                                                             \
-        case 8: {                                                     \
-          ARMOp(Dst.D(), Vector1.D(), Vector2.D());                   \
-          break;                                                      \
-        }                                                             \
-        default: break;                                               \
-        }                                                             \
-      } else {                                                        \
-        ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());         \
-      }                                                               \
-    }                                                                 \
+#define DEF_FBINOP(FEXOp, ARMOp) /* Expands to a DEF_OP(FEXOp) body that emits ARMOp for a two-source vector operation. */                        \
+  DEF_OP(FEXOp) {                                                                                                                                 \
+    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                                  \
+    const auto OpSize = IROp->Size;                                                                                                               \
+                                                                                                                                                  \
+    const auto ElementSize = Op->Header.ElementSize;                                                                                              \
+    const auto SubRegSize = ConvertSubRegSize248(IROp);                                                                                           \
+    const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;                                                                             \
+    LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
+    const auto IsScalar = ElementSize == OpSize;                                                                                                  \
+    /* Dst: vector register looked up for Node. Vector1/Vector2: registers looked up for the two operands. */                                     \
+    const auto Dst = GetVReg(Node);                                                                                                               \
+    const auto Vector1 = GetVReg(Op->Vector1.ID());                                                                                               \
+    const auto Vector2 = GetVReg(Op->Vector2.ID());                                                                                               \
+    /* 256-bit on an SVE256 host: Z-register form. Scalar (IsScalar): H/S/D by ElementSize. Otherwise: Q. */                                      \
+    if (HostSupportsSVE256 && Is256Bit) {                                                                                                         \
+      ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());                                                                                       \
+    } else {                                                                                                                                      \
+      if (IsScalar) {                                                                                                                             \
+        switch (ElementSize) {                                                                                                                    \
+        case 2: {                                                                                                                                 \
+          ARMOp(Dst.H(), Vector1.H(), Vector2.H());                                                                                               \
+          break;                                                                                                                                  \
+        }                                                                                                                                         \
+        case 4: {                                                                                                                                 \
+          ARMOp(Dst.S(), Vector1.S(), Vector2.S());                                                                                               \
+          break;                                                                                                                                  \
+        }                                                                                                                                         \
+        case 8: {                                                                                                                                 \
+          ARMOp(Dst.D(), Vector1.D(), Vector2.D());                                                                                               \
+          break;                                                                                                                                  \
+        }                                                                                                                                         \
+        default: break; /* any other ElementSize emits no instruction */                                                                          \
+        }                                                                                                                                         \
+      } else {                                                                                                                                    \
+        ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());                                                                                     \
+      }                                                                                                                                           \
+    }                                                                                                                                             \
   }
 
 #define DEF_FBINOP_SCALAR_INSERT(FEXOp, ARMOp)                                                                                \
@@ -285,6 +291,7 @@ void Arm64JITCore::VFScalarFMAOperation(uint8_t OpSize, uint8_t ElementSize, Sca
 void Arm64JITCore::VFScalarOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit,
                                      ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2) {
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   LOGMAN_THROW_A_FMT(Is256Bit || !ZeroUpperBits, "128-bit operation doesn't support ZeroUpperBits in {}", __func__);
 
   // Bit of a tricky detail.
@@ -357,6 +364,7 @@ void Arm64JITCore::VFScalarUnaryOperation(uint8_t OpSize, uint8_t ElementSize, b
                                           ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1,
                                           std::variant<ARMEmitter::VRegister, ARMEmitter::Register> Vector2) {
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   LOGMAN_THROW_A_FMT(Is256Bit || !ZeroUpperBits, "128-bit operation doesn't support ZeroUpperBits in {}", __func__);
 
   LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size");
@@ -647,6 +655,7 @@ DEF_OP(VSToFVectorInsert) {
 
   // Dealing with the odd case of this being actually a vector operation rather than scalar.
   const auto Is256Bit = IROp->Size == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   constexpr auto Predicate = ARMEmitter::PReg::p0;
 
   ScalarEmit(VTMP1, Vector2);
@@ -746,6 +755,7 @@ DEF_OP(VFCMPScalarInsert) {
 
   const auto ZeroUpperBits = Op->ZeroUpperBits;
   const auto Is256Bit = IROp->Size == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   auto ScalarEmitEQ = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) {
     switch (SubRegSize.Scalar) {
@@ -903,6 +913,7 @@ DEF_OP(VectorImm) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
@@ -1057,6 +1068,7 @@ DEF_OP(VAddP) {
   const auto OpSize = IROp->Size;
   const auto IsScalar = OpSize == 8;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -1092,6 +1104,7 @@ DEF_OP(VFAddV) {
   const auto Op = IROp->C<IR::IROp_VAddV>();
   const auto OpSize = IROp->Size;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSizePair248(IROp);
@@ -1126,6 +1139,7 @@ DEF_OP(VAddV) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSizePair8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -1159,6 +1173,8 @@ DEF_OP(VUMinV) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -1178,6 +1194,8 @@ DEF_OP(VUMaxV) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -1198,6 +1216,7 @@ DEF_OP(VURAvg) {
 
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1229,6 +1248,8 @@ DEF_OP(VFAddP) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubRegSize = ConvertSubRegSize248(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -1264,6 +1285,7 @@ DEF_OP(VFDiv) {
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1319,6 +1341,7 @@ DEF_OP(VFMin) {
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1409,6 +1432,7 @@ DEF_OP(VFMax) {
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1485,6 +1509,7 @@ DEF_OP(VFRecp) {
   const auto SubRegSize = ConvertSubRegSizePair16(IROp);
   const auto IsScalar = Op->Header.ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -1550,6 +1575,7 @@ DEF_OP(VFRSqrt) {
   const auto SubRegSize = ConvertSubRegSizePair16(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -1614,6 +1640,7 @@ DEF_OP(VNot) {
   const auto Op = IROp->C<IR::IROp_VNot>();
   const auto OpSize = IROp->Size;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -1632,6 +1659,7 @@ DEF_OP(VUMin) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize16(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1680,6 +1708,7 @@ DEF_OP(VSMin) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize16(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1728,6 +1757,7 @@ DEF_OP(VUMax) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize16(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1776,6 +1806,7 @@ DEF_OP(VSMax) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize16(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1821,6 +1852,8 @@ DEF_OP(VBSL) {
   const auto Op = IROp->C<IR::IROp_VBSL>();
   const auto OpSize = IROp->Size;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Dst = GetVReg(Node);
@@ -1890,6 +1923,7 @@ DEF_OP(VCMPEQ) {
   const auto SubRegSize = ConvertSubRegSizePair16(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -1929,6 +1963,7 @@ DEF_OP(VCMPEQZ) {
   const auto SubRegSize = ConvertSubRegSizePair16(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -1970,6 +2005,7 @@ DEF_OP(VCMPGT) {
   const auto SubRegSize = ConvertSubRegSizePair16(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -2009,6 +2045,7 @@ DEF_OP(VCMPGTZ) {
   const auto SubRegSize = ConvertSubRegSizePair16(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -2047,6 +2084,7 @@ DEF_OP(VCMPLTZ) {
   const auto SubRegSize = ConvertSubRegSizePair16(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -2085,6 +2123,7 @@ DEF_OP(VFCMPEQ) {
   const auto SubRegSize = ConvertSubRegSizePair248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -2123,6 +2162,7 @@ DEF_OP(VFCMPNEQ) {
   const auto SubRegSize = ConvertSubRegSizePair248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -2163,6 +2203,7 @@ DEF_OP(VFCMPLT) {
   const auto SubRegSize = ConvertSubRegSizePair248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -2201,6 +2242,7 @@ DEF_OP(VFCMPGT) {
   const auto SubRegSize = ConvertSubRegSizePair248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -2239,6 +2281,7 @@ DEF_OP(VFCMPLE) {
   const auto SubRegSize = ConvertSubRegSizePair248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -2277,6 +2320,7 @@ DEF_OP(VFCMPORD) {
   const auto SubRegSize = ConvertSubRegSizePair248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -2327,6 +2371,7 @@ DEF_OP(VFCMPUNO) {
   const auto SubRegSize = ConvertSubRegSizePair248(IROp);
   const auto IsScalar = ElementSize == OpSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -2375,6 +2420,8 @@ DEF_OP(VUShl) {
   const auto ElementSize = IROp->ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto MaxShift = ElementSize * 8;
 
   const auto Dst = GetVReg(Node);
@@ -2429,6 +2476,8 @@ DEF_OP(VUShr) {
   const auto ElementSize = IROp->ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto MaxShift = ElementSize * 8;
 
   const auto Dst = GetVReg(Node);
@@ -2486,6 +2535,8 @@ DEF_OP(VSShr) {
   const auto ElementSize = IROp->ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto MaxShift = (ElementSize * 8) - 1;
   const auto RangeCheck = Op->RangeCheck;
 
@@ -2542,6 +2593,7 @@ DEF_OP(VUShlS) {
 
   const auto SubRegSize = ConvertSubRegSize16(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID());
@@ -2570,6 +2622,7 @@ DEF_OP(VUShrS) {
 
   const auto SubRegSize = ConvertSubRegSize16(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID());
@@ -2600,6 +2653,7 @@ DEF_OP(VUShrSWide) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID());
@@ -2665,6 +2719,7 @@ DEF_OP(VSShrSWide) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID());
@@ -2730,6 +2785,7 @@ DEF_OP(VUShlSWide) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID());
@@ -2793,6 +2849,7 @@ DEF_OP(VSShrS) {
 
   const auto SubRegSize = ConvertSubRegSize16(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto ShiftScalar = GetVReg(Op->ShiftScalar.ID());
@@ -2820,6 +2877,7 @@ DEF_OP(VInsElement) {
   const auto Op = IROp->C<IR::IROp_VInsElement>();
   const auto OpSize = IROp->Size;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const uint32_t ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize16(IROp);
@@ -2902,6 +2960,8 @@ DEF_OP(VDupElement) {
   const auto Index = Op->Index;
   const auto SubRegSize = ConvertSubRegSize16(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Dst = GetVReg(Node);
@@ -2922,6 +2982,7 @@ DEF_OP(VExtr) {
   const auto Op = IROp->C<IR::IROp_VExtr>();
   const auto OpSize = IROp->Size;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   // AArch64 ext op has bit arrangement as [Vm:Vn] so arguments need to be swapped
   const auto Dst = GetVReg(Node);
@@ -2973,6 +3034,7 @@ DEF_OP(VUShrI) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3014,6 +3076,7 @@ DEF_OP(VUShraI) {
   const auto BitShift = Op->BitShift;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto DestVector = GetVReg(Op->DestVector.ID());
@@ -3056,6 +3119,7 @@ DEF_OP(VSShrI) {
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Shift = std::min(uint8_t(ElementSize * 8 - 1), Op->BitShift);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3094,6 +3158,7 @@ DEF_OP(VShlI) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3135,6 +3200,7 @@ DEF_OP(VUShrNI) {
   const auto BitShift = Op->BitShift;
   const auto SubRegSize = ConvertSubRegSize4(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3154,6 +3220,7 @@ DEF_OP(VUShrNI2) {
   const auto BitShift = Op->BitShift;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto VectorLower = GetVReg(Op->VectorLower.ID());
@@ -3190,6 +3257,7 @@ DEF_OP(VSXTL) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3207,6 +3275,7 @@ DEF_OP(VSXTL2) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3224,6 +3293,7 @@ DEF_OP(VSSHLL) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3245,6 +3315,7 @@ DEF_OP(VSSHLL2) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3266,6 +3337,7 @@ DEF_OP(VUXTL) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3283,6 +3355,7 @@ DEF_OP(VUXTL2) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3300,6 +3373,7 @@ DEF_OP(VSQXTN) {
 
   const auto SubRegSize = ConvertSubRegSize4(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3351,6 +3425,7 @@ DEF_OP(VSQXTN2) {
 
   const auto SubRegSize = ConvertSubRegSize4(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto VectorLower = GetVReg(Op->VectorLower.ID());
@@ -3394,6 +3469,7 @@ DEF_OP(VSQXTNPair) {
 
   const auto SubRegSize = ConvertSubRegSize4(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto VectorLower = GetVReg(Op->VectorLower.ID());
@@ -3437,6 +3513,7 @@ DEF_OP(VSQXTUN) {
 
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -3455,6 +3532,7 @@ DEF_OP(VSQXTUN2) {
 
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto VectorLower = GetVReg(Op->VectorLower.ID());
@@ -3500,6 +3578,7 @@ DEF_OP(VSQXTUNPair) {
 
   const auto SubRegSize = ConvertSubRegSize4(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto VectorLower = GetVReg(Op->VectorLower.ID());
@@ -3542,6 +3621,8 @@ DEF_OP(VSRSHR) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubRegSize = ConvertSubRegSize16(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -3570,6 +3651,8 @@ DEF_OP(VSQSHL) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubRegSize = ConvertSubRegSize8(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -3598,6 +3681,8 @@ DEF_OP(VMul) {
   const auto OpSize = IROp->Size;
 
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto SubRegSize = ConvertSubRegSize16(IROp);
 
   const auto Dst = GetVReg(Node);
@@ -3617,6 +3702,7 @@ DEF_OP(VUMull) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -3637,6 +3723,7 @@ DEF_OP(VSMull) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -3657,6 +3744,7 @@ DEF_OP(VUMull2) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -3677,6 +3765,7 @@ DEF_OP(VSMull2) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -3698,6 +3787,8 @@ DEF_OP(VUMulH) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Dst = GetVReg(Node);
@@ -3747,6 +3838,8 @@ DEF_OP(VSMulH) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize8(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Dst = GetVReg(Node);
@@ -3795,6 +3888,7 @@ DEF_OP(VUABDL) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -3820,6 +3914,7 @@ DEF_OP(VUABDL2) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -3969,6 +4064,7 @@ DEF_OP(VRev32) {
 
   const auto ElementSize = Op->Header.ElementSize;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -4007,6 +4103,7 @@ DEF_OP(VRev64) {
   const auto ElementSize = Op->Header.ElementSize;
   const auto SubRegSize = ConvertSubRegSize4(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector = GetVReg(Op->Vector.ID());
@@ -4044,6 +4141,7 @@ DEF_OP(VFCADD) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -4090,6 +4188,7 @@ DEF_OP(VFMLA) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -4156,6 +4255,7 @@ DEF_OP(VFMLS) {
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -4245,6 +4345,7 @@ DEF_OP(VFNMLA) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
 
   const auto Dst = GetVReg(Node);
   const auto Vector1 = GetVReg(Op->Vector1.ID());
@@ -4312,6 +4413,8 @@ DEF_OP(VFNMLS) {
 
   const auto SubRegSize = ConvertSubRegSize248(IROp);
   const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
+  LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
+
   const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   const auto Dst = GetVReg(Node);