Skip to content

Commit

Permalink
Arm64: Ensure 256-bit operations always assert without 256-bit SVE
Browse files Browse the repository at this point in the history
In many cases, our JIT will happily consume incorrectly formed 256-bit vector operations even when the host CPU doesn't support 256-bit SVE.
This is what caused the bug in #4006. For every vector operation that
can consume a 256-bit size, add an assert that checks that 256-bit
SVE is supported whenever that size is encountered.

This will ensure that #4006 doesn't happen again.
  • Loading branch information
Sonicadvance1 committed Aug 26, 2024
1 parent f897579 commit b19440c
Show file tree
Hide file tree
Showing 4 changed files with 273 additions and 163 deletions.
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1491,6 +1491,7 @@ DEF_OP(VExtractToGPR) {

const auto Offset = ElementSizeBits * Op->Index;
const auto Is256Bit = Offset >= SSERegBitSize;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto Dst = GetReg(Node);
const auto Vector = GetVReg(Op->Vector.ID());
Expand All @@ -1511,7 +1512,6 @@ DEF_OP(VExtractToGPR) {
// when acting on larger register sizes.
PerformMove(Vector, Op->Index);
} else {
LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host doesn't support SVE. Cannot perform 256-bit operation.");
LOGMAN_THROW_AA_FMT(Is256Bit, "Can't perform 256-bit extraction with op side: {}", OpSize);
LOGMAN_THROW_AA_FMT(Offset < AVXRegBitSize, "Trying to extract element outside bounds of register. Offset={}, Index={}", Offset, Op->Index);

Expand Down
11 changes: 11 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ DEF_OP(VInsGPR) {
const auto DestIdx = Op->DestIdx;
const auto ElementSize = Op->Header.ElementSize;
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto SubEmitSize = ConvertSubRegSize8(IROp);
const auto ElementsPer128Bit = 16 / ElementSize;
Expand Down Expand Up @@ -112,6 +113,8 @@ DEF_OP(VDupFromGPR) {
const auto Src = GetReg(Op->Src.ID());

const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto SubEmitSize = ConvertSubRegSize8(IROp);

if (HostSupportsSVE256 && Is256Bit) {
Expand Down Expand Up @@ -204,6 +207,7 @@ DEF_OP(Vector_SToF) {
const auto ElementSize = Op->Header.ElementSize;
const auto SubEmitSize = ConvertSubRegSize248(IROp);
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto Dst = GetVReg(Node);
const auto Vector = GetVReg(Op->Vector.ID());
Expand Down Expand Up @@ -236,6 +240,7 @@ DEF_OP(Vector_FToZS) {
const auto ElementSize = Op->Header.ElementSize;
const auto SubEmitSize = ConvertSubRegSize248(IROp);
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto Dst = GetVReg(Node);
const auto Vector = GetVReg(Op->Vector.ID());
Expand Down Expand Up @@ -266,6 +271,8 @@ DEF_OP(Vector_FToS) {
const auto OpSize = IROp->Size;

const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto SubEmitSize = ConvertSubRegSize248(IROp);

const auto Dst = GetVReg(Node);
Expand Down Expand Up @@ -295,6 +302,8 @@ DEF_OP(Vector_FToF) {
const auto ElementSize = Op->Header.ElementSize;
const auto SubEmitSize = ConvertSubRegSize248(IROp);
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto Conv = (ElementSize << 8) | Op->SrcElementSize;

const auto Dst = GetVReg(Node);
Expand Down Expand Up @@ -396,6 +405,7 @@ DEF_OP(Vector_FToI) {
const auto ElementSize = Op->Header.ElementSize;
const auto SubEmitSize = ConvertSubRegSize248(IROp);
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto Dst = GetVReg(Node);
const auto Vector = GetVReg(Op->Vector.ID());
Expand Down Expand Up @@ -456,6 +466,7 @@ DEF_OP(Vector_F64ToI32) {
const auto Round = Op->Round;

const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto Dst = GetVReg(Node);
const auto Vector = GetVReg(Op->Vector.ID());
Expand Down
28 changes: 12 additions & 16 deletions FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,7 @@ DEF_OP(LoadMem) {
case 8: ldr(Dst.D(), MemSrc); break;
case 16: ldr(Dst.Q(), MemSrc); break;
case 32: {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), Operand);
break;
Expand Down Expand Up @@ -747,6 +748,7 @@ DEF_OP(LoadMemTSO) {
case 8: ldr(Dst.D(), MemSrc); break;
case 16: ldr(Dst.Q(), MemSrc); break;
case 32: {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), MemSrc);
break;
Expand All @@ -766,9 +768,7 @@ DEF_OP(VLoadVectorMasked) {
const auto OpSize = IROp->Size;

const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
if (Is256Bit) {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VLoadVectorMasked with 256-bit operation");
}
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto SubRegSize = ConvertSubRegSize8(IROp);

const auto CMPPredicate = ARMEmitter::PReg::p0;
Expand Down Expand Up @@ -863,9 +863,7 @@ DEF_OP(VStoreVectorMasked) {
const auto OpSize = IROp->Size;

const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
if (Is256Bit) {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreVectorMasked with 256-bit operation");
}
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto SubRegSize = ConvertSubRegSize8(IROp);

const auto CMPPredicate = ARMEmitter::PReg::p0;
Expand Down Expand Up @@ -1082,9 +1080,7 @@ DEF_OP(VLoadVectorGatherMasked) {
/// - AddrBase also doesn't need to exist
/// - If the instruction is using 64-bit vector indexing or 32-bit addresses where the top-bit isn't set then this is valid!
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
if (Is256Bit) {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreVectorMasked with 256-bit operation");
}
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);

const auto Dst = GetVReg(Node);
const auto IncomingDst = GetVReg(Op->Incoming.ID());
Expand Down Expand Up @@ -1310,6 +1306,7 @@ DEF_OP(VBroadcastFromMem) {
const auto OpSize = IROp->Size;

const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto ElementSize = IROp->ElementSize;

const auto Dst = GetVReg(Node);
Expand All @@ -1318,11 +1315,6 @@ DEF_OP(VBroadcastFromMem) {
LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid element "
"size");

if (Is256Bit && !HostSupportsSVE256) {
LOGMAN_MSG_A_FMT("{}: 256-bit vectors must support SVE256", __func__);
return;
}

if (Is256Bit && HostSupportsSVE256) {
const auto GoverningPredicate = PRED_TMP_32B.Zeroing();

Expand Down Expand Up @@ -1511,6 +1503,7 @@ DEF_OP(StoreMem) {
break;
}
case 32: {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, MemSrc);
break;
Expand Down Expand Up @@ -1608,6 +1601,7 @@ DEF_OP(StoreMemTSO) {
case 8: str(Src.D(), MemSrc); break;
case 16: str(Src.Q(), MemSrc); break;
case 32: {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, Operand);
break;
Expand Down Expand Up @@ -2158,6 +2152,7 @@ DEF_OP(ParanoidLoadMemTSO) {
ins(ARMEmitter::SubRegSize::i64Bit, Dst, 1, TMP2);
break;
case 32:
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
dmb(ARMEmitter::BarrierScope::ISH);
ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), MemReg);
dmb(ARMEmitter::BarrierScope::ISH);
Expand Down Expand Up @@ -2234,6 +2229,7 @@ DEF_OP(ParanoidStoreMemTSO) {
break;
}
case 32: {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
dmb(ARMEmitter::BarrierScope::ISH);
st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, MemReg, 0);
dmb(ARMEmitter::BarrierScope::ISH);
Expand Down Expand Up @@ -2360,14 +2356,14 @@ DEF_OP(VStoreNonTemporal) {
const auto OpSize = IROp->Size;

const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;

const auto Value = GetVReg(Op->Value.ID());
const auto MemReg = GetReg(Op->Addr.ID());
const auto Offset = Op->Offset;

if (Is256Bit) {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreNonTemporal with 256-bit operation");
const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
const auto OffsetScaled = Offset / 32;
stnt1b(Value.Z(), GoverningPredicate, MemReg, OffsetScaled);
Expand Down Expand Up @@ -2402,14 +2398,14 @@ DEF_OP(VLoadNonTemporal) {
const auto OpSize = IROp->Size;

const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;

const auto Dst = GetVReg(Node);
const auto MemReg = GetReg(Op->Addr.ID());
const auto Offset = Op->Offset;

if (Is256Bit) {
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VStoreNonTemporal with 256-bit operation");
const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
const auto OffsetScaled = Offset / 32;
ldnt1b(Dst.Z(), GoverningPredicate, MemReg, OffsetScaled);
Expand Down
Loading

0 comments on commit b19440c

Please sign in to comment.