Merge pull request #3746 from Sonicadvance1/avx_13

AVX128: More instructions
FEX-Emu · Jun 24, 2024 · ddb9f6d · ddb9f6d
2 parents b2db04f + d29139d
commit ddb9f6d
Show file tree

Hide file tree

Showing 5 changed files with 380 additions and 64 deletions.
diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp
@@ -355,6 +355,24 @@ DEF_OP(Vector_FToF) {
   }
 }
 
+DEF_OP(Vector_FToF2) { // Arm64 emitter for the Vector_FToF2 IR op: float-to-float conversion, 2x widen or 2x narrow.
+  const auto Op = IROp->C<IR::IROp_Vector_FToF2>();
+
+  const auto ElementSize = Op->Header.ElementSize; // Element size from the op header; compared against SrcElementSize below.
+  const auto SubEmitSize = ConvertSubRegSize248(IROp); // Sub-register size handed to the emitter, derived from IROp by the helper.
+
+  const auto Dst = GetVReg(Node); // Vector register assigned to this node (the destination).
+  const auto Vector = GetVReg(Op->Vector.ID()); // Vector register of the source operand.
+
+  if (ElementSize > Op->SrcElementSize) { // Widening: header element size is larger than the source element size.
+    LOGMAN_THROW_AA_FMT(Op->SrcElementSize == (ElementSize >> 1), "IR invariant"); // Source must be exactly half the header element size.
+    fcvtl2(SubEmitSize, Dst.D(), Vector.D()); // FCVTL2 form; NOTE(review): per the Arm ISA this reads the upper half of the source - confirm intent.
+  } else { // Everything else is treated as narrowing; equal sizes fail the invariant check below.
+    LOGMAN_THROW_AA_FMT(Op->SrcElementSize == (ElementSize << 1), "IR invariant"); // Source must be exactly twice the header element size.
+    fcvtn2(SubEmitSize, Dst.D(), Vector.D()); // FCVTN2 form; NOTE(review): per the Arm ISA this writes the upper half of Dst and keeps its lower half - confirm intent.
+  }
+}
+
 DEF_OP(Vector_FToI) {
   const auto Op = IROp->C<IR::IROp_Vector_FToI>();
   const auto OpSize = IROp->Size;

diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@@ -981,6 +981,14 @@ class OpDispatchBuilder final : public IREmitter {
   struct RefPair { // Two Refs bundled together; presumably the low/high 128-bit halves of a wider AVX value - TODO confirm.
     Ref Low, High;
   };
+
+  RefPair AVX128_Zext(Ref R) { // Builds a RefPair from a single Ref, pairing it with a zero vector as the High member.
+    RefPair Pair;
+    Pair.Low = R; // Low member is the caller's Ref, passed through unchanged.
+    Pair.High = LoadZeroVector(OpSize::i128Bit); // High member comes from LoadZeroVector at 128-bit size.
+    return Pair;
+  }
+
   RefPair AVX128_LoadSource_WithOpSize(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags,
                                        bool NeedsHigh, MemoryAccessType AccessType = MemoryAccessType::DEFAULT);
 
@@ -1078,6 +1086,40 @@ class OpDispatchBuilder final : public IREmitter {
   void AVX128_VPSRLDQ(OpcodeArgs);
   void AVX128_VPSLLDQ(OpcodeArgs);
 
+  void AVX128_VINSERT(OpcodeArgs);
+  void AVX128_VINSERTPS(OpcodeArgs);
+
+  Ref AVX128_PHSUBImpl(Ref Src1, Ref Src2, size_t ElementSize);
+  template<size_t ElementSize>
+  void AVX128_VPHSUB(OpcodeArgs);
+
+  void AVX128_VPHSUBSW(OpcodeArgs);
+
+  template<size_t ElementSize>
+  void AVX128_VADDSUBP(OpcodeArgs);
+
+  template<size_t ElementSize, bool Signed>
+  void AVX128_VPMULL(OpcodeArgs);
+
+  void AVX128_VPMULHRSW(OpcodeArgs);
+
+  template<bool Signed>
+  void AVX128_VPMULHW(OpcodeArgs);
+
+  template<size_t DstElementSize, size_t SrcElementSize>
+  void AVX128_InsertScalar_CVT_Float_To_Float(OpcodeArgs);
+
+  template<size_t DstElementSize, size_t SrcElementSize>
+  void AVX128_Vector_CVT_Float_To_Float(OpcodeArgs);
+
+  template<size_t SrcElementSize, bool Narrow, bool HostRoundingMode>
+  void AVX128_Vector_CVT_Float_To_Int(OpcodeArgs);
+
+  template<size_t SrcElementSize, bool Widen>
+  void AVX128_Vector_CVT_Int_To_Float(OpcodeArgs);
+
+  void AVX128_VEXTRACT128(OpcodeArgs);
+
   // End of AVX 128-bit implementation
 
   void InvalidOp(OpcodeArgs);
@@ -1164,7 +1206,7 @@ class OpDispatchBuilder final : public IREmitter {
 
   // Opcode helpers for generalizing behavior across VEX and non-VEX variants.
 
-  Ref ADDSUBPOpImpl(OpcodeArgs, size_t ElementSize, Ref Src1, Ref Src2);
+  Ref ADDSUBPOpImpl(OpSize Size, size_t ElementSize, Ref Src1, Ref Src2);
 
   void AVXVectorALUOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize);
   void AVXVectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize);
@@ -1201,9 +1243,9 @@ class OpDispatchBuilder final : public IREmitter {
 
   Ref PHMINPOSUWOpImpl(OpcodeArgs);
 
-  Ref PHSUBOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, size_t ElementSize);
+  Ref PHSUBOpImpl(OpSize Size, Ref Src1, Ref Src2, size_t ElementSize);
 
-  Ref PHSUBSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op);
+  Ref PHSUBSOpImpl(OpSize Size, Ref Src1, Ref Src2);
 
   Ref PINSROpImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op,
                   const X86Tables::DecodedOperand& Imm);
@@ -1212,11 +1254,11 @@ class OpDispatchBuilder final : public IREmitter {
 
   Ref PMADDUBSWOpImpl(size_t Size, Ref Src1, Ref Src2);
 
-  Ref PMULHRSWOpImpl(OpcodeArgs, Ref Src1, Ref Src2);
+  Ref PMULHRSWOpImpl(OpSize Size, Ref Src1, Ref Src2);
 
   Ref PMULHWOpImpl(OpcodeArgs, bool Signed, Ref Src1, Ref Src2);
 
-  Ref PMULLOpImpl(OpcodeArgs, size_t ElementSize, bool Signed, Ref Src1, Ref Src2);
+  Ref PMULLOpImpl(OpSize Size, size_t ElementSize, bool Signed, Ref Src1, Ref Src2);
 
   Ref PSADBWOpImpl(size_t Size, Ref Src1, Ref Src2);