Helper files for Arm64 SVE design #99349

2,951 changes: 2,951 additions & 0 deletions sve_api/out_GenerateHWIntrinsicTests_Arm.cs


554 changes: 554 additions & 0 deletions sve_api/out_api/apiraw_FEAT_BF16__.cs


29 changes: 29 additions & 0 deletions sve_api/out_api/apiraw_FEAT_F32MM__.cs
@@ -0,0 +1,29 @@
namespace System.Runtime.Intrinsics.Arm;

/// VectorT Summary
public abstract partial class SveF32mm : AdvSimd /// Feature: FEAT_F32MM
{

public static unsafe Vector<float> MatrixMultiplyAccumulate(Vector<float> op1, Vector<float> op2, Vector<float> op3); // FMMLA // MOVPRFX

/// total method signatures: 1

}


/// Full API
public abstract partial class SveF32mm : AdvSimd /// Feature: FEAT_F32MM
{
/// MatrixMultiplyAccumulate : Matrix multiply-accumulate

/// svfloat32_t svmmla[_f32](svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) : "FMMLA Ztied1.S, Zop2.S, Zop3.S" or "MOVPRFX Zresult, Zop1; FMMLA Zresult.S, Zop2.S, Zop3.S"
public static unsafe Vector<float> MatrixMultiplyAccumulate(Vector<float> op1, Vector<float> op2, Vector<float> op3);


/// total method signatures: 1
/// total method names: 1
}


/// Total ACLE covered across API: 1
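
A minimal usage sketch for the surface above, assuming hypothetical Sve.LoadVector/Sve.StoreVector and Sve.CreateTrueMaskSingle helpers from the wider SVE design (none of them are defined in this file):

public static unsafe void MatrixMultiplyAccumulateSketch(float* a, float* b, float* acc)
{
    Vector<float> mask = Sve.CreateTrueMaskSingle();   // hypothetical all-true predicate
    Vector<float> va = Sve.LoadVector(mask, a);        // hypothetical predicated load
    Vector<float> vb = Sve.LoadVector(mask, b);
    Vector<float> vacc = Sve.LoadVector(mask, acc);
    // FMMLA treats each 128-bit segment as a 2x2 float tile and computes
    // vacc += va * vb tile by tile; the MOVPRFX form lets the JIT preserve
    // op1 when the destination register differs from it.
    vacc = SveF32mm.MatrixMultiplyAccumulate(vacc, va, vb);
    Sve.StoreVector(mask, acc, vacc);
}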

281 changes: 281 additions & 0 deletions sve_api/out_api/apiraw_FEAT_F64MM__.cs


1,321 changes: 1,321 additions & 0 deletions sve_api/out_api/apiraw_FEAT_FP16__.cs


80 changes: 80 additions & 0 deletions sve_api/out_api/apiraw_FEAT_I8MM__.cs
@@ -0,0 +1,80 @@
namespace System.Runtime.Intrinsics.Arm;

/// VectorT Summary
public abstract partial class SveI8mm : AdvSimd /// Feature: FEAT_I8MM
{

public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3); // USDOT // MOVPRFX

public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3, ulong imm_index); // SUDOT // MOVPRFX

public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3); // USDOT // MOVPRFX

public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3, ulong imm_index); // USDOT // MOVPRFX

/// T: [int, sbyte], [uint, byte]
public static unsafe Vector<T> MatrixMultiplyAccumulate(Vector<T> op1, Vector<T2> op2, Vector<T2> op3); // SMMLA or UMMLA // MOVPRFX

public static unsafe Vector<int> MatrixMultiplyAccumulateUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3); // USMMLA // MOVPRFX

/// total method signatures: 6


/// Optional Entries:

public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, byte op3); // USDOT // MOVPRFX

public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, sbyte op3); // USDOT // MOVPRFX

/// total optional method signatures: 2

}


/// Full API
public abstract partial class SveI8mm : AdvSimd /// Feature: FEAT_I8MM
{
/// DotProductSignedUnsigned : Dot product (signed × unsigned)

/// svint32_t svsudot[_s32](svint32_t op1, svint8_t op2, svuint8_t op3) : "USDOT Ztied1.S, Zop3.B, Zop2.B" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop3.B, Zop2.B"
public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3);

/// svint32_t svsudot_lane[_s32](svint32_t op1, svint8_t op2, svuint8_t op3, uint64_t imm_index) : "SUDOT Ztied1.S, Zop2.B, Zop3.B[imm_index]" or "MOVPRFX Zresult, Zop1; SUDOT Zresult.S, Zop2.B, Zop3.B[imm_index]"
public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3, ulong imm_index);


/// DotProductUnsignedSigned : Dot product (unsigned × signed)

/// svint32_t svusdot[_s32](svint32_t op1, svuint8_t op2, svint8_t op3) : "USDOT Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop2.B, Zop3.B"
public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);

/// svint32_t svusdot_lane[_s32](svint32_t op1, svuint8_t op2, svint8_t op3, uint64_t imm_index) : "USDOT Ztied1.S, Zop2.B, Zop3.B[imm_index]" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop2.B, Zop3.B[imm_index]"
public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3, ulong imm_index);


/// MatrixMultiplyAccumulate : Matrix multiply-accumulate

/// svint32_t svmmla[_s32](svint32_t op1, svint8_t op2, svint8_t op3) : "SMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; SMMLA Zresult.S, Zop2.B, Zop3.B"
public static unsafe Vector<int> MatrixMultiplyAccumulate(Vector<int> op1, Vector<sbyte> op2, Vector<sbyte> op3);

/// svuint32_t svmmla[_u32](svuint32_t op1, svuint8_t op2, svuint8_t op3) : "UMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; UMMLA Zresult.S, Zop2.B, Zop3.B"
public static unsafe Vector<uint> MatrixMultiplyAccumulate(Vector<uint> op1, Vector<byte> op2, Vector<byte> op3);


/// MatrixMultiplyAccumulateUnsignedSigned : Matrix multiply-accumulate (unsigned × signed)

/// svint32_t svusmmla[_s32](svint32_t op1, svuint8_t op2, svint8_t op3) : "USMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; USMMLA Zresult.S, Zop2.B, Zop3.B"
public static unsafe Vector<int> MatrixMultiplyAccumulateUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);


/// total method signatures: 7
/// total method names: 4
}

/// Optional Entries:
/// public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, byte op3); // svsudot[_n_s32]
/// public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, sbyte op3); // svusdot[_n_s32]
/// Total Maybe: 2

/// Total ACLE covered across API: 9
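
A hedged usage sketch for the unsigned-by-signed dot product, again assuming hypothetical predicated load/store and mask helpers on the base Sve class:

public static unsafe void DotProductUnsignedSignedSketch(byte* activations, sbyte* weights, int* acc)
{
    Vector<int> vacc = Sve.LoadVector(Sve.CreateTrueMaskInt32(), acc);         // hypothetical helpers
    Vector<byte> va = Sve.LoadVector(Sve.CreateTrueMaskByte(), activations);
    Vector<sbyte> vw = Sve.LoadVector(Sve.CreateTrueMaskSByte(), weights);
    // USDOT: each 32-bit lane of vacc accumulates the dot product of four
    // unsigned bytes of va with the four corresponding signed bytes of vw,
    // a common shape in quantized (int8) inference kernels.
    vacc = SveI8mm.DotProductUnsignedSigned(vacc, va, vw);
    Sve.StoreVector(Sve.CreateTrueMaskInt32(), acc, vacc);
}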

97 changes: 97 additions & 0 deletions sve_api/out_api/apiraw_FEAT_SHA3__.cs
@@ -0,0 +1,97 @@
namespace System.Runtime.Intrinsics.Arm;

/// VectorT Summary
public abstract partial class Sha3 : AdvSimd /// Feature: FEAT_SHA3
{

/// T: byte, ushort, uint, ulong, sbyte, short, int, long
public static unsafe Vector128<T> BitwiseClearXor(Vector128<T> xor, Vector128<T> value, Vector128<T> mask); // BCAX

public static unsafe Vector128<ulong> BitwiseRotateLeftBy1AndXor(Vector128<ulong> a, Vector128<ulong> b); // RAX1

/// T: byte, ushort, uint, ulong, sbyte, short, int, long
public static unsafe Vector128<T> Xor(Vector128<T> value1, Vector128<T> value2, Vector128<T> value3); // EOR3

public static unsafe Vector128<ulong> XorRotateRight(Vector128<ulong> left, Vector128<ulong> right, [ConstantExpected] byte count); // XAR

/// total method signatures: 4

}


/// Full API
public abstract partial class Sha3 : AdvSimd /// Feature: FEAT_SHA3
{
/// BitwiseClearXor : Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.

/// uint8x16_t vbcaxq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) : "BCAX Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<byte> BitwiseClearXor(Vector128<byte> xor, Vector128<byte> value, Vector128<byte> mask);

/// uint16x8_t vbcaxq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) : "BCAX Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<ushort> BitwiseClearXor(Vector128<ushort> xor, Vector128<ushort> value, Vector128<ushort> mask);

/// uint32x4_t vbcaxq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) : "BCAX Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<uint> BitwiseClearXor(Vector128<uint> xor, Vector128<uint> value, Vector128<uint> mask);

/// uint64x2_t vbcaxq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) : "BCAX Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<ulong> BitwiseClearXor(Vector128<ulong> xor, Vector128<ulong> value, Vector128<ulong> mask);

/// int8x16_t vbcaxq_s8(int8x16_t a, int8x16_t b, int8x16_t c) : "BCAX Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<sbyte> BitwiseClearXor(Vector128<sbyte> xor, Vector128<sbyte> value, Vector128<sbyte> mask);

/// int16x8_t vbcaxq_s16(int16x8_t a, int16x8_t b, int16x8_t c) : "BCAX Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<short> BitwiseClearXor(Vector128<short> xor, Vector128<short> value, Vector128<short> mask);

/// int32x4_t vbcaxq_s32(int32x4_t a, int32x4_t b, int32x4_t c) : "BCAX Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<int> BitwiseClearXor(Vector128<int> xor, Vector128<int> value, Vector128<int> mask);

/// int64x2_t vbcaxq_s64(int64x2_t a, int64x2_t b, int64x2_t c) : "BCAX Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<long> BitwiseClearXor(Vector128<long> xor, Vector128<long> value, Vector128<long> mask);


/// BitwiseRotateLeftBy1AndXor : Rotate and Exclusive OR rotates each 64-bit element of the 128-bit vector in a source SIMD&FP register left by 1, performs a bitwise exclusive OR of the resulting 128-bit vector and the vector in another source SIMD&FP register, and writes the result to the destination SIMD&FP register.

/// uint64x2_t vrax1q_u64(uint64x2_t a, uint64x2_t b) : "RAX1 Vd.2D,Vn.2D,Vm.2D"
public static unsafe Vector128<ulong> BitwiseRotateLeftBy1AndXor(Vector128<ulong> a, Vector128<ulong> b);


/// Xor : Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

/// uint8x16_t veor3q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) : "EOR3 Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<byte> Xor(Vector128<byte> value1, Vector128<byte> value2, Vector128<byte> value3);

/// uint16x8_t veor3q_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) : "EOR3 Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<ushort> Xor(Vector128<ushort> value1, Vector128<ushort> value2, Vector128<ushort> value3);

/// uint32x4_t veor3q_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) : "EOR3 Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<uint> Xor(Vector128<uint> value1, Vector128<uint> value2, Vector128<uint> value3);

/// uint64x2_t veor3q_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) : "EOR3 Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<ulong> Xor(Vector128<ulong> value1, Vector128<ulong> value2, Vector128<ulong> value3);

/// int8x16_t veor3q_s8(int8x16_t a, int8x16_t b, int8x16_t c) : "EOR3 Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<sbyte> Xor(Vector128<sbyte> value1, Vector128<sbyte> value2, Vector128<sbyte> value3);

/// int16x8_t veor3q_s16(int16x8_t a, int16x8_t b, int16x8_t c) : "EOR3 Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<short> Xor(Vector128<short> value1, Vector128<short> value2, Vector128<short> value3);

/// int32x4_t veor3q_s32(int32x4_t a, int32x4_t b, int32x4_t c) : "EOR3 Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<int> Xor(Vector128<int> value1, Vector128<int> value2, Vector128<int> value3);

/// int64x2_t veor3q_s64(int64x2_t a, int64x2_t b, int64x2_t c) : "EOR3 Vd.16B,Vn.16B,Vm.16B,Va.16B"
public static unsafe Vector128<long> Xor(Vector128<long> value1, Vector128<long> value2, Vector128<long> value3);


/// XorRotateRight : Exclusive OR and Rotate performs a bitwise exclusive OR of the 128-bit vectors in the two source SIMD&FP registers, rotates each 64-bit element of the resulting 128-bit vector right by the value specified by a 6-bit immediate value, and writes the result to the destination SIMD&FP register.

/// uint64x2_t vxarq_u64(uint64x2_t a, uint64x2_t b, const int imm6) : "XAR Vd.2D,Vn.2D,Vm.2D,imm6"
public static unsafe Vector128<ulong> XorRotateRight(Vector128<ulong> left, Vector128<ulong> right, [ConstantExpected] byte count);


/// total method signatures: 18
/// total method names: 4
}


/// Total ACLE covered across API: 18
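
A sketch of how these helpers compose in a Keccak-f style round; the variable roles and the surrounding state handling are assumptions, and only the three intrinsic calls come from the surface above:

static Vector128<ulong> KeccakFragmentSketch(Vector128<ulong> a, Vector128<ulong> b, Vector128<ulong> c)
{
    // Theta parity: one EOR3 replaces two dependent EOR instructions.
    Vector128<ulong> parity = Sha3.Xor(a, b, c);
    // Theta mixing: XOR with the parity column rotated left by 1 (RAX1).
    Vector128<ulong> d = Sha3.BitwiseRotateLeftBy1AndXor(a, parity);
    // Chi-style step: d ^ (c & ~b) in a single BCAX instead of BIC + EOR.
    return Sha3.BitwiseClearXor(d, c, b);
}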

37 changes: 37 additions & 0 deletions sve_api/out_api/apiraw_FEAT_SM4__.cs
@@ -0,0 +1,37 @@
namespace System.Runtime.Intrinsics.Arm;

/// VectorT Summary
public abstract partial class Sm4 : AdvSimd /// Feature: FEAT_SM4
{

public static unsafe Vector128<uint> Sm4EncryptionAndDecryption(Vector128<uint> a, Vector128<uint> b); // SM4E

public static unsafe Vector128<uint> Sm4KeyUpdates(Vector128<uint> a, Vector128<uint> b); // SM4EKEY

/// total method signatures: 2

}


/// Full API
public abstract partial class Sm4 : AdvSimd /// Feature: FEAT_SM4
{
/// Sm4EncryptionAndDecryption : SM4 Encode takes input data as a 128-bit vector from the first source SIMD&FP register, and four iterations of the round key held as the elements of the 128-bit vector in the second source SIMD&FP register. It encrypts the data by four rounds, in accordance with the SM4 standard, returning the 128-bit result to the destination SIMD&FP register.

/// uint32x4_t vsm4eq_u32(uint32x4_t a, uint32x4_t b) : "SM4E Vd.4S,Vn.4S"
public static unsafe Vector128<uint> Sm4EncryptionAndDecryption(Vector128<uint> a, Vector128<uint> b);


/// Sm4KeyUpdates : SM4 Key takes an input as a 128-bit vector from the first source SIMD&FP register and a 128-bit constant from the second SIMD&FP register. It derives four iterations of the output key, in accordance with the SM4 standard, returning the 128-bit result to the destination SIMD&FP register.

/// uint32x4_t vsm4ekeyq_u32(uint32x4_t a, uint32x4_t b) : "SM4EKEY Vd.4S,Vn.4S,Vm.4S"
public static unsafe Vector128<uint> Sm4KeyUpdates(Vector128<uint> a, Vector128<uint> b);


/// total method signatures: 2
/// total method names: 2
}


/// Total ACLE covered across API: 2
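
A hedged sketch of a full 32-round SM4 encryption built on the surface above; the round-key layout (eight vectors of four consecutive round keys) is an assumption for illustration:

static Vector128<uint> Sm4EncryptBlockSketch(Vector128<uint> block, Vector128<uint>[] roundKeys)
{
    // SM4E runs four rounds per call, so the 32-round cipher takes eight
    // calls, each consuming one vector of four consecutive round keys.
    Vector128<uint> state = block;
    for (int i = 0; i < 8; i++)
    {
        state = Sm4.Sm4EncryptionAndDecryption(state, roundKeys[i]);
    }
    // The final reversal of the four state words required by the SM4
    // standard is omitted here for brevity.
    return state;
}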
