[API Proposal]: Arm64: FEAT_I8MM #94027

a74nh · 2023-10-26T11:23:45Z

namespace System.Runtime.Intrinsics.Arm;

/// VectorT Summary
public abstract partial class SveI8mm : AdvSimd /// Feature: FEAT_I8MM
{

  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3); // USDOT // MOVPRFX

  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3, ulong imm_index); // SUDOT // MOVPRFX

  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3); // USDOT // MOVPRFX

  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3, ulong imm_index); // USDOT // MOVPRFX

  /// T: [int, sbyte], [uint, byte]
  public static unsafe Vector<T> MatrixMultiplyAccumulate(Vector<T> op1, Vector<T2> op2, Vector<T2> op3); // SMMLA or UMMLA // MOVPRFX

  public static unsafe Vector<int> MatrixMultiplyAccumulateUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3); // USMMLA // MOVPRFX

  /// total method signatures: 6


  /// Optional Entries:

  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, byte op3); // USDOT // MOVPRFX

  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, sbyte op3); // USDOT // MOVPRFX

  /// total optional method signatures: 2

}


/// Full API
public abstract partial class SveI8mm : AdvSimd /// Feature: FEAT_I8MM
{
    /// DotProductSignedUnsigned : Dot product (signed × unsigned)

    /// svint32_t svsudot[_s32](svint32_t op1, svint8_t op2, svuint8_t op3) : "USDOT Ztied1.S, Zop3.B, Zop2.B" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop3.B, Zop2.B"
  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3);

    /// svint32_t svsudot_lane[_s32](svint32_t op1, svint8_t op2, svuint8_t op3, uint64_t imm_index) : "SUDOT Ztied1.S, Zop2.B, Zop3.B[imm_index]" or "MOVPRFX Zresult, Zop1; SUDOT Zresult.S, Zop2.B, Zop3.B[imm_index]"
  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3, ulong imm_index);


    /// DotProductUnsignedSigned : Dot product (unsigned × signed)

    /// svint32_t svusdot[_s32](svint32_t op1, svuint8_t op2, svint8_t op3) : "USDOT Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);

    /// svint32_t svusdot_lane[_s32](svint32_t op1, svuint8_t op2, svint8_t op3, uint64_t imm_index) : "USDOT Ztied1.S, Zop2.B, Zop3.B[imm_index]" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop2.B, Zop3.B[imm_index]"
  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3, ulong imm_index);


    /// MatrixMultiplyAccumulate : Matrix multiply-accumulate

    /// svint32_t svmmla[_s32](svint32_t op1, svint8_t op2, svint8_t op3) : "SMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; SMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> MatrixMultiplyAccumulate(Vector<int> op1, Vector<sbyte> op2, Vector<sbyte> op3);

    /// svuint32_t svmmla[_u32](svuint32_t op1, svuint8_t op2, svuint8_t op3) : "UMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; UMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<uint> MatrixMultiplyAccumulate(Vector<uint> op1, Vector<byte> op2, Vector<byte> op3);


    /// MatrixMultiplyAccumulateUnsignedSigned : Matrix multiply-accumulate (unsigned × signed)

    /// svint32_t svusmmla[_s32](svint32_t op1, svuint8_t op2, svint8_t op3) : "USMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; USMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> MatrixMultiplyAccumulateUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);


  /// total method signatures: 7
  /// total method names:      4
}

  /// Optional Entries:
  ///   public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, byte op3); // svsudot[_n_s32]
  ///   public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, sbyte op3); // svusdot[_n_s32]
  ///   Total Maybe: 2

  /// Total ACLE covered across API:      9

ghost · 2023-10-26T11:23:51Z

Tagging subscribers to this area: @dotnet/area-system-numerics
See info in area-owners.md if you want to be subscribed.

Issue Details

namespace System.Runtime.Intrinsics.Arm

/// VectorT Summary
public abstract class Sve : AdvSimd /// Feature: FEAT_I8MM
{
  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3);

  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3, ulong imm_index);

  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);

  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3, ulong imm_index);

  /// T: [int, sbyte], [uint, byte]
  public static unsafe Vector<T> MatrixMultiplyAccumulate(Vector<T> op1, Vector<T2> op2, Vector<T2> op3); // SMMLA or UMMLA (MOVPRFX)

  public static unsafe Vector<int> MatrixMultiplyAccumulateUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);

  /// total method signatures: 6
}


/// Full API
public abstract class Sve : AdvSimd /// Feature: FEAT_I8MM
{
    /// DotProductSignedUnsigned : Dot product (signed × unsigned)

    /// svint32_t svsudot[_s32](svint32_t op1, svint8_t op2, svuint8_t op3) : "USDOT Ztied1.S, Zop3.B, Zop2.B" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop3.B, Zop2.B"
  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3);

    /// svint32_t svsudot_lane[_s32](svint32_t op1, svint8_t op2, svuint8_t op3, uint64_t imm_index) : "SUDOT Ztied1.S, Zop2.B, Zop3.B[imm_index]" or "MOVPRFX Zresult, Zop1; SUDOT Zresult.S, Zop2.B, Zop3.B[imm_index]"
  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3, ulong imm_index);


    /// DotProductUnsignedSigned : Dot product (unsigned × signed)

    /// svint32_t svusdot[_s32](svint32_t op1, svuint8_t op2, svint8_t op3) : "USDOT Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);

    /// svint32_t svusdot_lane[_s32](svint32_t op1, svuint8_t op2, svint8_t op3, uint64_t imm_index) : "USDOT Ztied1.S, Zop2.B, Zop3.B[imm_index]" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop2.B, Zop3.B[imm_index]"
  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3, ulong imm_index);


    /// MatrixMultiplyAccumulate : Matrix multiply-accumulate

    /// svint32_t svmmla[_s32](svint32_t op1, svint8_t op2, svint8_t op3) : "SMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; SMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> MatrixMultiplyAccumulate(Vector<int> op1, Vector<sbyte> op2, Vector<sbyte> op3);

    /// svuint32_t svmmla[_u32](svuint32_t op1, svuint8_t op2, svuint8_t op3) : "UMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; UMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<uint> MatrixMultiplyAccumulate(Vector<uint> op1, Vector<byte> op2, Vector<byte> op3);


    /// MatrixMultiplyAccumulateUnsignedSigned : Matrix multiply-accumulate (unsigned × signed)

    /// svint32_t svusmmla[_s32](svint32_t op1, svuint8_t op2, svint8_t op3) : "USMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; USMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> MatrixMultiplyAccumulateUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);


  /// total method signatures: 7
  /// total method names:      4
}

  /// Rejected:
  ///   public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, byte op3); // svsudot[_n_s32]
  ///   public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, sbyte op3); // svusdot[_n_s32]
  ///   Total Rejected: 787

  /// Total ACLE covered across API:      9

Author:	a74nh
Assignees:	-
Labels:	`area-System.Numerics`
Milestone:	-

a74nh · 2023-10-26T11:24:54Z

This contributes to #93095

It covers all of the instructions in FEAT_I8MM. This an optional 8.2 feature and from 8.6 is mandatory in any hardware with SVE.

This list was auto generated from the C ACLE for SVE, and is in three parts:

The methods list reduced down to Vector versions. All possible varaints of T are given above the method.
The complete list of all methods. The corresponding ACLE methods and SVE instructions are given above the method.
All rejected ACLE methods. These are methods we have agreed that do not need including in C#.
Where possible, existing C# naming conventions have been matched.

Many of the C functions include predicate argument(s), of type svbool_t as the first argument. These are missing from the C# method. It is expected that the Jit will create predicates where required, or combine with uses of conditionalSelect(). For more discussion see #88140 comment.

a74nh · 2023-11-06T17:14:20Z

Updated to reflect review comments from other API proposals.

ghost · 2024-02-08T05:23:14Z

Tagging subscribers to this area: @dotnet/area-system-runtime-intrinsics
See info in area-owners.md if you want to be subscribed.

Issue Details

namespace System.Runtime.Intrinsics.Arm

/// VectorT Summary
public abstract class SveI8mm : AdvSimd /// Feature: FEAT_I8MM
{

  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3); // USDOT // MOVPRFX

  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3, ulong imm_index); // SUDOT // MOVPRFX

  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3); // USDOT // MOVPRFX

  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3, ulong imm_index); // USDOT // MOVPRFX

  /// T: [int, sbyte], [uint, byte]
  public static unsafe Vector<T> MatrixMultiplyAccumulate(Vector<T> op1, Vector<T2> op2, Vector<T2> op3); // SMMLA or UMMLA // MOVPRFX

  public static unsafe Vector<int> MatrixMultiplyAccumulateUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3); // USMMLA // MOVPRFX

  /// total method signatures: 6


  /// Optional Entries:

  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, byte op3); // USDOT // MOVPRFX

  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, sbyte op3); // USDOT // MOVPRFX

  /// total optional method signatures: 2

}


/// Full API
public abstract class SveI8mm : AdvSimd /// Feature: FEAT_I8MM
{
    /// DotProductSignedUnsigned : Dot product (signed × unsigned)

    /// svint32_t svsudot[_s32](svint32_t op1, svint8_t op2, svuint8_t op3) : "USDOT Ztied1.S, Zop3.B, Zop2.B" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop3.B, Zop2.B"
  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3);

    /// svint32_t svsudot_lane[_s32](svint32_t op1, svint8_t op2, svuint8_t op3, uint64_t imm_index) : "SUDOT Ztied1.S, Zop2.B, Zop3.B[imm_index]" or "MOVPRFX Zresult, Zop1; SUDOT Zresult.S, Zop2.B, Zop3.B[imm_index]"
  public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, Vector<byte> op3, ulong imm_index);


    /// DotProductUnsignedSigned : Dot product (unsigned × signed)

    /// svint32_t svusdot[_s32](svint32_t op1, svuint8_t op2, svint8_t op3) : "USDOT Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);

    /// svint32_t svusdot_lane[_s32](svint32_t op1, svuint8_t op2, svint8_t op3, uint64_t imm_index) : "USDOT Ztied1.S, Zop2.B, Zop3.B[imm_index]" or "MOVPRFX Zresult, Zop1; USDOT Zresult.S, Zop2.B, Zop3.B[imm_index]"
  public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3, ulong imm_index);


    /// MatrixMultiplyAccumulate : Matrix multiply-accumulate

    /// svint32_t svmmla[_s32](svint32_t op1, svint8_t op2, svint8_t op3) : "SMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; SMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> MatrixMultiplyAccumulate(Vector<int> op1, Vector<sbyte> op2, Vector<sbyte> op3);

    /// svuint32_t svmmla[_u32](svuint32_t op1, svuint8_t op2, svuint8_t op3) : "UMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; UMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<uint> MatrixMultiplyAccumulate(Vector<uint> op1, Vector<byte> op2, Vector<byte> op3);


    /// MatrixMultiplyAccumulateUnsignedSigned : Matrix multiply-accumulate (unsigned × signed)

    /// svint32_t svusmmla[_s32](svint32_t op1, svuint8_t op2, svint8_t op3) : "USMMLA Ztied1.S, Zop2.B, Zop3.B" or "MOVPRFX Zresult, Zop1; USMMLA Zresult.S, Zop2.B, Zop3.B"
  public static unsafe Vector<int> MatrixMultiplyAccumulateUnsignedSigned(Vector<int> op1, Vector<byte> op2, Vector<sbyte> op3);


  /// total method signatures: 7
  /// total method names:      4
}

  /// Optional Entries:
  ///   public static unsafe Vector<int> DotProductSignedUnsigned(Vector<int> op1, Vector<sbyte> op2, byte op3); // svsudot[_n_s32]
  ///   public static unsafe Vector<int> DotProductUnsignedSigned(Vector<int> op1, Vector<byte> op2, sbyte op3); // svusdot[_n_s32]
  ///   Total Maybe: 2

  /// Total ACLE covered across API:      9

Author:	a74nh
Assignees:	-
Labels:	`area-System.Runtime.Intrinsics`, `untriaged`, `api-ready-for-review`
Milestone:	-

tannergooding · 2024-02-08T06:57:08Z

Fixed various method names to match those used in already exposed API surface and fixed the class to inherit from Sve

a74nh · 2024-08-21T09:49:54Z

Updated to match implemented SVE1 methods.

dotnet-issue-labeler bot added the area-System.Numerics label Oct 26, 2023

ghost added the untriaged New issue has not been triaged by the area owner label Oct 26, 2023

a74nh mentioned this issue Oct 26, 2023

Arm64: Add SVE/SVE2 support in .NET 9 #93095

Closed

31 tasks

a74nh mentioned this issue Nov 13, 2023

Sve: Early versions of Intrinsics/Arm/Sve and hwintrinsiclistarm64sve #94606

Closed

kunalspathak added the api-ready-for-review API is ready for review, it is NOT ready for implementation label Feb 7, 2024

tannergooding added area-System.Runtime.Intrinsics and removed area-System.Numerics labels Feb 8, 2024

tannergooding removed the untriaged New issue has not been triaged by the area owner label Feb 8, 2024

kunalspathak mentioned this issue Mar 19, 2024

Arm64: Implement SVE APIs #99957

Closed

kunalspathak added this to the Future milestone May 1, 2024

tannergooding added api-suggestion Early API idea and discussion, it is NOT ready for implementation arch-arm64 and removed api-ready-for-review API is ready for review, it is NOT ready for implementation labels Aug 20, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[API Proposal]: Arm64: FEAT_I8MM #94027

[API Proposal]: Arm64: FEAT_I8MM #94027

a74nh commented Oct 26, 2023 •

edited

Loading

ghost commented Oct 26, 2023

a74nh commented Oct 26, 2023

a74nh commented Nov 6, 2023

ghost commented Feb 8, 2024

tannergooding commented Feb 8, 2024

a74nh commented Aug 21, 2024

[API Proposal]: Arm64: FEAT_I8MM #94027

[API Proposal]: Arm64: FEAT_I8MM #94027

Comments

a74nh commented Oct 26, 2023 • edited Loading

ghost commented Oct 26, 2023

a74nh commented Oct 26, 2023

a74nh commented Nov 6, 2023

ghost commented Feb 8, 2024

tannergooding commented Feb 8, 2024

a74nh commented Aug 21, 2024

a74nh commented Oct 26, 2023 •

edited

Loading