Skip to content

Commit

Permalink
Expose the rest of embedded rounding APIs
Browse files Browse the repository at this point in the history
  • Loading branch information
Ruihan-Yin committed Feb 13, 2024
1 parent d0c805c commit fd5a2d5
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 39 deletions.
6 changes: 6 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26498,6 +26498,7 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const
case NI_AVX512F_Subtract:

case NI_AVX512F_Scale:
case NI_AVX512F_ScaleScalar:

case NI_AVX512F_ConvertScalarToVector128Single:
#if defined(TARGET_AMD64)
Expand All @@ -26521,9 +26522,14 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const
case NI_AVX512F_X64_ConvertToInt64:
case NI_AVX512F_X64_ConvertToUInt64:
#endif // TARGET_AMD64
case NI_AVX512DQ_ConvertToVector256Single:
case NI_AVX512DQ_ConvertToVector512Double:
case NI_AVX512DQ_ConvertToVector512Int64:
case NI_AVX512DQ_ConvertToVector512UInt64:
{
return numArgs == 2;
}

default:
unreached();
}
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -941,8 +941,8 @@ HARDWARE_INTRINSIC(AVX512F, RotateRight,
HARDWARE_INTRINSIC(AVX512F, RotateRightVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, RoundScale, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F, RoundScaleScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX512F, Scale, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX512F, ScaleScalar, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX512F, Scale, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX512F, ScaleScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogical, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogicalVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible)
HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmetic, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
Expand Down Expand Up @@ -1132,11 +1132,11 @@ HARDWARE_INTRINSIC(AVX512DQ, AndNot,
HARDWARE_INTRINSIC(AVX512DQ, BroadcastPairScalarToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector128ToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_invalid, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector256ToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_invalid, INS_invalid, INS_vbroadcastf32x8, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector256Single, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Double, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector256Single, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Double, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64WithTruncation, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64WithTruncation, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512DQ, ExtractVector128, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti64x2, INS_vextracti64x2, INS_invalid, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512DQ, ExtractVector256, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_invalid, INS_invalid, INS_vextractf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,16 @@ internal X64() { }
/// VCVTUQQ2PS ymm1 {k1}{z}, zmm2/m512/m64bcst
/// </summary>
public static Vector256<float> ConvertToVector256Single(Vector512<ulong> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256 _mm512_cvt_roundepi64_ps (__m512i a, int r)
/// VCVTQQ2PS ymm1, zmm2 {er}
/// </summary>
public static Vector256<float> ConvertToVector256Single(Vector512<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256 _mm512_cvt_roundepu64_ps (__m512i a, int r)
/// VCVTUQQ2PS ymm1, zmm2 {er}
/// </summary>
public static Vector256<float> ConvertToVector256Single(Vector512<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512d _mm512_cvtepi64_pd (__m512i a)
/// VCVTQQ2PD zmm1 {k1}{z}, zmm2/m512/m64bcst
Expand All @@ -334,6 +343,16 @@ internal X64() { }
/// </summary>
public static Vector512<double> ConvertToVector512Double(Vector512<ulong> value) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512d _mm512_cvt_roundepi64_pd (__m512i a, int r)
/// VCVTQQ2PD zmm1, zmm2 {er}
/// </summary>
public static Vector512<double> ConvertToVector512Double(Vector512<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512d _mm512_cvt_roundepu64_pd (__m512i a, int r)
/// VCVTUQQ2PD zmm1, zmm2 {er}
/// </summary>
public static Vector512<double> ConvertToVector512Double(Vector512<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512i _mm512_cvtps_epi64 (__m512 a)
/// VCVTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
/// </summary>
Expand All @@ -344,6 +363,16 @@ internal X64() { }
/// </summary>
public static Vector512<long> ConvertToVector512Int64(Vector512<double> value) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512i _mm512_cvt_roundps_epi64 (__m512 a, int r)
/// VCVTPS2QQ zmm1, ymm2 {er}
/// </summary>
public static Vector512<long> ConvertToVector512Int64(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512i _mm512_cvt_roundpd_epi64 (__m512d a, int r)
/// VCVTPD2QQ zmm1, zmm2 {er}
/// </summary>
public static Vector512<long> ConvertToVector512Int64(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512i _mm512_cvttps_epi64 (__m512 a)
/// VCVTTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
/// </summary>
Expand All @@ -364,6 +393,16 @@ internal X64() { }
/// </summary>
public static Vector512<ulong> ConvertToVector512UInt64(Vector512<double> value) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512i _mm512_cvt_roundps_epu64 (__m512 a, int r)
/// VCVTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
/// </summary>
public static Vector512<ulong> ConvertToVector512UInt64(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512i _mm512_cvt_roundpd_epu64 (__m512d a, int r)
/// VCVTPD2UQQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
/// </summary>
public static Vector512<ulong> ConvertToVector512UInt64(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512i _mm512_cvttps_epu64 (__m512 a)
/// VCVTTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,16 @@ internal X64() { }
/// VCVTUQQ2PS ymm1 {k1}{z}, zmm2/m512/m64bcst
/// </summary>
public static Vector256<float> ConvertToVector256Single(Vector512<ulong> value) => ConvertToVector256Single(value);
/// <summary>
/// __m256 _mm512_cvt_roundepi64_ps (__m512i a, int r)
/// VCVTQQ2PS ymm1, zmm2 {er}
/// </summary>
public static Vector256<float> ConvertToVector256Single(Vector512<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);
/// <summary>
/// __m256 _mm512_cvt_roundepu64_ps (__m512i a, int r)
/// VCVTUQQ2PS ymm1, zmm2 {er}
/// </summary>
public static Vector256<float> ConvertToVector256Single(Vector512<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);

/// <summary>
/// __m512d _mm512_cvtepi64_pd (__m512i a)
Expand All @@ -334,6 +344,17 @@ internal X64() { }
/// VCVTUQQ2PD zmm1 {k1}{z}, zmm2/m512/m64bcst
/// </summary>
public static Vector512<double> ConvertToVector512Double(Vector512<ulong> value) => ConvertToVector512Double(value);
/// <summary>
/// __m512d _mm512_cvt_roundepi64_pd (__m512i a, int r)
/// VCVTQQ2PD zmm1, zmm2 {er}
/// </summary>
public static Vector512<double> ConvertToVector512Double(Vector512<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Double(value, mode);
/// <summary>
/// __m512d _mm512_cvt_roundepu64_pd (__m512i a, int r)
/// VCVTUQQ2PD zmm1, zmm2 {er}
/// </summary>
public static Vector512<double> ConvertToVector512Double(Vector512<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Double(value, mode);

/// <summary>
/// __m512i _mm512_cvtps_epi64 (__m512 a)
/// VCVTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
Expand All @@ -344,6 +365,17 @@ internal X64() { }
/// VCVTPD2QQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
/// </summary>
public static Vector512<long> ConvertToVector512Int64(Vector512<double> value) => ConvertToVector512Int64(value);
/// <summary>
/// __m512i _mm512_cvt_roundps_epi64 (__m512 a, int r)
/// VCVTPS2QQ zmm1, ymm2 {er}
/// </summary>
public static Vector512<long> ConvertToVector512Int64(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Int64(value, mode);
/// <summary>
/// __m512i _mm512_cvt_roundpd_epi64 (__m512d a, int r)
/// VCVTPD2QQ zmm1, zmm2 {er}
/// </summary>
public static Vector512<long> ConvertToVector512Int64(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Int64(value, mode);

/// <summary>
/// __m512i _mm512_cvttps_epi64 (__m512 a)
/// VCVTTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
Expand All @@ -365,6 +397,16 @@ internal X64() { }
/// </summary>
public static Vector512<ulong> ConvertToVector512UInt64(Vector512<double> value) => ConvertToVector512UInt64(value);
/// <summary>
/// __m512i _mm512_cvt_roundps_epu64 (__m512 a, int r)
/// VCVTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
/// </summary>
public static Vector512<ulong> ConvertToVector512UInt64(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512UInt64(value, mode);
/// <summary>
/// __m512i _mm512_cvt_roundpd_epu64 (__m512d a, int r)
/// VCVTPD2UQQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
/// </summary>
public static Vector512<ulong> ConvertToVector512UInt64(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512UInt64(value, mode);
/// <summary>
/// __m512i _mm512_cvttps_epu64 (__m512 a)
/// VCVTTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
/// </summary>
Expand Down
Loading

0 comments on commit fd5a2d5

Please sign in to comment.