Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for Avx512Vbmi.MultiShift #103310

Merged
merged 1 commit into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19665,6 +19665,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vpermt2pd:
case INS_vpermt2ps:
case INS_vpermt2q:
case INS_vpmultishiftqb:
case INS_vshuff32x4:
case INS_vshuff64x2:
case INS_vshufi32x4:
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,7 @@ HARDWARE_INTRINSIC(AVX512DQ_VL, MultiplyLow,
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512VBMI Intrinsics
HARDWARE_INTRINSIC(AVX512VBMI, MultiShift, 64, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8, 64, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2, 64, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)

Expand All @@ -1117,6 +1118,7 @@ HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2,
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512VBMI.VL Intrinsics
HARDWARE_INTRINSIC(AVX512VBMI_VL, MultiShift, -1, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8, 16, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8x2, 16, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8, 32, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
Expand Down Expand Up @@ -1197,6 +1199,7 @@ HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractAdd,
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX10v1, MultiShift, -1, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16, 32, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16x2, 32, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x8, 16, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
Expand Down Expand Up @@ -1259,6 +1262,7 @@ HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector128,
HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector256, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, LeadingZeroCount, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, MultiplyLow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, MultiShift, 64, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, Or, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8, 64, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8x2, 64, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,7 @@ INST3(vpmullq, "pmullq", IUM_WR, BAD_CODE, BAD_
INST3(vpermb, "permb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Byte Elements
INST3(vpermi2b, "permi2b", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x75), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting the Index
INST3(vpermt2b, "permt2b", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7D), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table
INST3(vpmultishiftqb, "pmultishiftqb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x83), INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table

INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1876,6 +1876,28 @@ internal Avx10v1() { }
/// </summary>
public static Vector128<float> MultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
/// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
/// </summary>
public static Vector128<byte> MultiShift(Vector128<byte> control, Vector128<ulong> value) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
/// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
/// </summary>
public static Vector128<sbyte> MultiShift(Vector128<sbyte> control, Vector128<long> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b)
/// VPMULTISHIFTQB ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
/// </summary>
public static Vector256<byte> MultiShift(Vector256<byte> control, Vector256<ulong> value) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b)
/// VPMULTISHIFTQB ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
/// </summary>
public static Vector256<sbyte> MultiShift(Vector256<sbyte> control, Vector256<long> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_permutevar16x16_epi16 (__m256i a, __m256i b)
/// VPERMW ymm1 {k1}{z}, ymm2, ymm3/m256
Expand Down Expand Up @@ -3299,6 +3321,17 @@ internal V512() { }
/// </summary>
public static Vector512<ulong> MultiplyLow(Vector512<ulong> left, Vector512<ulong> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m512i _mm512_multishift_epi64_epi8( __m512i a, __m512i b)
/// VPMULTISHIFTQB zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
/// </summary>
public static Vector512<byte> MultiShift(Vector512<byte> control, Vector512<ulong> value) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m512i _mm512_multishift_epi64_epi8( __m512i a, __m512i b)
/// VPMULTISHIFTQB zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
/// </summary>
public static Vector512<sbyte> MultiShift(Vector512<sbyte> control, Vector512<long> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m512 _mm512_or_ps (__m512 a, __m512 b)
/// VORPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1875,6 +1875,28 @@ internal Avx10v1() { }
/// </summary>
public static Vector128<float> MultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractScalar(a, b, c);

/// <summary>
/// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
/// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
/// </summary>
public static Vector128<byte> MultiShift(Vector128<byte> control, Vector128<ulong> value) => MultiShift(control, value);
/// <summary>
/// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
/// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
/// </summary>
public static Vector128<sbyte> MultiShift(Vector128<sbyte> control, Vector128<long> value) => MultiShift(control, value);

/// <summary>
/// __m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b)
/// VPMULTISHIFTQB ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
/// </summary>
public static Vector256<byte> MultiShift(Vector256<byte> control, Vector256<ulong> value) => MultiShift(control, value);
/// <summary>
/// __m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b)
/// VPMULTISHIFTQB ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
/// </summary>
public static Vector256<sbyte> MultiShift(Vector256<sbyte> control, Vector256<long> value) => MultiShift(control, value);

/// <summary>
/// __m256i _mm256_permutevar16x16_epi16 (__m256i a, __m256i b)
/// VPERMW ymm1 {k1}{z}, ymm2, ymm3/m256
Expand Down Expand Up @@ -3289,6 +3311,17 @@ internal V512() { }
/// </summary>
public static Vector512<ulong> MultiplyLow(Vector512<ulong> left, Vector512<ulong> right) => MultiplyLow(left, right);

/// <summary>
/// __m512i _mm512_multishift_epi64_epi8( __m512i a, __m512i b)
/// VPMULTISHIFTQB zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
/// </summary>
public static Vector512<byte> MultiShift(Vector512<byte> control, Vector512<ulong> value) => MultiShift(control, value);
/// <summary>
/// __m512i _mm512_multishift_epi64_epi8( __m512i a, __m512i b)
/// VPMULTISHIFTQB zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
/// </summary>
public static Vector512<sbyte> MultiShift(Vector512<sbyte> control, Vector512<long> value) => MultiShift(control, value);

/// <summary>
/// __m512 _mm512_or_ps (__m512 a, __m512 b)
/// VORPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
Expand Down
Loading
Loading