Skip to content

Commit

Permalink
[InstCombine][X86] Only demand used bits for VPERMILPD/VPERMILPS mask…
Browse files Browse the repository at this point in the history
… values

VPERMILPS lower bits0-3 (to index per-lane i32/f32 0-3)
VPERMILPD uses bit1  (to index per-lane i64/f64 0-1)

Use SimplifyDemandedBits to ignore anything touching the remaining bits.

Part of llvm#106413
  • Loading branch information
RKSimon committed Aug 29, 2024
1 parent 25c9410 commit d57c046
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 26 deletions.
19 changes: 17 additions & 2 deletions llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2963,14 +2963,29 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx512_vpermilvar_ps_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}

KnownBits KnownMask(32);
if (IC.SimplifyDemandedBits(&II, 1, APInt(32, 0b00011), KnownMask))
return ⅈ
break;
}

case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
case Intrinsic::x86_avx512_vpermilvar_pd_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}

KnownBits KnownMask(64);
if (IC.SimplifyDemandedBits(&II, 1, APInt(64, 0b00010), KnownMask))
return ⅈ
break;
}

case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
Expand Down
18 changes: 6 additions & 12 deletions llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,7 @@ define <8 x double> @poison_test_vpermilvar_pd_512(<8 x double> %v) {

define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps(
; CHECK-NEXT: [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <4 x float> [[S]]
;
%m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
Expand All @@ -236,8 +235,7 @@ define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMas

define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <8 x float> [[S]]
;
%m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
Expand All @@ -247,8 +245,7 @@ define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %I

define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <16 x float> [[S]]
;
%m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
Expand All @@ -258,8 +255,7 @@ define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32>

define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd(
; CHECK-NEXT: [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <2 x double> [[S]]
;
%m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
Expand All @@ -269,8 +265,7 @@ define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InM

define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <4 x double> [[S]]
;
%m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
Expand All @@ -280,8 +275,7 @@ define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64>

define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <8 x double> [[S]]
;
%m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
Expand Down
18 changes: 6 additions & 12 deletions llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,7 @@ define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {

define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps(
; CHECK-NEXT: [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <4 x float> [[S]]
;
%m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
Expand All @@ -236,8 +235,7 @@ define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMas

define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <8 x float> [[S]]
;
%m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
Expand All @@ -247,8 +245,7 @@ define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %I

define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <16 x float> [[S]]
;
%m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
Expand All @@ -258,8 +255,7 @@ define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32>

define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd(
; CHECK-NEXT: [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <2 x double> [[S]]
;
%m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
Expand All @@ -269,8 +265,7 @@ define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InM

define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <4 x double> [[S]]
;
%m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
Expand All @@ -280,8 +275,7 @@ define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64>

define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <8 x double> [[S]]
;
%m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
Expand Down

0 comments on commit d57c046

Please sign in to comment.