Skip to content

Commit

Permalink
[InstCombine] Only fold extract element to trunc if vector hasOneUse (
Browse files Browse the repository at this point in the history
#115627)

This fixes a missed optimization caused by the `foldBitcastExtElt`
pattern interfering with other combine patterns. In the case I was
hitting, we have IR that combines two vectors into a new larger vector
by extracting elements and inserting them into the new vector.

```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
  %avec = bitcast i32 %a to <2 x half>
  %a0 = extractelement <2 x half> %avec, i32 0
  %a1 = extractelement <2 x half> %avec, i32 1
  %bvec = bitcast i32 %b to <2 x half>
  %b0 = extractelement <2 x half> %bvec, i32 0
  %b1 = extractelement <2 x half> %bvec, i32 1
  %ins0 = insertelement <4 x half> undef, half %a0, i32 0
  %ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
  %ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
  %ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
  ret <4 x half> %ins3
}
```

With the current behavior, `InstCombine` converts each vector extract
sequence to

```llvm
  %tmp = trunc i32 %a to i16
  %a0 = bitcast i16 %tmp to half
  %a1 = extractelement <2 x half> %avec, i32 1
```

where the extraction of `%a0` is now done by truncating the original
integer. While on it's own this is fairly reasonable, in this case it
also blocks the pattern which converts `extractelement` -
`insertelement` into shuffles which gives the overall simpler result:

```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
  %avec = bitcast i32 %a to <2 x half>
  %bvec = bitcast i32 %b to <2 x half>
  %ins3 = shufflevector <2 x half> %avec, <2 x half> %bvec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x half> %ins3
}
```

In this PR I fix the conflict by obeying the `hasOneUse` check even if
there is no shift instruction required. In these cases we can't remove
the vector completely, so the pattern has less benefit anyway.

Also fwiw, I think dropping the `hasOneUse` check for the 0th element
might have been a mistake in the first place. Looking at
535c5d5
the commit message only mentions loosening the `isDesirableIntType`
requirement and doesn't mention changing the `hasOneUse` check at all.
  • Loading branch information
peterbell10 authored Nov 20, 2024
1 parent e468653 commit a3f2e01
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 15 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
if (IsBigEndian)
ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
unsigned ShiftAmountC = ExtIndexC * DestWidth;
if (!ShiftAmountC ||
(isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
Ext.getVectorOperand()->hasOneUse())) {
if ((!ShiftAmountC ||
isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
Ext.getVectorOperand()->hasOneUse()) {
if (ShiftAmountC)
X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
if (DestTy->isFloatingPointTy()) {
Expand Down
18 changes: 6 additions & 12 deletions llvm/test/Transforms/InstCombine/extractelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -722,20 +722,14 @@ define i8 @bitcast_scalar_index_variable(i32 %x, i64 %y) {
ret i8 %r
}

; extra use is ok if we don't need a shift
; extra use is not ok, even if we don't need a shift

define i8 @bitcast_scalar_index0_use(i64 %x) {
; ANYLE-LABEL: @bitcast_scalar_index0_use(
; ANYLE-NEXT: [[V:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8>
; ANYLE-NEXT: call void @use(<8 x i8> [[V]])
; ANYLE-NEXT: [[R:%.*]] = trunc i64 [[X]] to i8
; ANYLE-NEXT: ret i8 [[R]]
;
; ANYBE-LABEL: @bitcast_scalar_index0_use(
; ANYBE-NEXT: [[V:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8>
; ANYBE-NEXT: call void @use(<8 x i8> [[V]])
; ANYBE-NEXT: [[R:%.*]] = extractelement <8 x i8> [[V]], i64 0
; ANYBE-NEXT: ret i8 [[R]]
; ANY-LABEL: @bitcast_scalar_index0_use(
; ANY-NEXT: [[V:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8>
; ANY-NEXT: call void @use(<8 x i8> [[V]])
; ANY-NEXT: [[R:%.*]] = extractelement <8 x i8> [[V]], i64 0
; ANY-NEXT: ret i8 [[R]]
;

%v = bitcast i64 %x to <8 x i8>
Expand Down
22 changes: 22 additions & 0 deletions llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,26 @@ define <4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
ret <4 x float> %ins1
}

; test that foldBitcastExtElt doesn't interfere with shuffle folding

define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
; CHECK-LABEL: @bitcast_extract_insert_to_shuffle(
; CHECK-NEXT: [[AVEC:%.*]] = bitcast i32 [[A:%.*]] to <2 x half>
; CHECK-NEXT: [[BVEC:%.*]] = bitcast i32 [[B:%.*]] to <2 x half>
; CHECK-NEXT: [[INS3:%.*]] = shufflevector <2 x half> [[AVEC]], <2 x half> [[BVEC]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x half> [[INS3]]
;
%avec = bitcast i32 %a to <2 x half>
%a0 = extractelement <2 x half> %avec, i32 0
%a1 = extractelement <2 x half> %avec, i32 1
%bvec = bitcast i32 %b to <2 x half>
%b0 = extractelement <2 x half> %bvec, i32 0
%b1 = extractelement <2 x half> %bvec, i32 1
%ins0 = insertelement <4 x half> undef, half %a0, i32 0
%ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
%ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
%ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
ret <4 x half> %ins3
}


0 comments on commit a3f2e01

Please sign in to comment.