Skip to content

Commit

Permalink
[SLP]Do not create new vector node if scalars fully overlap with the …
Browse files Browse the repository at this point in the history
…existing one

If the list of scalars is already vectorized as part of the same vector node,
there is no need to generate the vector node again; it will be handled as part
of the overlap matching.

Fixes llvm#113810
  • Loading branch information
alexey-bataev authored and NoumanAmir657 committed Nov 4, 2024
1 parent 21a3df7 commit bd52e4f
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 2 deletions.
9 changes: 7 additions & 2 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7947,8 +7947,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Nodes.insert(E);
SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
if (any_of(Nodes, [&](const TreeEntry *E) {
return all_of(E->Scalars,
[&](Value *V) { return Values.contains(V); });
if (all_of(E->Scalars,
[&](Value *V) { return Values.contains(V); }))
return true;
SmallPtrSet<Value *, 8> EValues(E->Scalars.begin(),
E->Scalars.end());
return (
all_of(VL, [&](Value *V) { return EValues.contains(V); }));
})) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
if (TryToFindDuplicates(S))
Expand Down
93 changes: 93 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s

; @test derives four pointers from %0 (byte offsets 8, 12, 16 and 20), produces
; four i32 values along three different control-flow paths, merges them with
; phi nodes in L112, and stores them alternately through %p2 and %p1.
; The L41 path builds all four values with the same load + null-guarded select
; pattern; the L47 path repeats that pattern only for the last two pointers.
; NOTE(review): presumably this partial reuse of the same scalars is what
; triggers the "full overlap" gather path in the SLP vectorizer -- confirm
; against the change that introduced this test.
define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: [[TOP:.*:]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 16, i64 20>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 2
; CHECK-NEXT: br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]]
; CHECK: [[L41]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x ptr> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> zeroinitializer, <4 x i32> [[TMP8]]
; CHECK-NEXT: br label %[[L112:.*]]
; CHECK: [[L42]]:
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[DOTNOT280:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP1]], i32 2
; CHECK-NEXT: br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]]
; CHECK: [[L47]]:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x ptr> [[TMP14]], zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> zeroinitializer, <2 x i32> [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP13]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP18]], <2 x i32> [[TMP17]], i64 2)
; CHECK-NEXT: br label %[[L112]]
; CHECK: [[L112]]:
; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ [[TMP11]], %[[L42]] ]
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0
; CHECK-NEXT: store i32 [[TMP21]], ptr [[P2]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1
; CHECK-NEXT: store i32 [[TMP22]], ptr [[P1]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2
; CHECK-NEXT: store i32 [[TMP23]], ptr [[P2]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3
; CHECK-NEXT: store i32 [[TMP24]], ptr [[P1]], align 4
; CHECK-NEXT: ret void
;
; Entry block: form the four pointers %0+8, %0+12, %0+16, %0+20 and branch on
; %c1 (true -> L42, false -> L41).
top:
%2 = getelementptr i8, ptr %0, i64 8
%3 = getelementptr i8, ptr %0, i64 12
%4 = getelementptr i8, ptr %0, i64 16
%5 = getelementptr i8, ptr %0, i64 20
br i1 %c1, label %L42, label %L41

; L41: for each of the four pointers, load an i32 and select 0 instead of the
; loaded value when the pointer compares equal to null.
L41:
%.not276 = icmp eq ptr %2, null
%6 = load i32, ptr %2, align 4
%7 = select i1 %.not276, i32 0, i32 %6
%.not277 = icmp eq ptr %3, null
%8 = load i32, ptr %3, align 4
%9 = select i1 %.not277, i32 0, i32 %8
%.not278 = icmp eq ptr %4, null
%10 = load i32, ptr %4, align 4
%11 = select i1 %.not278, i32 0, i32 %10
%.not279 = icmp eq ptr %5, null
%12 = load i32, ptr %5, align 4
%13 = select i1 %.not279, i32 0, i32 %12
br label %L112

; L42: load the i32 at %2; go straight to L112 when it is zero, otherwise
; continue in L47.
L42:
%14 = load i32, ptr %2, align 4
%.not280 = icmp eq i32 %14, 0
br i1 %.not280, label %L112, label %L47

; L47: load %3 directly (no select), then apply the same load + null-guarded
; select pattern as L41 to %4 and %5 only.
L47:
%15 = load i32, ptr %3, align 4
%.not282 = icmp eq ptr %4, null
%16 = load i32, ptr %4, align 4
%17 = select i1 %.not282, i32 0, i32 %16
%.not283 = icmp eq ptr %5, null
%18 = load i32, ptr %5, align 4
%19 = select i1 %.not283, i32 0, i32 %18
br label %L112

; L112: merge the four values from the three predecessors. The edge from L42
; contributes constants 0 for three of the phis and the argument %1 for
; %value_phi12335; the edge from L47 contributes constant 0 for
; %value_phi10333. The four results are stored to %p2, %p1, %p2, %p1 in turn.
L112:
%value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ]
%value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ]
%value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ]
%value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ]
store i32 %value_phi10333, ptr %p2, align 4
store i32 %value_phi11334, ptr %p1, align 4
store i32 %value_phi12335, ptr %p2, align 4
store i32 %value_phi13336, ptr %p1, align 4
ret void
}

0 comments on commit bd52e4f

Please sign in to comment.