forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SLP]Do not create new vector node if scalars fully overlap with the …
…existing one. If the list of scalars is vectorized as part of the same vector node, there is no need to generate the vector node again; it will be handled as part of the overlapping matching. Fixes llvm#113810
- Loading branch information
1 parent
21a3df7
commit bd52e4f
Showing
2 changed files
with
100 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
93 changes: 93 additions & 0 deletions
93
llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s | ||
|
||
; Regression test for llvm#113810 (per the commit message): the SLP vectorizer
; should not build a second vector node for a list of scalars that is already
; fully covered by an existing node.
;
; Shape of the input: four pointers are derived from %0 at byte offsets 8, 12,
; 16 and 20 in the entry block, then consumed on three different paths that
; all merge into four i32 phis in L112.
define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) { | ||
; CHECK-LABEL: define void @test( | ||
; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) { | ||
; CHECK-NEXT: [[TOP:.*:]] | ||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 | ||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0 | ||
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer | ||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 16, i64 20> | ||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 2 | ||
; CHECK-NEXT: br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]] | ||
; CHECK: [[L41]]: | ||
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x ptr> [[TMP5]], zeroinitializer | ||
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 | ||
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> zeroinitializer, <4 x i32> [[TMP8]] | ||
; CHECK-NEXT: br label %[[L112:.*]] | ||
; CHECK: [[L42]]: | ||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 | ||
; CHECK-NEXT: [[DOTNOT280:%.*]] = icmp eq i32 [[TMP10]], 0 | ||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP1]], i32 2 | ||
; CHECK-NEXT: br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]] | ||
; CHECK: [[L47]]: | ||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 1 | ||
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 | ||
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> <i32 2, i32 3> | ||
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x ptr> [[TMP14]], zeroinitializer | ||
; CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 | ||
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> zeroinitializer, <2 x i32> [[TMP16]] | ||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP13]], i32 1 | ||
; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP18]], <2 x i32> [[TMP17]], i64 2) | ||
; CHECK-NEXT: br label %[[L112]] | ||
; CHECK: [[L112]]: | ||
; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ [[TMP11]], %[[L42]] ] | ||
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0 | ||
; CHECK-NEXT: store i32 [[TMP21]], ptr [[P2]], align 4 | ||
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1 | ||
; CHECK-NEXT: store i32 [[TMP22]], ptr [[P1]], align 4 | ||
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2 | ||
; CHECK-NEXT: store i32 [[TMP23]], ptr [[P2]], align 4 | ||
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3 | ||
; CHECK-NEXT: store i32 [[TMP24]], ptr [[P1]], align 4 | ||
; CHECK-NEXT: ret void | ||
; | ||
; Entry block: compute the four addresses %0+8, %0+12, %0+16, %0+20 (4 bytes
; apart) and branch on %c1.
top: | ||
%2 = getelementptr i8, ptr %0, i64 8 | ||
%3 = getelementptr i8, ptr %0, i64 12 | ||
%4 = getelementptr i8, ptr %0, i64 16 | ||
%5 = getelementptr i8, ptr %0, i64 20 | ||
br i1 %c1, label %L42, label %L41 | ||
|
||
; L41 uses all four addresses the same way: compare against null, load an i32,
; and select 0 when the compare is true. The loads themselves are
; unconditional; only the selected value depends on the compare.
L41: | ||
%.not276 = icmp eq ptr %2, null | ||
%6 = load i32, ptr %2, align 4 | ||
%7 = select i1 %.not276, i32 0, i32 %6 | ||
%.not277 = icmp eq ptr %3, null | ||
%8 = load i32, ptr %3, align 4 | ||
%9 = select i1 %.not277, i32 0, i32 %8 | ||
%.not278 = icmp eq ptr %4, null | ||
%10 = load i32, ptr %4, align 4 | ||
%11 = select i1 %.not278, i32 0, i32 %10 | ||
%.not279 = icmp eq ptr %5, null | ||
%12 = load i32, ptr %5, align 4 | ||
%13 = select i1 %.not279, i32 0, i32 %12 | ||
br label %L112 | ||
|
||
; L42 loads only the first address and goes straight to L112 when the loaded
; value is zero, otherwise continues to L47.
L42: | ||
%14 = load i32, ptr %2, align 4 | ||
%.not280 = icmp eq i32 %14, 0 | ||
br i1 %.not280, label %L112, label %L47 | ||
|
||
; L47 treats the addresses non-uniformly: %3 is loaded with no null compare,
; while %4 and %5 get the same compare/load/select pattern as in L41.
; NOTE(review): presumably the %4/%5 pair is the scalar list that "fully
; overlaps" the existing 4-wide pointer node; the expected output takes lanes
; 2 and 3 of that vector with a shufflevector. Confirm against the
; SLPVectorizer change.
L47: | ||
%15 = load i32, ptr %3, align 4 | ||
%.not282 = icmp eq ptr %4, null | ||
%16 = load i32, ptr %4, align 4 | ||
%17 = select i1 %.not282, i32 0, i32 %16 | ||
%.not283 = icmp eq ptr %5, null | ||
%18 = load i32, ptr %5, align 4 | ||
%19 = select i1 %.not283, i32 0, i32 %18 | ||
br label %L112 | ||
|
||
; L112 merges the three paths with one phi per address slot (listed from the
; fourth slot down to the first). The values are then stored in slot order,
; alternating between %p2 and %p1. On the L42 edge every slot is the constant 0
; except the third, which receives the argument %1.
L112: | ||
%value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ] | ||
%value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ] | ||
%value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ] | ||
%value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ] | ||
store i32 %value_phi10333, ptr %p2, align 4 | ||
store i32 %value_phi11334, ptr %p1, align 4 | ||
store i32 %value_phi12335, ptr %p2, align 4 | ||
store i32 %value_phi13336, ptr %p1, align 4 | ||
ret void | ||
} |