Skip to content

Commit

Permalink
Check for extends coming from outside of the plan
Browse files Browse the repository at this point in the history
  • Loading branch information
SamTebbs33 committed Jan 6, 2025
1 parent 0ff3e60 commit 356ca97
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 0 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe();

auto GetExtendKind = [](VPRecipeBase *R) {
// The extend could come from outside the plan.
if (!R)
return TargetTransformInfo::PR_None;
auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
if (!WidenCastR)
return TargetTransformInfo::PR_None;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1881,6 +1881,144 @@ exit: ; preds = %for.exit, %entry
ret void
}

define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_ext_outside_plan(
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-INTERLEAVE1-NEXT: entry:
; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]]
; CHECK-INTERLEAVE1: for.ph:
; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64
; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[EXT_B]], i64 0
; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP0]]
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 0
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64>
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw nsw <8 x i64> [[TMP3]], [[BROADCAST_SPLAT]]
; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i64> [[TMP4]], [[VEC_PHI]]
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]])
; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
;
; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_ext_outside_plan(
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-INTERLEAVED-NEXT: entry:
; CHECK-INTERLEAVED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]]
; CHECK-INTERLEAVED: for.ph:
; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64
; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[EXT_B]], i64 0
; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP0]]
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 0
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64>
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[WIDE_LOAD2]] to <8 x i64>
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw nsw <8 x i64> [[TMP4]], [[BROADCAST_SPLAT]]
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul nuw nsw <8 x i64> [[TMP5]], [[BROADCAST_SPLAT]]
; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <8 x i64> [[TMP6]], [[VEC_PHI]]
; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <8 x i64> [[TMP7]], [[VEC_PHI1]]
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP9]], [[TMP8]]
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[BIN_RDX]])
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
;
; CHECK-MAXBW-LABEL: define i64 @not_dotp_ext_outside_plan(
; CHECK-MAXBW-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-MAXBW-NEXT: entry:
; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0
; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]]
; CHECK-MAXBW: for.ph:
; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EXT_B]], i64 0
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP6]]
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP7]], i32 0
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP8]], align 2
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i64>
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = mul nuw nsw <vscale x 4 x i64> [[TMP9]], [[BROADCAST_SPLAT]]
; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 4 x i64> [[TMP10]], [[VEC_PHI]]
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-MAXBW: middle.block:
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> [[TMP11]])
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
;
entry:
%cmp = icmp eq i64 %n, 0
br i1 %cmp, label %exit, label %for.ph

for.ph: ; preds = %entry
%ext.b = zext i16 %b to i64
br label %for.body

for.body: ; preds = %for.body.lr.ph, %for.body
%iv = phi i64 [ 0, %for.ph ], [ %iv.next, %for.body ]
%accum = phi i64 [ 0, %for.ph ], [ %add, %for.body ]
%gep.a = getelementptr inbounds nuw i16, ptr %a, i64 %iv
%load.a = load i16, ptr %gep.a, align 2
%ext.a = zext i16 %load.a to i64
%mul = mul nuw nsw i64 %ext.a, %ext.b
%add = add i64 %mul, %accum
%iv.next = add nuw nsw i64 %iv, 1
%cmp.1 = icmp eq i64 %iv.next, %n
br i1 %cmp.1, label %exit, label %for.body

exit: ; preds = %for.cond.cleanup.loopexit, %entry
%result = phi i64 [ 0, %entry ], [ %add, %for.body ]
ret i64 %result
}

!7 = distinct !{!7, !8, !9, !10}
!8 = !{!"llvm.loop.mustprogress"}
!9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
Expand Down

0 comments on commit 356ca97

Please sign in to comment.