From 0e8d022ffe008dd7afffa5140c4d87ce3d77902d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 18 Dec 2024 14:47:16 +0000 Subject: [PATCH] [VPlan] Handle exit phis with multiple operands in addUsersInExitBlocks. (#120260) Currently addUsersInExitBlocks incorrectly assumes exit phis only have a single operand, which may not be the case for loops with early exits when they share a common exit block. Also further relax the assertion in fixupIVUsers to allow exit values if they come from the loop latch/middle.block. PR: https://github.com/llvm/llvm-project/pull/120260 --- .../Transforms/Vectorize/LoopVectorize.cpp | 77 ++++++++----------- .../LoopVectorize/early_exit_legality.ll | 4 +- .../single_early_exit_live_outs.ll | 44 +++++++++-- 3 files changed, 71 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a6acc710a34c89..a8511483e00fbe 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2905,8 +2905,17 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, } } - assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) && - "Expected a single exit block for escaping values"); + assert((MissingVals.empty() || + all_of(MissingVals, + [MiddleBlock, this](const std::pair &P) { + return all_of( + predecessors(cast(P.first)->getParent()), + [MiddleBlock, this](BasicBlock *Pred) { + return Pred == MiddleBlock || + Pred == OrigLoop->getLoopLatch(); + }); + })) && + "Expected escaping values from latch/middle.block only"); for (auto &I : MissingVals) { PHINode *PHI = cast(I.first); @@ -9049,22 +9058,23 @@ addUsersInExitBlocks(VPlan &Plan, // Introduce extract for exiting values and update the VPIRInstructions // modeling the corresponding LCSSA phis. 
for (VPIRInstruction *ExitIRI : ExitUsersToFix) { - VPValue *V = ExitIRI->getOperand(0); - // Pass live-in values used by exit phis directly through to their users in - // the exit block. - if (V->isLiveIn()) - continue; + for (const auto &[Idx, Op] : enumerate(ExitIRI->operands())) { + // Pass live-in values used by exit phis directly through to their users + // in the exit block. + if (Op->isLiveIn()) + continue; - // Currently only live-ins can be used by exit values from blocks not - // exiting via the vector latch through to the middle block. - if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB) - return false; + // Currently only live-ins can be used by exit values from blocks not + // exiting via the vector latch through to the middle block. + if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB) + return false; - LLVMContext &Ctx = ExitIRI->getInstruction().getContext(); - VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd, - {V, Plan.getOrAddLiveIn(ConstantInt::get( - IntegerType::get(Ctx, 32), 1))}); - ExitIRI->setOperand(0, Ext); + LLVMContext &Ctx = ExitIRI->getInstruction().getContext(); + VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd, + {Op, Plan.getOrAddLiveIn(ConstantInt::get( + IntegerType::get(Ctx, 32), 1))}); + ExitIRI->setOperand(Idx, Ext); + } } return true; } @@ -10226,36 +10236,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } - if (LVL.hasUncountableEarlyExit()) { - if (!EnableEarlyExitVectorization) { - reportVectorizationFailure("Auto-vectorization of loops with uncountable " - "early exit is not enabled", - "UncountableEarlyExitLoopsDisabled", ORE, L); - return false; - } - - // In addUsersInExitBlocks we already bail out if there is an outside use - // of a loop-defined variable, but it ignores induction variables which are - // handled by InnerLoopVectorizer::fixupIVUsers. We need to bail out if we - // encounter induction variables too otherwise fixupIVUsers will crash. 
- BasicBlock *LoopLatch = L->getLoopLatch(); - for (const auto &Induction : LVL.getInductionVars()) { - PHINode *Ind = Induction.first; - Instruction *IndUpdate = - cast(Ind->getIncomingValueForBlock(LoopLatch)); - for (Instruction *I : {cast(Ind), IndUpdate}) { - for (User *U : I->users()) { - Instruction *UI = cast(U); - if (!L->contains(UI)) { - reportVectorizationFailure( - "Auto-vectorization of loops with uncountable early exits and " - "outside uses of induction variables unsupported", - "UncountableEarlyExitLoopIndLiveOutsUnsupported", ORE, L); - return false; - } - } - } - } + if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) { + reportVectorizationFailure("Auto-vectorization of loops with uncountable " + "early exit is not enabled", + "UncountableEarlyExitLoopsDisabled", ORE, L); + return false; } // Entrance to the VPlan-native vectorization path. Outer loops are processed diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll index ac78c40ec92c6c..8df0eaec6a8c9d 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll @@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1' ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exits and outside uses of induction variables unsupported +; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet. 
entry: %p1 = alloca [1024 x i8] %p2 = alloca [1024 x i8] @@ -141,7 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit' ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exits and outside uses of induction variables unsupported +; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet. entry: %p1 = alloca [1024 x i8] call void @init_mem(ptr %p1, i64 1024) diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 7f00e77b9169dd..085438aa80f246 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s declare void @init_mem(ptr, i64); @@ -527,24 +527,50 @@ define i64 @diff_exit_block_pre_inc_use2() { ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
i8, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label 
[[LOOP_INC]], label [[LOOP_EARLY_EXIT]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ] +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ 67, [[MIDDLE_SPLIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL1]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RETVAL2]] ; entry: @@ -995,3 +1021,9 @@ declare i32 @foo(i32) readonly declare @foo_vec() attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;.