Skip to content

Commit

Permalink
[LoopUnroll] Ignore inlinable calls if unrolling is forced
Browse files Browse the repository at this point in the history
`NumInlineCandidates` counts candidates that are _very likely_ to be
inlined. This is a useful metric, but causes linker warnings if:
- the loop to be unrolled has had unrolling forced by the user, and
- the inliner fails to inline the call (e.g., because it's considered a
  very cold callsite)

Fixes #88141
  • Loading branch information
gburgessiv committed Nov 18, 2024
1 parent 2f925d7 commit fdc96e4
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 3 deletions.
15 changes: 12 additions & 3 deletions llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1179,9 +1179,11 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// automatic unrolling from interfering with the user requested
// transformation.
Loop *ParentL = L->getParentLoop();
const bool UnrollIsForcedByUser =
hasUnrollTransformation(L) == TM_ForcedByUser;
if (ParentL != nullptr &&
hasUnrollAndJamTransformation(ParentL) == TM_ForcedByUser &&
hasUnrollTransformation(L) != TM_ForcedByUser) {
!UnrollIsForcedByUser) {
LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
<< " llvm.loop.unroll_and_jam.\n");
return LoopUnrollResult::Unmodified;
Expand All @@ -1191,7 +1193,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// loop has an explicit unroll-and-jam pragma. This is to prevent automatic
// unrolling from interfering with the user requested transformation.
if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser &&
hasUnrollTransformation(L) != TM_ForcedByUser) {
!UnrollIsForcedByUser) {
LLVM_DEBUG(
dbgs()
<< " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
Expand Down Expand Up @@ -1240,7 +1242,14 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
if (OptForSize)
UP.Threshold = std::max(UP.Threshold, LoopSize + 1);

if (UCE.NumInlineCandidates != 0) {
// Ignore the potential for inlining if unrolling is forced by the user. It's
// only _very likely_ that `NumInlineCandidates` functions will be inlined;
// things like profile data can make this significantly less likely.
//
// TODO: It could be beneficial to allow this check in pre-link LTO, since
// that pipeline prefers to minimize IR size, but that requires a decent
// amount of extra plumbing.
if (!UnrollIsForcedByUser && UCE.NumInlineCandidates != 0) {
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
}
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/Transforms/LoopUnroll/unroll-calls-if-forced.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-unroll -S < %s | FileCheck %s
;
; Ensure that loop unrolling occurs if the user forces it, even if there are
; calls that may be inlined.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define internal void @foo() {
; CHECK-LABEL: define internal void @foo() {
; CHECK-NEXT: ret void
;
ret void
}

define void @forced(ptr nocapture %a) {
; CHECK-LABEL: define void @forced(
; CHECK-SAME: ptr nocapture [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_1:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[INC_1:%.*]] = add nsw i32 [[TMP1]], 1
; CHECK-NEXT: store i32 [[INC_1]], ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1]], 64
; CHECK-NEXT: br i1 [[EXITCOND_1]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
call void @foo()
%0 = load i32, ptr %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, ptr %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0

for.end:
ret void
}

!0 = distinct !{!0, !{!"llvm.loop.unroll.enable"},
!{!"llvm.loop.unroll.count", i32 2}}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
;.

0 comments on commit fdc96e4

Please sign in to comment.