Skip to content

Commit

Permalink
LSV: forbid load-cycles when vectorizing; fix bug (#104815)
Browse files Browse the repository at this point in the history
Forbid load-load cycles, which would crash the LoadStoreVectorizer when
reordering instructions.

Fixes #37865.
  • Loading branch information
artagnon authored Aug 22, 2024
1 parent f673882 commit c46b41a
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 5 deletions.
24 changes: 23 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -998,10 +998,32 @@ bool Vectorizer::isSafeToMove(
LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
<< *ChainBegin << ")\n");

assert(isa<LoadInst>(ChainElem) == IsLoadChain);
assert(isa<LoadInst>(ChainElem) == IsLoadChain &&
isa<LoadInst>(ChainBegin) == IsLoadChain);

if (ChainElem == ChainBegin)
return true;

if constexpr (IsLoadChain) {
// If ChainElem depends on ChainBegin, they're not safe to reorder.
SmallVector<Instruction *, 8> Worklist;
Worklist.emplace_back(ChainElem);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
for (Use &O : I->operands()) {

This comment has been minimized.

Copy link
@JoelWee

JoelWee Aug 27, 2024

Contributor

Could we do some caching here? This walk is causing some of our tests to time out (e.g. this test here https://github.com/google/jax/blob/main/tests/random_lax_test.py#L1309).

E.g. the following code would visit %x0 2^20 times, but a single visit is enough to determine whether it's ok

...
%x1 = add i32 %x0, %x0
%x2 = add i32 %x1, %x1
...
%x21 = add i32 %x20, %x20
...

This comment has been minimized.

Copy link
@fhahn

fhahn Aug 28, 2024

Contributor

If caching isn't possible, we would need to at least add a cut-off, like in other places where we walk use-lists to avoid this kind of compile-time explosion

This comment has been minimized.

Copy link
@fhahn

fhahn Aug 30, 2024

Contributor

@artagnon any chance you could take a look at the compile-time regression?

This comment has been minimized.

Copy link
@artagnon

artagnon Aug 30, 2024

Author Contributor

@fhahn Looking into it, thanks.

if (isa<PHINode>(O))
continue;
if (auto *J = dyn_cast<Instruction>(O)) {
if (J == ChainBegin) {
LLVM_DEBUG(dbgs() << "LSV: dependent loads; not safe to reorder\n");
return false;
}
Worklist.emplace_back(J);
}
}
}
}

// Invariant loads can always be reordered; by definition they are not
// clobbered by stores.
if (isInvariantLoad(ChainElem))
Expand Down
79 changes: 75 additions & 4 deletions llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
; REQUIRES: asserts
; RUN: not --crash opt -mtriple=aarch64 -passes=load-store-vectorizer \
; RUN: -disable-output %s 2>&1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=aarch64 -passes=load-store-vectorizer -S %s | FileCheck %s

; LSV was attempting to vectorize this earlier, but crashed while re-ordering
; instructions due to the load-load cycle. Now, the candidate loads are no
; longer considered safe for reordering.

define i32 @load_cycle(ptr %x) {
; CHECK: Unexpected cycle while re-ordering instructions
; CHECK-LABEL: define i32 @load_cycle(
; CHECK-SAME: ptr [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GEP_X_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 0, i32 1
; CHECK-NEXT: [[LOAD_X_1:%.*]] = load i32, ptr [[GEP_X_1]], align 4
; CHECK-NEXT: [[REM:%.*]] = urem i32 [[LOAD_X_1]], 1
; CHECK-NEXT: [[GEP_X_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[REM]], i32 0
; CHECK-NEXT: [[LOAD_X_2:%.*]] = load i32, ptr [[GEP_X_2]], align 4
; CHECK-NEXT: [[RET:%.*]] = add i32 [[LOAD_X_2]], [[LOAD_X_1]]
; CHECK-NEXT: ret i32 [[RET]]
;
entry:
%gep.x.1 = getelementptr inbounds [2 x i32], ptr %x, i32 0, i32 1
%load.x.1 = load i32, ptr %gep.x.1
Expand All @@ -13,3 +26,61 @@ entry:
%ret = add i32 %load.x.2, %load.x.1
ret i32 %ret
}

; Negative test for the load-cycle fix: the address of %load.x.2 depends
; transitively on the value of %load.x.1 (via %mul -> %add -> %sub.1 ->
; %sub.2 -> %gep.x.2), so moving the two loads adjacent to each other would
; violate the dependence. The autogenerated CHECK lines assert that the IR
; is left completely unvectorized.
define i32 @load_cycle2(ptr %x, i32 %y) {
; CHECK-LABEL: define i32 @load_cycle2(
; CHECK-SAME: ptr [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GEP_X_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[Y]], i32 1
; CHECK-NEXT: [[LOAD_X_1:%.*]] = load i32, ptr [[GEP_X_1]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[LOAD_X_1]], 2
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[Y]], [[MUL]]
; CHECK-NEXT: [[SUB_1:%.*]] = sub i32 [[ADD]], [[LOAD_X_1]]
; CHECK-NEXT: [[SUB_2:%.*]] = sub i32 [[SUB_1]], [[LOAD_X_1]]
; CHECK-NEXT: [[GEP_X_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[SUB_2]], i32 0
; CHECK-NEXT: [[LOAD_X_2:%.*]] = load i32, ptr [[GEP_X_2]], align 4
; CHECK-NEXT: [[RET:%.*]] = add i32 [[LOAD_X_2]], [[LOAD_X_1]]
; CHECK-NEXT: ret i32 [[RET]]
;
entry:
  %gep.x.1 = getelementptr inbounds [2 x i32], ptr %x, i32 %y, i32 1
  %load.x.1 = load i32, ptr %gep.x.1
  ; The chain below feeds %load.x.1's value into the index of %gep.x.2,
  ; creating the load-load dependence that must block reordering.
  %mul = mul i32 %load.x.1, 2
  %add = add i32 %y, %mul
  %sub.1 = sub i32 %add, %load.x.1
  %sub.2 = sub i32 %sub.1, %load.x.1
  %gep.x.2 = getelementptr inbounds [2 x i32], ptr %x, i32 %sub.2, i32 0
  %load.x.2 = load i32, ptr %gep.x.2
  %ret = add i32 %load.x.2, %load.x.1
  ret i32 %ret
}

@global.1 = global i32 0
@global.2 = global [1 x [3 x i32]] zeroinitializer

; Negative test: a chain of dependent loads through global arrays, where each
; load's address is computed from the previous load's value
; (%load.1 -> %urem.1 -> %gep.1/%gep.2 -> %load.2 -> %urem.2 -> %gep.3/%gep.4
; -> %load.3). The candidate loads are therefore unsafe to reorder, and the
; CHECK lines verify that no vectorization takes place.
define i16 @load_cycle3() {
; CHECK-LABEL: define i16 @load_cycle3() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, ptr @global.1, align 4
; CHECK-NEXT: [[UREM_1:%.*]] = urem i32 [[LOAD_1]], 1
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 [[UREM_1]]
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds [3 x i32], ptr [[GEP_1]], i32 0, i32 2
; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, ptr [[GEP_2]], align 4
; CHECK-NEXT: [[UREM_2:%.*]] = urem i32 [[LOAD_2]], 1
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 [[UREM_2]]
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds [3 x i32], ptr [[GEP_3]], i32 0, i32 1
; CHECK-NEXT: [[LOAD_3:%.*]] = load i32, ptr [[GEP_4]], align 4
; CHECK-NEXT: ret i16 0
;
entry:
  %load.1 = load i32, ptr @global.1
  ; %urem.1 feeds %load.1's value into the index of the next gep, making
  ; %load.2's address depend on %load.1.
  %urem.1 = urem i32 %load.1, 1
  %gep.1 = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 %urem.1
  %gep.2 = getelementptr inbounds [3 x i32], ptr %gep.1, i32 0, i32 2
  %load.2 = load i32, ptr %gep.2
  ; Same pattern again: %load.3's address depends on %load.2's value.
  %urem.2 = urem i32 %load.2, 1
  %gep.3 = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 %urem.2
  %gep.4 = getelementptr inbounds [3 x i32], ptr %gep.3, i32 0, i32 1
  %load.3 = load i32, ptr %gep.4
  ret i16 0
}

0 comments on commit c46b41a

Please sign in to comment.