Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cherry-pick two fixes #164

Merged
merged 4 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 46 additions & 5 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -625,14 +625,17 @@ class AccessAnalysis {

AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
PredicatedScalarEvolution &PSE)
: TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
PredicatedScalarEvolution &PSE,
SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
: TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
LoopAliasScopes(LoopAliasScopes) {
// We're analyzing dependences across loop iterations.
BAA.enableCrossIterationMode();
}

/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
void addLoad(MemoryLocation Loc, Type *AccessTy, bool IsReadOnly) {
Loc = adjustLoc(Loc);
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
Expand All @@ -641,7 +644,8 @@ class AccessAnalysis {
}

/// Register a store.
void addStore(MemoryLocation &Loc, Type *AccessTy) {
void addStore(MemoryLocation Loc, Type *AccessTy) {
Loc = adjustLoc(Loc);
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
Expand Down Expand Up @@ -694,6 +698,32 @@ class AccessAnalysis {
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

/// Widen \p Loc so that it describes the access over every iteration of the
/// loop instead of over a single iteration, and return the widened copy.
MemoryLocation adjustLoc(MemoryLocation Loc) const {
  // Filter both scope lists through adjustAliasScopeList so that metadata
  // which only holds within one iteration is not carried along.
  auto &Tags = Loc.AATags;
  Tags.Scope = adjustAliasScopeList(Tags.Scope);
  Tags.NoAlias = adjustAliasScopeList(Tags.NoAlias);

  // The address moves from iteration to iteration but stays inside the
  // underlying object, so no precise size can be kept.
  Loc.Size = LocationSize::beforeOrAfterPointer();
  return Loc;
}

/// Discard alias scope lists that are only meaningful within one loop
/// iteration. Returns \p ScopeList unchanged when none of its scopes is
/// recorded in LoopAliasScopes; returns nullptr when \p ScopeList is null or
/// when at least one of its scopes is recorded there.
MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
  if (ScopeList == nullptr)
    return nullptr;

  // Keep it simple: a single iteration-local scope is enough to throw away
  // the entire list instead of building a filtered one.
  for (Metadata *Scope : ScopeList->operands())
    if (LoopAliasScopes.contains(cast<MDNode>(Scope)))
      return nullptr;

  return ScopeList;
}

/// Go over all memory accesses and check whether runtime pointer checks
/// are needed and build sets of dependency check candidates.
void processMemAccesses();
Expand Down Expand Up @@ -736,6 +766,10 @@ class AccessAnalysis {

/// The SCEV predicate containing all the SCEV-related assumptions.
PredicatedScalarEvolution &PSE;

/// Alias scopes that are declared inside the loop, and as such not valid
/// across iterations.
SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
};

} // end anonymous namespace
Expand Down Expand Up @@ -2149,6 +2183,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
SmallPtrSet<MDNode *, 8> LoopAliasScopes;

// Holds all the different accesses in the loop.
unsigned NumReads = 0;
Expand Down Expand Up @@ -2192,6 +2227,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (HasComplexMemInst)
continue;

// Record alias scopes defined inside the loop.
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
for (Metadata *Op : Decl->getScopeList()->operands())
LoopAliasScopes.insert(cast<MDNode>(Op));

// If this is a load, save it. If this instruction can read from memory
// but is not a load, then we quit. Notice that we don't handle function
// calls that read or write.
Expand Down Expand Up @@ -2273,7 +2313,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
}

MemoryDepChecker::DepCandidates DependentAccesses;
AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
LoopAliasScopes);

// Holds the analyzed pointers. We don't want to call getUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
Expand Down
22 changes: 3 additions & 19 deletions llvm/lib/Analysis/MemorySSAUpdater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,25 +692,9 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
continue;

// Determine incoming value and add it as incoming from IncBB.
if (MemoryUseOrDef *IncMUD = dyn_cast<MemoryUseOrDef>(IncomingAccess)) {
if (!MSSA->isLiveOnEntryDef(IncMUD)) {
Instruction *IncI = IncMUD->getMemoryInst();
assert(IncI && "Found MemoryUseOrDef with no Instruction.");
if (Instruction *NewIncI =
cast_or_null<Instruction>(VMap.lookup(IncI))) {
IncMUD = MSSA->getMemoryAccess(NewIncI);
assert(IncMUD &&
"MemoryUseOrDef cannot be null, all preds processed.");
}
}
NewPhi->addIncoming(IncMUD, IncBB);
} else {
MemoryPhi *IncPhi = cast<MemoryPhi>(IncomingAccess);
if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi))
NewPhi->addIncoming(NewDefPhi, IncBB);
else
NewPhi->addIncoming(IncPhi, IncBB);
}
NewPhi->addIncoming(
getNewDefiningAccessForClone(IncomingAccess, VMap, MPhiMap, MSSA),
IncBB);
}
if (auto *SingleAccess = onlySingleValue(NewPhi)) {
MPhiMap[Phi] = SingleAccess;
Expand Down
103 changes: 103 additions & 0 deletions llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt -passes='print<access-info>' -disable-output 2>&1 < %s | FileCheck %s

; PR79137: If the noalias.scope.decl is located inside the loop, we cannot
; assume that the accesses don't alias across iterations.

; Both llvm.experimental.noalias.scope.decl calls are placed in the loop
; block. The expected analysis output below reports a backward loop-carried
; dependence from %load.prev to the store, despite the !noalias metadata.
define void @test_scope_in_loop(ptr %arg, i64 %num) {
; CHECK-LABEL: 'test_scope_in_loop'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load.prev = load i8, ptr %prev.ptr, align 1, !alias.scope !0, !noalias !3 ->
; CHECK-NEXT: store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
; CHECK-EMPTY:
; CHECK-NEXT: Forward:
; CHECK-NEXT: %load.cur = load i8, ptr %cur.ptr, align 1, !alias.scope !3 ->
; CHECK-NEXT: store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%icmp = icmp ult i64 %num, 2
br i1 %icmp, label %exit, label %preheader

preheader:
%arg.1 = getelementptr inbounds i8, ptr %arg, i64 1
%end = add i64 %num, -2
br label %loop

loop:
; %prev.ptr is %arg on loop entry and the previous iteration's %cur.ptr
; afterwards, so it reads the byte that the previous iteration stored.
%prev.ptr = phi ptr [ %cur.ptr, %loop ], [ %arg, %preheader ]
%iv = phi i64 [ %iv.next, %loop ], [ 0, %preheader ]
%cur.ptr = getelementptr inbounds i8, ptr %arg.1, i64 %iv
; The scope declarations are executed on every iteration.
call void @llvm.experimental.noalias.scope.decl(metadata !0)
call void @llvm.experimental.noalias.scope.decl(metadata !3)
%load.prev = load i8, ptr %prev.ptr, align 1, !alias.scope !0, !noalias !3
%load.cur = load i8, ptr %cur.ptr, align 1, !alias.scope !3
%add = add i8 %load.cur, %load.prev
store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
%iv.next = add nuw i64 %iv, 1
%cmp = icmp eq i64 %iv, %end
br i1 %cmp, label %exit, label %loop

exit:
ret void
}

; Same loads and store as @test_scope_in_loop, but the two
; llvm.experimental.noalias.scope.decl calls sit in the preheader instead of
; the loop block. The expected analysis output below reports the memory
; dependences as safe.
define void @test_scope_out_of_loop(ptr %arg, i64 %num) {
; CHECK-LABEL: 'test_scope_out_of_loop'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%icmp = icmp ult i64 %num, 2
br i1 %icmp, label %exit, label %preheader

preheader:
; The scope declarations are executed once, before the loop is entered.
call void @llvm.experimental.noalias.scope.decl(metadata !0)
call void @llvm.experimental.noalias.scope.decl(metadata !3)
%arg.1 = getelementptr inbounds i8, ptr %arg, i64 1
%end = add i64 %num, -2
br label %loop

loop:
; %prev.ptr is %arg on loop entry and the previous iteration's %cur.ptr
; afterwards.
%prev.ptr = phi ptr [ %cur.ptr, %loop ], [ %arg, %preheader ]
%iv = phi i64 [ %iv.next, %loop ], [ 0, %preheader ]
%cur.ptr = getelementptr inbounds i8, ptr %arg.1, i64 %iv
%load.prev = load i8, ptr %prev.ptr, align 1, !alias.scope !0, !noalias !3
%load.cur = load i8, ptr %cur.ptr, align 1, !alias.scope !3
%add = add i8 %load.cur, %load.prev
store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
%iv.next = add nuw i64 %iv, 1
%cmp = icmp eq i64 %iv, %end
br i1 %cmp, label %exit, label %loop

exit:
ret void
}

declare void @llvm.experimental.noalias.scope.decl(metadata)

!0 = !{!1}
!1 = distinct !{!1, !2}
!2 = distinct !{!2}
!3 = !{!4}
!4 = distinct !{!4, !5}
!5 = distinct !{!5}
78 changes: 78 additions & 0 deletions llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -O3 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

; Helper: loads one byte from %prev and one byte from %val, adds them as i32,
; truncates the sum to i8 and stores it back to %val. Both pointer parameters
; are marked noalias.
define internal void @acc(ptr noalias noundef %val, ptr noalias noundef %prev) {
entry:
%0 = load i8, ptr %prev, align 1
%conv = zext i8 %0 to i32
%1 = load i8, ptr %val, align 1
%conv1 = zext i8 %1 to i32
%add = add nsw i32 %conv1, %conv
%conv2 = trunc i32 %add to i8
store i8 %conv2, ptr %val, align 1
ret void
}

; This loop should not get vectorized.
; For each index i starting at 1 while i < %num, the loop calls @acc with
; &vals[i] as %val and &vals[i - 1] as %prev, so each iteration reads the
; element that the previous iteration wrote.
define void @accsum(ptr noundef %vals, i64 noundef %num) #0 {
; CHECK-LABEL: define void @accsum(
; CHECK-SAME: ptr nocapture noundef [[VALS:%.*]], i64 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[NUM]], 1
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[LOAD_INITIAL:%.*]] = load i8, ptr [[VALS]], align 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[STORE_FORWARDED:%.*]] = phi i8 [ [[LOAD_INITIAL]], [[FOR_BODY_PREHEADER]] ], [ [[ADD_I:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[I_02]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[ADD_I]] = add i8 [[TMP0]], [[STORE_FORWARDED]]
; CHECK-NEXT: store i8 [[ADD_I]], ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_02]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUM]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%i.0 = phi i64 [ 1, %entry ], [ %inc, %for.inc ]
%cmp = icmp ult i64 %i.0, %num
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond
br label %for.end

for.body: ; preds = %for.cond
; %arrayidx addresses vals[i]; %arrayidx1 addresses vals[i - 1].
%arrayidx = getelementptr inbounds i8, ptr %vals, i64 %i.0
%sub = sub i64 %i.0, 1
%arrayidx1 = getelementptr inbounds i8, ptr %vals, i64 %sub
call void @acc(ptr noundef %arrayidx, ptr noundef %arrayidx1)
br label %for.inc

for.inc: ; preds = %for.body
%inc = add i64 %i.0, 1
br label %for.cond

for.end: ; preds = %for.cond.cleanup
ret void
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

attributes #0 = { "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87"}
;.
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"acc: %val"}
; CHECK: [[META2]] = distinct !{[[META2]], !"acc"}
; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"acc: %prev"}
;.
Loading