Skip to content

Commit

Permalink
Merge pull request #26985 from JuliaLang/kf/simdloop
Browse files Browse the repository at this point in the history
[NewOptimizer] Make simdloop marker more robust
  • Loading branch information
Keno authored May 7, 2018
2 parents 989de79 + 56d7ebe commit 71f6bfe
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 41 deletions.
4 changes: 2 additions & 2 deletions base/compiler/ssair/queries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ function stmt_effect_free(@nospecialize(stmt), src, mod::Module)
if isa(stmt, Expr)
e = stmt::Expr
head = e.head
is_meta_expr_head(head) && return true
if head === :static_parameter
# if we aren't certain enough about the type, it might be an UndefVarError at runtime
return isa(e.typ, Const) || issingletontype(widenconst(e.typ))
Expand Down Expand Up @@ -45,9 +44,10 @@ function stmt_effect_free(@nospecialize(stmt), src, mod::Module)
eT ⊑ fT || return false
end
return true
elseif head === :isdefined || head === :the_exception || head === :copyast
elseif head === :isdefined || head === :the_exception || head === :copyast || head === :inbounds || head === :boundscheck
return true
else
# e.g. :simdloop
return false
end
end
Expand Down
2 changes: 1 addition & 1 deletion deps/Versions.make
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
LLVM_VER = 6.0.0
LLVM_BB_REL = 1
LLVM_BB_REL = 2
PCRE_VER = 10.30
DSFMT_VER = 2.2.3
LAPACK_VER = 3.5.0
Expand Down
4 changes: 4 additions & 0 deletions deps/llvm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ $(eval $(call LLVM_PATCH,llvm-D30114)) # PPC remove for 5.0
$(eval $(call LLVM_PATCH,llvm-PR36292)) # PPC fixes #26249, remove for 6.0
$(eval $(call LLVM_PATCH,llvm-D39297-musl-dynamiclibrary-pre5)) # Remove for 6.0
$(eval $(call LLVM_PATCH,llvm-D28476-musl-targetlibraryinfo_3.9)) # Remove for 5.0
$(eval $(call LLVM_PATCH,llvm-D46460))
ifeq ($(BUILD_LLVM_CLANG),1)
$(eval $(call LLVM_PATCH,compiler_rt-3.9-glibc_2.25.90)) # Remove for 5.0
$(eval $(call LLVM_PATCH,clang-D28477)) # Remove for 5.0
Expand Down Expand Up @@ -452,6 +453,7 @@ $(eval $(call LLVM_PATCH,llvm-D30114)) # PPC remove for 5.0
$(eval $(call LLVM_PATCH,llvm-PR36292)) # PPC fixes #26249, remove for 6.0
$(eval $(call LLVM_PATCH,llvm-D39297-musl-dynamiclibrary-pre5)) # Remove for 6.0
$(eval $(call LLVM_PATCH,llvm-D28476-musl-targetlibraryinfo_4.0)) # Remove for 5.0
$(eval $(call LLVM_PATCH,llvm-D46460))
ifeq ($(BUILD_LLVM_CLANG),1)
$(eval $(call LLVM_PATCH,compiler_rt-3.9-glibc_2.25.90)) # Remove for 5.0
$(eval $(call LLVM_PATCH,clang-D28477)) # Remove for 5.0
Expand All @@ -470,6 +472,7 @@ $(eval $(call LLVM_PATCH,llvm-D42262-jumpthreading-not-i1)) # remove for 7.0
$(eval $(call LLVM_PATCH,llvm-PPC-addrspaces)) # PPC
$(eval $(call LLVM_PATCH,llvm-PR36292-5.0)) # PPC fixes #26249, remove for 6.0
$(eval $(call LLVM_PATCH,llvm-D39297-musl-dynamiclibrary)) # Remove for 6.0
$(eval $(call LLVM_PATCH,llvm-D46460))
else ifeq ($(LLVM_VER_SHORT),6.0)
$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_4.0))
$(eval $(call LLVM_PATCH,llvm-D34078-vectorize-fdiv))
Expand All @@ -484,6 +487,7 @@ $(eval $(call LLVM_PATCH,llvm-6.0-D44650)) # mingw32 build fix
$(eval $(call LLVM_PATCH,llvm-D45008)) # remove for 7.0
$(eval $(call LLVM_PATCH,llvm-D45070)) # remove for 7.0
$(eval $(call LLVM_PATCH,llvm-6.0.0-ifconv-D45819)) # remove for 7.0
$(eval $(call LLVM_PATCH,llvm-D46460))
endif # LLVM_VER

# Remove hardcoded OS X requirements in compiler-rt cmake build
Expand Down
26 changes: 26 additions & 0 deletions deps/patches/llvm-D46460.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Index: lib/Analysis/LoopInfo.cpp
===================================================================
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -223,15 +223,14 @@
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
TerminatorInst *TI = BB->getTerminator();
- MDNode *MD = nullptr;

// Check if this terminator branches to the loop header.
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H) {
- MD = TI->getMetadata(LLVMContext::MD_loop);
- break;
- }
- }
+ bool IsPredecessor = any_of(TI->successors(),
+ [=](BasicBlock *Successor) { return Successor == H; });
+ if (!IsPredecessor)
+ continue;
+
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
if (!MD)
return nullptr;

10 changes: 9 additions & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ static Function *jlegal_func;
static Function *jl_alloc_obj_func;
static Function *jl_newbits_func;
static Function *jl_typeof_func;
static Function *jl_simdloop_marker_func;
static Function *jl_write_barrier_func;
static Function *jlisa_func;
static Function *jlsubtype_func;
Expand Down Expand Up @@ -4084,7 +4085,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr)
maybe_decay_untracked(boxed(ctx, ast))), true, jl_expr_type);
}
else if (head == simdloop_sym) {
llvm::annotateSimdLoop(ctx.builder.GetInsertBlock());
ctx.builder.CreateCall(prepare_call(jl_simdloop_marker_func));
return jl_cgval_t();
}
else if (head == goto_ifnot_sym) {
Expand Down Expand Up @@ -7437,6 +7438,13 @@ static void init_julia_llvm_env(Module *m)
add_return_attr(jl_newbits_func, Attribute::NonNull);
add_named_global(jl_newbits_func, (void*)jl_new_bits);

jl_simdloop_marker_func = Function::Create(FunctionType::get(T_void, {}, false),
Function::ExternalLinkage,
"julia.simdloop_marker");
jl_simdloop_marker_func->addFnAttr(Attribute::NoUnwind);
jl_simdloop_marker_func->addFnAttr(Attribute::NoRecurse);
jl_simdloop_marker_func->addFnAttr(Attribute::InaccessibleMemOnly);

jl_typeof_func = Function::Create(FunctionType::get(T_prjlvalue, {T_prjlvalue}, false),
Function::ExternalLinkage,
"julia.typeof");
Expand Down
5 changes: 3 additions & 2 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool dump
PM->add(createLateLowerGCFramePass());
PM->add(createLowerPTLSPass(dump_native));
#endif
PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "simdloop" as LLVM parallel loop
if (dump_native)
PM->add(createMultiVersioningPass());
return;
Expand All @@ -145,8 +146,8 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool dump
}
// list of passes from vmkit
PM->add(createCFGSimplificationPass()); // Clean up disgusting code
PM->add(createDeadInstEliminationPass());
PM->add(createPromoteMemoryToRegisterPass()); // Kill useless allocas
PM->add(createDeadCodeEliminationPass());
PM->add(createSROAPass()); // Kill useless allocas

// Due to bugs and missing features, LLVM < 5.0 does not properly propagate
// our invariants. We need to do GC rooting here. This reduces the
Expand Down
100 changes: 67 additions & 33 deletions src/llvm-simdloop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,26 @@ bool annotateSimdLoop(BasicBlock *incr)
return false;
}

/// Pass that lowers a loop marked by annotateSimdLoop.
/// This pass should run after reduction variables have been converted to phi nodes,
/// otherwise floating-point reductions might not be recognized as such and
/// prevent SIMDization.
struct LowerSIMDLoop: public LoopPass {
struct LowerSIMDLoop : public ModulePass {
static char ID;
LowerSIMDLoop() : LoopPass(ID) {}
LowerSIMDLoop() : ModulePass(ID)
{
}

protected:
void getAnalysisUsage(AnalysisUsage &AU) const override
{
ModulePass::getAnalysisUsage(AU);
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.setPreservesCFG();
}

private:
bool runOnLoop(Loop *, LPPassManager &LPM) override;
private:
bool runOnModule(Module &M) override;

/// Check if loop has "simd_loop" annotation.
/// If present, the annotation is an MDNode attached to an instruction in the loop's latch.
Expand Down Expand Up @@ -160,41 +170,65 @@ void LowerSIMDLoop::enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const
}
}

bool LowerSIMDLoop::runOnLoop(Loop *L, LPPassManager &LPM)
bool LowerSIMDLoop::runOnModule(Module &M)
{
if (!simd_loop_mdkind) {
simd_loop_mdkind = L->getHeader()->getContext().getMDKindID("simd_loop");
simd_loop_md = MDNode::get(L->getHeader()->getContext(), ArrayRef<Metadata*>());
}
Function *simdloop_marker = M.getFunction("julia.simdloop_marker");

if (!hasSIMDLoopMetadata(L))
if (!simdloop_marker)
return false;

DEBUG(dbgs() << "LSL: simd_loop found\n");
BasicBlock *Lh = L->getHeader();
DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n");
MDNode *n = L->getLoopID();
if (!n) {
// Loop does not have a LoopID yet, so give it one.
n = MDNode::get(Lh->getContext(), ArrayRef<Metadata*>(NULL));
n->replaceOperandWith(0,n);
L->setLoopID(n);
}
MDNode *m = MDNode::get(Lh->getContext(), ArrayRef<Metadata*>(n));
bool Changed = false;
std::vector<Instruction*> ToDelete;
for (User *U : simdloop_marker->users()) {
Instruction *I = cast<Instruction>(U);
ToDelete.push_back(I);
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(*I->getParent()->getParent()).getLoopInfo();
Loop *L = LI.getLoopFor(I->getParent());
I->removeFromParent();
if (!L)
continue;

DEBUG(dbgs() << "LSL: simd_loop found\n");
BasicBlock *Lh = L->getHeader();
DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n");
MDNode *n = L->getLoopID();
if (!n) {
// Loop does not have a LoopID yet, so give it one.
n = MDNode::get(Lh->getContext(), ArrayRef<Metadata *>(NULL));
n->replaceOperandWith(0, n);
L->setLoopID(n);
}

assert(L->getLoopID());

// Mark memory references so that Loop::isAnnotatedParallel will return true for this loop.
for(Loop::block_iterator BBI = L->block_begin(), E=L->block_end(); BBI!=E; ++BBI)
for (BasicBlock::iterator I = (*BBI)->begin(), EE = (*BBI)->end(); I!=EE; ++I)
if (I->mayReadOrWriteMemory())
I->setMetadata("llvm.mem.parallel_loop_access", m);
assert(L->isAnnotatedParallel());
MDNode *m = MDNode::get(Lh->getContext(), ArrayRef<Metadata *>(n));

// Mark memory references so that Loop::isAnnotatedParallel will return true for this loop.
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (I.mayReadOrWriteMemory()) {
I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m);
}
}
}
assert(L->isAnnotatedParallel());

// Mark floating-point reductions as okay to reassociate/commute.
for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) {
if (PHINode *Phi = dyn_cast<PHINode>(I))
enableUnsafeAlgebraIfReduction(Phi, L);
else
break;
}

Changed = true;
}

// Mark floating-point reductions as okay to reassociate/commute.
for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I!=E; ++I)
if (PHINode *Phi = dyn_cast<PHINode>(I))
enableUnsafeAlgebraIfReduction(Phi,L);
for (Instruction *I : ToDelete)
I->deleteValue();
simdloop_marker->eraseFromParent();

return true;
return Changed;
}

char LowerSIMDLoop::ID = 0;
Expand Down
8 changes: 6 additions & 2 deletions test/llvmpasses/simdloop.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
; RUN: opt -load libjulia%shlibext -LowerSIMDLoop -S %s | FileCheck %s

declare void @julia.simdloop_marker()

define void @simd_test(double *%a, double *%b) {
top:
br label %loop
Expand All @@ -12,7 +14,8 @@ loop:
%bval = load double, double *%aptr
%cval = fadd double %aval, %bval
store double %cval, double *%bptr
%nexti = add i64 %i, 1, !simd_loop !1
%nexti = add i64 %i, 1
call void @julia.simdloop_marker()
%done = icmp sgt i64 %nexti, 500
br i1 %done, label %loopdone, label %loop
loopdone:
Expand All @@ -30,7 +33,8 @@ loop:
%aval = load double, double *%aptr
%nextv = fsub double %v, %aval
; CHECK: fsub fast double %v, %aval
%nexti = add i64 %i, 1, !simd_loop !1
%nexti = add i64 %i, 1
call void @julia.simdloop_marker()
%done = icmp sgt i64 %nexti, 500
br i1 %done, label %loopdone, label %loop
loopdone:
Expand Down

0 comments on commit 71f6bfe

Please sign in to comment.