Skip to content

Commit

Permalink
Add LLVM level allocation optimization pass
Browse files Browse the repository at this point in the history
This can obtain escape information with much higher precision than what we can currently do
in typeinf. However, it does not replace the `alloc_elim_pass!` in type inference, since
this LLVM pass cannot handle objects with reference fields.

Fix #20452
  • Loading branch information
yuyichao committed Jul 8, 2017
1 parent ad8589a commit e4b07a8
Show file tree
Hide file tree
Showing 9 changed files with 732 additions and 45 deletions.
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ endif
LLVMLINK :=

ifeq ($(JULIACODEGEN),LLVM)
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
LLVM_LIBS := all
ifeq ($(USE_POLLY),1)
Expand Down
2 changes: 1 addition & 1 deletion src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2143,7 +2143,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
size_t rtsz = jl_datatype_size(rt);
assert(rtsz > 0);
Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
int boxalign = jl_gc_alignment(rtsz);
int boxalign = jl_datatype_align(rt);
#ifndef JL_NDEBUG
#if JL_LLVM_VERSION >= 40000
const DataLayout &DL = jl_data_layout;
Expand Down
21 changes: 3 additions & 18 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2149,25 +2149,10 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
{
JL_FEAT_REQUIRE(ctx, dynamic_alloc);
JL_FEAT_REQUIRE(ctx, runtime);

int osize;
int offset = jl_gc_classify_pools(static_size, &osize);
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
Value *v;
if (offset < 0) {
Value *args[] = {ptls_ptr,
ConstantInt::get(T_size, static_size + sizeof(void*))};
v = ctx.builder.CreateCall(prepare_call(jlalloc_big_func),
ArrayRef<Value*>(args, 2));
}
else {
Value *pool_offs = ConstantInt::get(T_int32, offset);
Value *args[] = {ptls_ptr, pool_offs, ConstantInt::get(T_int32, osize)};
v = ctx.builder.CreateCall(prepare_call(jlalloc_pool_func),
ArrayRef<Value*>(args, 3));
}
tbaa_decorate(tbaa_tag, ctx.builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(ctx, v)));
return v;
return ctx.builder.CreateCall(prepare_call(jl_alloc_obj_func),
{ptls_ptr, ConstantInt::get(T_size, static_size),
maybe_decay_untracked(jt)});
}

// if ptr is NULL this emits a write barrier _back_
Expand Down
34 changes: 14 additions & 20 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,7 @@ static Function *jlgenericfunction_func;
static Function *jlenter_func;
static Function *jlleave_func;
static Function *jlegal_func;
static Function *jlalloc_pool_func;
static Function *jlalloc_big_func;
static Function *jl_alloc_obj_func;
static Function *jlisa_func;
static Function *jlsubtype_func;
static Function *jlapplytype_func;
Expand Down Expand Up @@ -6635,24 +6634,19 @@ static void init_julia_llvm_env(Module *m)
"jl_instantiate_type_in_env", m);
add_named_global(jlapplytype_func, &jl_instantiate_type_in_env);

std::vector<Type*> alloc_pool_args(0);
alloc_pool_args.push_back(T_pint8);
alloc_pool_args.push_back(T_int32);
alloc_pool_args.push_back(T_int32);
jlalloc_pool_func =
Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false),
Function::ExternalLinkage,
"jl_gc_pool_alloc", m);
add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc);

std::vector<Type*> alloc_big_args(0);
alloc_big_args.push_back(T_pint8);
alloc_big_args.push_back(T_size);
jlalloc_big_func =
Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false),
Function::ExternalLinkage,
"jl_gc_big_alloc", m);
add_named_global(jlalloc_big_func, &jl_gc_big_alloc);
std::vector<Type*> gc_alloc_args(0);
gc_alloc_args.push_back(T_pint8);
gc_alloc_args.push_back(T_size);
gc_alloc_args.push_back(T_prjlvalue);
jl_alloc_obj_func = Function::Create(FunctionType::get(T_prjlvalue, gc_alloc_args, false),
Function::ExternalLinkage,
"julia.gc_alloc_obj");
#if JL_LLVM_VERSION >= 50000
jl_alloc_obj_func->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
#else
jl_alloc_obj_func->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
#endif
add_named_global(jl_alloc_obj_func, (void*)NULL, /*dllimport*/false);

std::vector<Type *> dlsym_args(0);
dlsym_args.push_back(T_pint8);
Expand Down
6 changes: 1 addition & 5 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,11 +325,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
}

int alignment;
if (x.isboxed) {
// julia's gc gives 16-byte aligned addresses
alignment = 16;
}
else if (jt) {
if (jt) {
alignment = julia_alignment(p, jt, 0);
}
else {
Expand Down
8 changes: 8 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
#endif
if (opt_level == 0) {
PM->add(createCFGSimplificationPass()); // Clean up disgusting code
PM->add(createAllocOptPass(false));
#if JL_LLVM_VERSION < 50000
PM->add(createBarrierNoopPass());
PM->add(createLowerExcHandlersPass());
Expand Down Expand Up @@ -147,6 +148,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
// effectiveness of the optimization, but should retain correctness.
#if JL_LLVM_VERSION < 50000
PM->add(createLowerExcHandlersPass());
PM->add(createAllocOptPass(true));
PM->add(createLateLowerGCFramePass());
// Remove dead use of ptls
PM->add(createDeadCodeEliminationPass());
Expand All @@ -161,6 +163,12 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
PM->add(createAlwaysInlinerPass()); // Respect always_inline
#endif

#if JL_LLVM_VERSION >= 50000
// Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
// merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
// pass.
PM->add(createAllocOptPass(true));
#endif
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
PM->add(createSROAPass()); // Break up aggregate allocas
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
Expand Down
1 change: 1 addition & 0 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ Pass *createLateLowerGCFramePass();
Pass *createLowerExcHandlersPass();
Pass *createGCInvariantVerifierPass(bool Strong);
Pass *createPropagateJuliaAddrspaces();
Pass *createAllocOptPass(bool);
// Whether the Function is an llvm or julia intrinsic.
static inline bool isIntrinsicFunction(Function *F)
{
Expand Down
Loading

0 comments on commit e4b07a8

Please sign in to comment.