Skip to content

Commit

Permalink
Merge pull request #22684 from JuliaLang/yyc/codegen/alloc-elim
Browse files (browse the repository at this point in the history)
Add LLVM level allocation optimization pass
  • Loading branch information
yuyichao authored Jul 28, 2017
2 parents b11aec6 + b1a188c commit e1a604e
Show file tree
Hide file tree
Showing 13 changed files with 1,081 additions and 74 deletions.
20 changes: 10 additions & 10 deletions base/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -724,16 +724,16 @@ julia> modf(3.5)
"""
modf(x) = rem(x,one(x)), trunc(x)

const _modff_temp = Ref{Float32}()
function modf(x::Float32)
f = ccall((:modff,libm), Float32, (Float32,Ptr{Float32}), x, _modff_temp)
f, _modff_temp[]
temp = Ref{Float32}()
f = ccall((:modff, libm), Float32, (Float32, Ptr{Float32}), x, temp)
f, temp[]
end

const _modf_temp = Ref{Float64}()
function modf(x::Float64)
f = ccall((:modf,libm), Float64, (Float64,Ptr{Float64}), x, _modf_temp)
f, _modf_temp[]
temp = Ref{Float64}()
f = ccall((:modf, libm), Float64, (Float64, Ptr{Float64}), x, temp)
f, temp[]
end

@inline function ^(x::Float64, y::Float64)
Expand Down Expand Up @@ -781,7 +781,7 @@ function add22condh(xh::Float64, xl::Float64, yh::Float64, yl::Float64)
return zh
end

function ieee754_rem_pio2(x::Float64)
@inline function ieee754_rem_pio2(x::Float64)
# rem_pio2 essentially computes x mod pi/2 (ie within a quarter circle)
# and returns the result as
# y between + and - pi/4 (for maximal accuracy (as the sign bit is exploited)), and
Expand All @@ -795,9 +795,9 @@ function ieee754_rem_pio2(x::Float64)
# this is just wrapping up
# https://github.com/JuliaLang/openspecfun/blob/master/rem_pio2/e_rem_pio2.c

y = [0.0,0.0]
n = ccall((:__ieee754_rem_pio2, openspecfun), Cint, (Float64,Ptr{Float64}), x, y)
return (n,y)
y = Ref{NTuple{2,Float64}}()
n = ccall((:__ieee754_rem_pio2, openspecfun), Cint, (Float64, Ptr{Void}), x, y)
return (n, y[])
end

# multiples of pi/2, as double-double (ie with "tail")
Expand Down
4 changes: 3 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ endif
LLVMLINK :=

ifeq ($(JULIACODEGEN),LLVM)
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \
llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier \
llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
LLVM_LIBS := all
ifeq ($(USE_POLLY),1)
Expand Down
2 changes: 1 addition & 1 deletion src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2106,7 +2106,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
size_t rtsz = jl_datatype_size(rt);
assert(rtsz > 0);
Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
int boxalign = jl_gc_alignment(rtsz);
int boxalign = jl_datatype_align(rt);
#ifndef JL_NDEBUG
#if JL_LLVM_VERSION >= 40000
const DataLayout &DL = jl_data_layout;
Expand Down
23 changes: 5 additions & 18 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2097,25 +2097,12 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
{
JL_FEAT_REQUIRE(ctx, dynamic_alloc);
JL_FEAT_REQUIRE(ctx, runtime);

int osize;
int offset = jl_gc_classify_pools(static_size, &osize);
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
Value *v;
if (offset < 0) {
Value *args[] = {ptls_ptr,
ConstantInt::get(T_size, static_size + sizeof(void*))};
v = ctx.builder.CreateCall(prepare_call(jlalloc_big_func),
ArrayRef<Value*>(args, 2));
}
else {
Value *pool_offs = ConstantInt::get(T_int32, offset);
Value *args[] = {ptls_ptr, pool_offs, ConstantInt::get(T_int32, osize)};
v = ctx.builder.CreateCall(prepare_call(jlalloc_pool_func),
ArrayRef<Value*>(args, 3));
}
tbaa_decorate(tbaa_tag, ctx.builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(ctx, v)));
return v;
auto call = ctx.builder.CreateCall(prepare_call(jl_alloc_obj_func),
{ptls_ptr, ConstantInt::get(T_size, static_size),
maybe_decay_untracked(jt)});
call->setAttributes(jl_alloc_obj_func->getAttributes());
return call;
}

// if ptr is NULL this emits a write barrier _back_
Expand Down
34 changes: 14 additions & 20 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,7 @@ static Function *jlgenericfunction_func;
static Function *jlenter_func;
static Function *jlleave_func;
static Function *jlegal_func;
static Function *jlalloc_pool_func;
static Function *jlalloc_big_func;
static Function *jl_alloc_obj_func;
static Function *jlisa_func;
static Function *jlsubtype_func;
static Function *jlapplytype_func;
Expand Down Expand Up @@ -6372,24 +6371,19 @@ static void init_julia_llvm_env(Module *m)
"jl_instantiate_type_in_env", m);
add_named_global(jlapplytype_func, &jl_instantiate_type_in_env);

std::vector<Type*> alloc_pool_args(0);
alloc_pool_args.push_back(T_pint8);
alloc_pool_args.push_back(T_int32);
alloc_pool_args.push_back(T_int32);
jlalloc_pool_func =
Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false),
Function::ExternalLinkage,
"jl_gc_pool_alloc", m);
add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc);

std::vector<Type*> alloc_big_args(0);
alloc_big_args.push_back(T_pint8);
alloc_big_args.push_back(T_size);
jlalloc_big_func =
Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false),
Function::ExternalLinkage,
"jl_gc_big_alloc", m);
add_named_global(jlalloc_big_func, &jl_gc_big_alloc);
std::vector<Type*> gc_alloc_args(0);
gc_alloc_args.push_back(T_pint8);
gc_alloc_args.push_back(T_size);
gc_alloc_args.push_back(T_prjlvalue);
jl_alloc_obj_func = Function::Create(FunctionType::get(T_prjlvalue, gc_alloc_args, false),
Function::ExternalLinkage,
"julia.gc_alloc_obj");
#if JL_LLVM_VERSION >= 50000
jl_alloc_obj_func->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
#else
jl_alloc_obj_func->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
#endif
add_named_global(jl_alloc_obj_func, (void*)NULL, /*dllimport*/false);

std::vector<Type *> dlsym_args(0);
dlsym_args.push_back(T_pint8);
Expand Down
6 changes: 1 addition & 5 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,11 +325,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
}

int alignment;
if (x.isboxed) {
// julia's gc gives 16-byte aligned addresses
alignment = 16;
}
else if (jt) {
if (jt) {
alignment = julia_alignment(p, jt, 0);
}
else {
Expand Down
7 changes: 7 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
// effectiveness of the optimization, but should retain correctness.
#if JL_LLVM_VERSION < 50000
PM->add(createLowerExcHandlersPass());
PM->add(createAllocOptPass());
PM->add(createLateLowerGCFramePass());
// Remove dead use of ptls
PM->add(createDeadCodeEliminationPass());
Expand All @@ -161,6 +162,12 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
PM->add(createAlwaysInlinerPass()); // Respect always_inline
#endif

#if JL_LLVM_VERSION >= 50000
// Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
// merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
// pass.
PM->add(createAllocOptPass());
#endif
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
PM->add(createSROAPass()); // Break up aggregate allocas
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
Expand Down
1 change: 1 addition & 0 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ Pass *createLateLowerGCFramePass();
Pass *createLowerExcHandlersPass();
Pass *createGCInvariantVerifierPass(bool Strong);
Pass *createPropagateJuliaAddrspaces();
Pass *createAllocOptPass();
// Whether the Function is an llvm or julia intrinsic.
static inline bool isIntrinsicFunction(Function *F)
{
Expand Down
Loading

0 comments on commit e1a604e

Please sign in to comment.