Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Investigate escape analysis for array allocations #43107

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/src/manual/performance-tips.md
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ Sometimes you can circumvent the need to allocate memory on each function call b
the output. As a trivial example, compare

```jldoctest prealloc
julia> function xinc(x)
julia> @noinline function xinc(x)
return [x, x+1, x+2]
end;

Expand All @@ -966,7 +966,7 @@ julia> function loopinc()
with

```jldoctest prealloc
julia> function xinc!(ret::AbstractVector{T}, x::T) where T
julia> @noinline function xinc!(ret::AbstractVector{T}, x::T) where T
ret[1] = x
ret[2] = x+1
ret[3] = x+2
Expand Down
10 changes: 6 additions & 4 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ CODEGEN_SRCS := codegen llvm-ptls
RUNTIME_CODEGEN_SRCS := jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \
llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-remove-addrspaces \
llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \
llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
CG_LLVM_LIBS := all
Expand Down Expand Up @@ -278,7 +278,7 @@ $(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $
$(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
intrinsics.cpp jitlayers.h intrinsics.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h)
intrinsics.cpp jitlayers.h intrinsics.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h llvm-alloc-helpers.h)
$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h)
$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h)
Expand All @@ -290,14 +290,16 @@ $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_pro
$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h
$(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/codegen_shared.h $(BUILDDIR)/julia_version.h
$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-alloc-helpers.h
$(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/processor.h
$(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
$(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
$(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c)
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
Expand Down
5 changes: 5 additions & 0 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,9 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
// Subsequent passes not stripping metadata from terminator
PM->add(createInstSimplifyLegacyPass());
PM->add(createIndVarSimplifyPass());
PM->add(createCFGSimplificationPass()); // See note above, don't hoist instructions before LV
PM->add(createLoopDeletionPass());
PM->add(createLoopIdiomPass());
PM->add(createSimpleLoopUnrollPass());

// Run our own SROA on heap objects before LLVM's
Expand All @@ -744,6 +746,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
PM->add(createGVNPass());
PM->add(createMemCpyOptPass());
PM->add(createSCCPPass());
PM->add(createInductiveRangeCheckEliminationPass());

// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
Expand All @@ -756,6 +759,8 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,

// More dead allocation (store) deletion before loop optimization
// This was previously a candidate for removal, but it is now useful for
// optimizing arrays whose out-of-bounds checks were eliminated by the
// preceding IRCE pass
PM->add(createAllocOptPass());
// see if all of the constant folding has exposed more loops
// to simplification and deletion
Expand Down
40 changes: 40 additions & 0 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1747,6 +1747,46 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
JL_GC_POP();
return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt);
}
} else if (is_libjulia_func(jl_alloc_array_1d)) {
jl_cgval_t retval = sig.emit_a_ccall(ctx, symarg, argv, gc_uses, static_rt);
if (auto call = dyn_cast<CallInst>(retval.V)) {
AttrBuilder builder;
builder.addAttribute(Attribute::NoAlias);
builder.addAttribute(Attribute::NonNull);
call->setAttributes(call->getAttributes().addAttributes(ctx.builder.getContext(), AttributeList::ReturnIndex, builder));
}
JL_GC_POP();
return retval;
} else if (is_libjulia_func(jl_alloc_array_2d)) {
jl_cgval_t retval = sig.emit_a_ccall(ctx, symarg, argv, gc_uses, static_rt);
if (auto call = dyn_cast<CallInst>(retval.V)) {
AttrBuilder builder;
builder.addAttribute(Attribute::NoAlias);
builder.addAttribute(Attribute::NonNull);
call->setAttributes(call->getAttributes().addAttributes(ctx.builder.getContext(), AttributeList::ReturnIndex, builder));
}
JL_GC_POP();
return retval;
} else if (is_libjulia_func(jl_alloc_array_3d)) {
jl_cgval_t retval = sig.emit_a_ccall(ctx, symarg, argv, gc_uses, static_rt);
if (auto call = dyn_cast<CallInst>(retval.V)) {
AttrBuilder builder;
builder.addAttribute(Attribute::NoAlias);
builder.addAttribute(Attribute::NonNull);
call->setAttributes(call->getAttributes().addAttributes(ctx.builder.getContext(), AttributeList::ReturnIndex, builder));
}
JL_GC_POP();
return retval;
} else if (is_libjulia_func(jl_new_array)) {
jl_cgval_t retval = sig.emit_a_ccall(ctx, symarg, argv, gc_uses, static_rt);
if (auto call = dyn_cast<CallInst>(retval.V)) {
AttrBuilder builder;
builder.addAttribute(Attribute::NoAlias);
builder.addAttribute(Attribute::NonNull);
call->setAttributes(call->getAttributes().addAttributes(ctx.builder.getContext(), AttributeList::ReturnIndex, builder));
}
JL_GC_POP();
return retval;
}

jl_cgval_t retval = sig.emit_a_ccall(
Expand Down
20 changes: 16 additions & 4 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ typedef Instruction TerminatorInst;
#include "codegen_shared.h"
#include "processor.h"
#include "julia_assert.h"
#include "llvm-alloc-helpers.h"

JL_STREAM *dump_emitted_mi_name_stream = NULL;
extern "C" JL_DLLEXPORT
Expand Down Expand Up @@ -2790,10 +2791,21 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
PHINode *data_owner = NULL; // owner object against which the write barrier must check
if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
Value *aryv = boxed(ctx, ary);
Value *flags = emit_arrayflags(ctx, ary);
// the owner of the data is ary itself except if ary->how == 3
flags = ctx.builder.CreateAnd(flags, 3);
Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
Value *is_owned;
do {
if (auto call = dyn_cast<CallInst>(aryv)) {
jl_alloc::AllocIdInfo info;
if (jl_alloc::getAllocIdInfo(info, call, nullptr)) {
// This is an array allocation function, so ary->how cannot be 3
is_owned = ConstantInt::getFalse(ctx.builder.getContext());
break;
}
}
Value *flags = emit_arrayflags(ctx, ary);
// the owner of the data is ary itself except if ary->how == 3
flags = ctx.builder.CreateAnd(flags, 3);
is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
} while (0);
BasicBlock *curBB = ctx.builder.GetInsertBlock();
BasicBlock *ownedBB = BasicBlock::Create(jl_LLVMContext, "array_owned", ctx.f);
BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge_own", ctx.f);
Expand Down
1 change: 1 addition & 0 deletions src/debuginfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ static objfileentry_t &find_object_file(uint64_t fbase, StringRef fname) JL_NOTS
if (DebugInfo) {
errorobj = std::move(DebugInfo);
// Yes, we've checked, and yes, LLVM wants us to check again.
static_cast<bool>(errorobj);
assert(errorobj);
debugobj = errorobj->getBinary();
}
Expand Down
Loading