Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

codegen: optimize returning an argument #33829

Merged
merged 1 commit into from
Nov 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 59 additions & 27 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5209,7 +5209,7 @@ static Function *jl_cfunction_object(jl_value_t *ff, jl_value_t *declrt, jl_tupl
}

// generate a julia-callable function that calls f (AKA lam)
static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, StringRef funcName, Module *M)
static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, int retarg, StringRef funcName, Module *M)
{
Function *w = Function::Create(jl_func_sig, GlobalVariable::ExternalLinkage, funcName, M);
add_return_attr(w, Attribute::NonNull);
Expand All @@ -5234,7 +5234,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
ctx.builder.SetCurrentDebugLocation(noDbg);
allocate_gc_frame(ctx, b0);

// TODO: replace this with emit_call_specfun_other
// TODO: replace this with emit_call_specfun_other?
FunctionType *ftype = f.decl->getFunctionType();
size_t nfargs = ftype->getNumParams();
Value **args = (Value**) alloca(nfargs * sizeof(Value*));
Expand Down Expand Up @@ -5270,7 +5270,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
theArg = funcArg;
}
else {
Value *argPtr = ctx.builder.CreateInBoundsGEP(argArray, ConstantInt::get(T_size, i - 1));
Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, i - 1);
theArg = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(argPtr), false, ty);
}
if (!isboxed) {
Expand All @@ -5286,28 +5286,38 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
call->setAttributes(f.decl->getAttributes());

jl_cgval_t retval;
switch (f.cc) {
case jl_returninfo_t::Boxed:
retval = mark_julia_type(ctx, call, true, jlretty);
break;
case jl_returninfo_t::Register:
retval = mark_julia_type(ctx, call, false, jlretty);
break;
case jl_returninfo_t::SRet:
retval = mark_julia_slot(result, jlretty, NULL, tbaa_stack);
break;
case jl_returninfo_t::Union:
// result is technically not right here, but `boxed` will only look at it
// for the unboxed values, so it's ok.
retval = mark_julia_slot(result,
jlretty,
ctx.builder.CreateExtractValue(call, 1),
tbaa_stack);
retval.Vboxed = ctx.builder.CreateExtractValue(call, 0);
break;
case jl_returninfo_t::Ghosts:
retval = mark_julia_slot(NULL, jlretty, call, tbaa_stack);
break;
if (retarg != -1) {
Value *theArg;
if (retarg == 0)
theArg = funcArg;
else
theArg = ctx.builder.CreateLoad(ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, retarg - 1));
retval = mark_julia_type(ctx, theArg, true, jl_any_type);
}
else {
switch (f.cc) {
case jl_returninfo_t::Boxed:
retval = mark_julia_type(ctx, call, true, jlretty);
break;
case jl_returninfo_t::Register:
retval = mark_julia_type(ctx, call, false, jlretty);
break;
case jl_returninfo_t::SRet:
retval = mark_julia_slot(result, jlretty, NULL, tbaa_stack);
break;
case jl_returninfo_t::Union:
// result is technically not right here, but `boxed` will only look at it
// for the unboxed values, so it's ok.
retval = mark_julia_slot(result,
jlretty,
ctx.builder.CreateExtractValue(call, 1),
tbaa_stack);
retval.Vboxed = ctx.builder.CreateExtractValue(call, 0);
break;
case jl_returninfo_t::Ghosts:
retval = mark_julia_slot(NULL, jlretty, call, tbaa_stack);
break;
}
}
ctx.builder.CreateRet(boxed(ctx, retval));
assert(!ctx.roots);
Expand Down Expand Up @@ -5641,9 +5651,31 @@ static std::unique_ptr<Module> emit_function(
has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union);
jl_init_function(f);

// common pattern: see if all return statements are an argument in that
// case the apply-generic call can re-use the original box for the return
int retarg = [stmts, nreq]() {
int retarg = -1;
for (size_t i = 0; i < jl_array_len(stmts); ++i) {
jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == return_sym) {
stmt = jl_exprarg(stmt, 0);
if (!jl_is_slot(stmt))
return -1;
unsigned sl = jl_slot_number(stmt) - 1;
if (sl >= nreq)
return -1;
if (retarg == -1)
retarg = sl;
else if ((unsigned)retarg != sl)
return -1;
}
}
return retarg;
}();

std::stringstream wrapName;
wrapName << "jfptr_" << unadorned_name << "_" << globalUnique;
Function *fwrap = gen_invoke_wrapper(lam, jlrettype, returninfo, wrapName.str(), M);
Function *fwrap = gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, wrapName.str(), M);
declarations->functionObject = strdup(fwrap->getName().str().c_str());
}
else {
Expand Down Expand Up @@ -6199,7 +6231,7 @@ static std::unique_ptr<Module> emit_function(
std::vector<Metadata*> scope_stack;
std::vector<MDNode*> scope_list_stack;
{
size_t nstmts = jl_array_len(src->code);
size_t nstmts = jl_array_len(stmts);
aliasscopes.resize(nstmts + 1, nullptr);
MDBuilder mbuilder(jl_LLVMContext);
MDNode *alias_domain = mbuilder.createAliasScopeDomain(ctx.name);
Expand Down
11 changes: 11 additions & 0 deletions test/compiler/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,17 @@ end
f_dict_hash_alloc(); g_dict_hash_alloc();
@test (@allocated f_dict_hash_alloc()) == (@allocated g_dict_hash_alloc())

# returning an argument shouldn't alloc a new box
@noinline f33829(x) = (global called33829 = true; x)
g33829() = @allocated Base.inferencebarrier(f33829)(1.1,)
g33829() # warm up
@test (@allocated g33829()) == 0
@test called33829 # make sure there was a global side effect so it's hard for this call to simply be removed
let src = get_llvm(f33829, Tuple{Float64}, true, true)
@test occursin(r"call [^(]*double @", src)
@test !occursin(r"call [^(]*\%jl_value_t", src)
end

let io = IOBuffer()
# Test for the f(args...) = g(args...) generic codegen optimization
code_llvm(io, Base.vect, Tuple{Vararg{Union{Float64, Int64}}})
Expand Down