From c06292fbae900798e8ba7dfa4277df3e3d835a14 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 12 Nov 2019 00:10:24 -0500 Subject: [PATCH] codegen: optimize returning an argument It's not unusual to have code that simply returns an argument. When used in generic code though, that currently might involve copying the value to a new box. We sometimes define functions like `identity(nospecialize x) = x` to work around that, (which is preferable anyways for other reasons), but it's better for codegen to also just be aware of this pattern. --- src/codegen.cpp | 86 +++++++++++++++++++++++++++------------- test/compiler/codegen.jl | 11 +++++ 2 files changed, 70 insertions(+), 27 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 9fc1dc3b02935..980d1bb166e78 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -5209,7 +5209,7 @@ static Function *jl_cfunction_object(jl_value_t *ff, jl_value_t *declrt, jl_tupl } // generate a julia-callable function that calls f (AKA lam) -static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, StringRef funcName, Module *M) +static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, int retarg, StringRef funcName, Module *M) { Function *w = Function::Create(jl_func_sig, GlobalVariable::ExternalLinkage, funcName, M); add_return_attr(w, Attribute::NonNull); @@ -5234,7 +5234,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret ctx.builder.SetCurrentDebugLocation(noDbg); allocate_gc_frame(ctx, b0); - // TODO: replace this with emit_call_specfun_other + // TODO: replace this with emit_call_specfun_other? FunctionType *ftype = f.decl->getFunctionType(); size_t nfargs = ftype->getNumParams(); Value **args = (Value**) alloca(nfargs * sizeof(Value*)); @@ -5270,7 +5270,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret theArg = funcArg; } else { - Value *argPtr = ctx.builder.CreateInBoundsGEP(argArray, ConstantInt::get(T_size, i - 1)); + Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, i - 1); theArg = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(argPtr), false, ty); } if (!isboxed) { @@ -5286,28 +5286,38 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret call->setAttributes(f.decl->getAttributes()); jl_cgval_t retval; - switch (f.cc) { - case jl_returninfo_t::Boxed: - retval = mark_julia_type(ctx, call, true, jlretty); - break; - case jl_returninfo_t::Register: - retval = mark_julia_type(ctx, call, false, jlretty); - break; - case jl_returninfo_t::SRet: - retval = mark_julia_slot(result, jlretty, NULL, tbaa_stack); - break; - case jl_returninfo_t::Union: - // result is technically not right here, but `boxed` will only look at it - // for the unboxed values, so it's ok. - retval = mark_julia_slot(result, - jlretty, - ctx.builder.CreateExtractValue(call, 1), - tbaa_stack); - retval.Vboxed = ctx.builder.CreateExtractValue(call, 0); - break; - case jl_returninfo_t::Ghosts: - retval = mark_julia_slot(NULL, jlretty, call, tbaa_stack); - break; + if (retarg != -1) { + Value *theArg; + if (retarg == 0) + theArg = funcArg; + else + theArg = ctx.builder.CreateLoad(ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, retarg - 1)); + retval = mark_julia_type(ctx, theArg, true, jl_any_type); + } + else { + switch (f.cc) { + case jl_returninfo_t::Boxed: + retval = mark_julia_type(ctx, call, true, jlretty); + break; + case jl_returninfo_t::Register: + retval = mark_julia_type(ctx, call, false, jlretty); + break; + case jl_returninfo_t::SRet: + retval = mark_julia_slot(result, jlretty, NULL, tbaa_stack); + break; + case jl_returninfo_t::Union: + // result is technically not right here, but `boxed` will only look at it + // for the unboxed values, so it's ok. + retval = mark_julia_slot(result, + jlretty, + ctx.builder.CreateExtractValue(call, 1), + tbaa_stack); + retval.Vboxed = ctx.builder.CreateExtractValue(call, 0); + break; + case jl_returninfo_t::Ghosts: + retval = mark_julia_slot(NULL, jlretty, call, tbaa_stack); + break; + } } ctx.builder.CreateRet(boxed(ctx, retval)); assert(!ctx.roots); @@ -5641,9 +5651,31 @@ static std::unique_ptr emit_function( has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); jl_init_function(f); + // common pattern: see if all return statements are an argument in that + // case the apply-generic call can re-use the original box for the return + int retarg = [stmts, nreq]() { + int retarg = -1; + for (size_t i = 0; i < jl_array_len(stmts); ++i) { + jl_value_t *stmt = jl_array_ptr_ref(stmts, i); + if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == return_sym) { + stmt = jl_exprarg(stmt, 0); + if (!jl_is_slot(stmt)) + return -1; + unsigned sl = jl_slot_number(stmt) - 1; + if (sl >= nreq) + return -1; + if (retarg == -1) + retarg = sl; + else if ((unsigned)retarg != sl) + return -1; + } + } + return retarg; + }(); + std::stringstream wrapName; wrapName << "jfptr_" << unadorned_name << "_" << globalUnique; - Function *fwrap = gen_invoke_wrapper(lam, jlrettype, returninfo, wrapName.str(), M); + Function *fwrap = gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, wrapName.str(), M); declarations->functionObject = strdup(fwrap->getName().str().c_str()); } else { @@ -6199,7 +6231,7 @@ static std::unique_ptr emit_function( std::vector scope_stack; std::vector scope_list_stack; { - size_t nstmts = jl_array_len(src->code); + size_t nstmts = jl_array_len(stmts); aliasscopes.resize(nstmts + 1, nullptr); MDBuilder mbuilder(jl_LLVMContext); MDNode *alias_domain = mbuilder.createAliasScopeDomain(ctx.name); diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 34f32cca47750..00c3704ee8f34 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -392,6 +392,17 @@ end f_dict_hash_alloc(); g_dict_hash_alloc(); @test (@allocated f_dict_hash_alloc()) == (@allocated g_dict_hash_alloc()) +# returning an argument shouldn't alloc a new box +@noinline f33829(x) = (global called33829 = true; x) +g33829() = @allocated Base.inferencebarrier(f33829)(1.1,) +g33829() # warm up +@test (@allocated g33829()) == 0 +@test called33829 # make sure there was a global side effect so it's hard for this call to simply be removed +let src = get_llvm(f33829, Tuple{Float64}, true, true) + @test occursin(r"call [^(]*double @", src) + @test !occursin(r"call [^(]*\%jl_value_t", src) +end + let io = IOBuffer() # Test for the f(args...) = g(args...) generic codegen optimization code_llvm(io, Base.vect, Tuple{Vararg{Union{Float64, Int64}}})