Skip to content

Commit

Permalink
Improve codegen for certain varargs functions
Browse files Browse the repository at this point in the history
This just handles the simple case where the varargs arguments are all
isbits_spec. More complicated additions are of course possible, but this
already gets a bunch of interesting cases.

Before:
```
julia> @noinline f(x::Vararg{Int, N}) where {N} = x[1]
f (generic function with 1 method)

julia> g(x) = f(x)
g (generic function with 1 method)

julia> @code_llvm f(1)

; Function f
; Location: REPL[1]
define %jl_value_t addrspace(10)* @japi1_f_62200(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32) #0 {
top:
  %3 = alloca %jl_value_t addrspace(10)**, align 8
  store volatile %jl_value_t addrspace(10)** %1, %jl_value_t addrspace(10)*** %3, align 8
; Location: REPL[1]:1
  %4 = icmp eq i32 %2, 0
  br i1 %4, label %fail, label %pass

fail:                                             ; preds = %top
  call void @jl_bounds_error_tuple_int(%jl_value_t addrspace(10)** %1, i64 0, i64 1)
  unreachable

pass:                                             ; preds = %top
  %5 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, align 8
  ret %jl_value_t addrspace(10)* %5
}

julia> @code_llvm g(1)

; Function g
; Location: REPL[2]
define i64 @julia_g_62202(i64) #0 {
top:
  %1 = alloca %jl_value_t addrspace(10)*, align 8
  %gcframe1 = alloca [3 x %jl_value_t addrspace(10)*], align 8
  %gcframe1.sub = getelementptr inbounds [3 x %jl_value_t addrspace(10)*], [3 x %jl_value_t addrspace(10)*]* %gcframe1, i64 0, i64 0
  %2 = getelementptr inbounds [3 x %jl_value_t addrspace(10)*], [3 x %jl_value_t addrspace(10)*]* %gcframe1, i64 0, i64 1
  %3 = bitcast %jl_value_t addrspace(10)** %2 to i8*
  call void @llvm.memset.p0i8.i32(i8* %3, i8 0, i32 16, i32 8, i1 false)
  %4 = call %jl_value_t*** @jl_get_ptls_states() #4
  %5 = bitcast [3 x %jl_value_t addrspace(10)*]* %gcframe1 to i64*
  store i64 2, i64* %5, align 8
  %6 = bitcast %jl_value_t*** %4 to i64*
  %7 = load i64, i64* %6, align 8
  %8 = getelementptr [3 x %jl_value_t addrspace(10)*], [3 x %jl_value_t addrspace(10)*]* %gcframe1, i64 0, i64 1
  %9 = bitcast %jl_value_t addrspace(10)** %8 to i64*
  store i64 %7, i64* %9, align 8
  %10 = bitcast %jl_value_t*** %4 to %jl_value_t addrspace(10)***
  store %jl_value_t addrspace(10)** %gcframe1.sub, %jl_value_t addrspace(10)*** %10, align 8
; Location: REPL[2]:1
  %11 = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %0)
  %12 = getelementptr [3 x %jl_value_t addrspace(10)*], [3 x %jl_value_t addrspace(10)*]* %gcframe1, i64 0, i64 2
  store %jl_value_t addrspace(10)* %11, %jl_value_t addrspace(10)** %12, align 8
  store %jl_value_t addrspace(10)* %11, %jl_value_t addrspace(10)** %1, align 8
  %13 = call %jl_value_t addrspace(10)* @japi1_f_62202(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 4511449272 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** nonnull %1, i32 1)
  %14 = bitcast %jl_value_t addrspace(10)* %13 to i64 addrspace(10)*
  %15 = load i64, i64 addrspace(10)* %14, align 8
  %16 = load i64, i64* %9, align 8
  store i64 %16, i64* %6, align 8
  ret i64 %15
}
```

After:
```
julia> @code_llvm f(1)

define i64 @julia_f_63017(i64) #0 {
top:
  ret i64 %0
}

julia> @code_llvm g(1)

define i64 @julia_g_63053(i64) #0 {
top:
  %1 = call i64 @julia_f_63054(i64 %0)
  ret i64 %1
}
```

Part of #5402
  • Loading branch information
Keno committed Sep 13, 2017
1 parent fdff5b4 commit adf8e0b
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 97 deletions.
13 changes: 6 additions & 7 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2234,13 +2234,12 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
{
assert(jl_is_datatype(ty));
assert(jl_is_leaf_type(ty));
assert(nargs > 0);
jl_datatype_t *sty = (jl_datatype_t*)ty;
size_t nf = jl_datatype_nfields(sty);
if (nf > 0) {
if (jl_isbits(sty)) {
Type *lt = julia_type_to_llvm(ty);
unsigned na = (nargs - 1 < nf) ? (nargs - 1) : nf;
unsigned na = nargs < nf ? nargs : nf;

// whether we should perform the initialization with the struct as a IR value
// or instead initialize the stack buffer with stores
Expand All @@ -2260,7 +2259,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg

for (unsigned i = 0; i < na; i++) {
jl_value_t *jtype = jl_svecref(sty->types, i);
const jl_cgval_t &fval_info = argv[i + 1];
const jl_cgval_t &fval_info = argv[i];
emit_typecheck(ctx, fval_info, jtype, "new");
Type *fty;
if (type_is_ghost(lt))
Expand Down Expand Up @@ -2334,12 +2333,12 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
}
bool need_wb = false;
// TODO: verify that nargs <= nf (currently handled by front-end)
for (size_t i = 1; i < nargs; i++) {
for (size_t i = 0; i < nargs; i++) {
const jl_cgval_t &rhs = argv[i];
if (jl_field_isptr(sty, i - 1) && !rhs.isboxed)
if (jl_field_isptr(sty, i) && !rhs.isboxed)
need_wb = true;
emit_typecheck(ctx, rhs, jl_svecref(sty->types, i - 1), "new");
emit_setfield(ctx, sty, strctinfo, i - 1, rhs, false, need_wb);
emit_typecheck(ctx, rhs, jl_svecref(sty->types, i), "new");
emit_setfield(ctx, sty, strctinfo, i, rhs, false, need_wb);
}
return strctinfo;
}
Expand Down
235 changes: 145 additions & 90 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2358,7 +2358,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
return true;
}
if (jl_is_tuple_type(rt) && jl_is_leaf_type(rt) && nargs == jl_datatype_nfields(rt)) {
*ret = emit_new_struct(ctx, rt, nargs + 1, argv);
*ret = emit_new_struct(ctx, rt, nargs, &argv[1]);
return true;
}
}
Expand Down Expand Up @@ -3382,6 +3382,69 @@ static Value *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool
return NULL;
}

// Store `rval_info` into the unboxed storage backing the local variable `vi`.
//
// The variable's defined-flag is set first when `vi.usedUndef` is set. After
// that, exactly one of the following is emitted:
//   - nothing, when the destination is a constant (virtual store), when the
//     destination has no storage (`vi.value.V == NULL`), or when the source
//     is a ghost value;
//   - a trap, for a type-mismatched assignment where neither side carries a
//     union selector (`vi.pTIndex` / `rval_info.TIndex`);
//   - a single store of the unboxed value, when the source is a constant or
//     is not held behind a pointer;
//   - a memcpy of the destination type's size, or emit_unionmove when
//     `vi.pTIndex` is set, when the source is held behind a pointer.
//
// `isboxed` is not inspected here; it is only forwarded to emit_unionmove.
static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Value *isboxed, jl_cgval_t rval_info)
{
    if (vi.usedUndef)
        store_def_flag(ctx, vi, true);

    // a constant destination means this is a virtual store
    if (vi.value.constant) {
        assert(vi.pTIndex == NULL);
        return;
    }

    assert(vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL));

    // all ghost values in destination - nothing to copy or store
    if (vi.value.V == NULL)
        return;

    if (rval_info.constant || !rval_info.ispointer()) {
        // all ghost values in source - nothing to copy or store
        if (rval_info.isghost)
            return;

        if (rval_info.typ != vi.value.typ && !vi.pTIndex && !rval_info.TIndex) {
            // isbits cast-on-assignment is invalid. this branch should be dead-code.
            CreateTrap(ctx.builder);
            return;
        }

        // for a union-typed slot, overwrite the whole allocation with undef
        // before the (possibly narrower) typed store below
        if (vi.pTIndex) {
            Type *slot_ty = cast<AllocaInst>(vi.value.V)->getAllocatedType();
            ctx.builder.CreateStore(UndefValue::get(slot_ty), vi.value.V);
        }

        jl_value_t *src_jt = rval_info.constant ? jl_typeof(rval_info.constant) : rval_info.typ;
        Type *store_ty = julia_type_to_llvm(src_jt);
        Type *want_ptr_ty = store_ty->getPointerTo();

        // view the slot as a pointer to the type being stored
        Value *dest = vi.value.V;
        if (dest->getType() != want_ptr_ty)
            dest = emit_bitcast(ctx, dest, want_ptr_ty);

        Value *unboxed = emit_unbox(ctx, store_ty, rval_info, rval_info.typ);
        tbaa_decorate(tbaa_stack, ctx.builder.CreateStore(unboxed, dest, vi.isVolatile));
        return;
    }

    // The memcpy intrinsic cannot carry different alias tags for its load
    // side (the source's tbaa) and its store side (tbaa_stack). Because the
    // tbaa lattice has to be a tree, the union of two different tags is the
    // root, so the tag is dropped unless the source is already tbaa_stack.
    MDNode *tbaa = (rval_info.tbaa == tbaa_stack) ? rval_info.tbaa : NULL;

    if (vi.pTIndex) {
        emit_unionmove(ctx, vi.value.V, rval_info, isboxed, vi.isVolatile, tbaa);
        return;
    }

    assert(jl_is_leaf_type(vi.value.typ));
    Value *copy_bytes = ConstantInt::get(T_int32, jl_datatype_size(vi.value.typ));
    Value *src_ptr = data_pointer(ctx, rval_info, T_pint8);
    ctx.builder.CreateMemCpy(vi.value.V,
                             src_ptr,
                             copy_bytes,
                             jl_datatype_align(rval_info.typ),
                             vi.isVolatile,
                             tbaa);
}

static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r)
{
if (jl_is_ssavalue(l)) {
Expand Down Expand Up @@ -3531,65 +3594,7 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r)

// store unboxed variables
if (!vi.boxroot || (vi.pTIndex && rval_info.TIndex)) {
if (vi.usedUndef)
store_def_flag(ctx, vi, true);

if (!vi.value.constant) { // check that this is not a virtual store
assert(vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL));
// store value
if (vi.value.V == NULL) {
// all ghost values in destination - nothing to copy or store
}
else if (rval_info.constant || !rval_info.ispointer()) {
if (rval_info.isghost) {
// all ghost values in source - nothing to copy or store
}
else {
if (rval_info.typ != vi.value.typ && !vi.pTIndex && !rval_info.TIndex) {
// isbits cast-on-assignment is invalid. this branch should be dead-code.
CreateTrap(ctx.builder);
}
else {
Value *dest = vi.value.V;
if (vi.pTIndex)
ctx.builder.CreateStore(UndefValue::get(cast<AllocaInst>(vi.value.V)->getAllocatedType()), vi.value.V);
Type *store_ty = julia_type_to_llvm(rval_info.constant ? jl_typeof(rval_info.constant) : rval_info.typ);
Type *dest_ty = store_ty->getPointerTo();
if (dest_ty != dest->getType())
dest = emit_bitcast(ctx, dest, dest_ty);
tbaa_decorate(tbaa_stack, ctx.builder.CreateStore(
emit_unbox(ctx, store_ty, rval_info, rval_info.typ),
dest,
vi.isVolatile));
}
}
}
else {
MDNode *tbaa = rval_info.tbaa;
// the memcpy intrinsic does not allow to specify different alias tags
// for the load part (x.tbaa) and the store part (tbaa_stack).
// since the tbaa lattice has to be a tree we have unfortunately
// x.tbaa ∪ tbaa_stack = tbaa_root if x.tbaa != tbaa_stack
if (tbaa != tbaa_stack)
tbaa = NULL;
if (vi.pTIndex == NULL) {
assert(jl_is_leaf_type(vi.value.typ));
Value *copy_bytes = ConstantInt::get(T_int32, jl_datatype_size(vi.value.typ));
ctx.builder.CreateMemCpy(vi.value.V,
data_pointer(ctx, rval_info, T_pint8),
copy_bytes,
jl_datatype_align(rval_info.typ),
vi.isVolatile,
tbaa);
}
else {
emit_unionmove(ctx, vi.value.V, rval_info, isboxed, vi.isVolatile, tbaa);
}
}
}
else {
assert(vi.pTIndex == NULL);
}
emit_vi_assignment_unboxed(ctx, vi, isboxed, rval_info);
}
}

Expand Down Expand Up @@ -3856,7 +3861,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr)
jl_is_datatype(jl_tparam0(ty)) &&
jl_is_leaf_type(jl_tparam0(ty))) {
assert(nargs <= jl_datatype_nfields(jl_tparam0(ty)) + 1);
return emit_new_struct(ctx, jl_tparam0(ty), nargs, argv);
return emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, &argv[1]);
}
Value *typ = boxed(ctx, argv[0]);
Value *val = emit_jlcall(ctx, jlnew_func, typ, &argv[1], nargs - 1);
Expand Down Expand Up @@ -4539,12 +4544,15 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, const jl_returnin
idx++;
break;
}
for (size_t i = 0; i < nargs; i++) {
size_t i = 0;
while (idx < nfargs) {
jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i);
bool isboxed;
Type *lty = julia_type_to_llvm(ty, &isboxed);
if (lty != NULL && type_is_ghost(lty))
if (lty != NULL && type_is_ghost(lty)) {
i++;
continue;
}
Value *theArg;
if (i == 0) {
theArg = fArg;
Expand All @@ -4560,7 +4568,7 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, const jl_returnin
}
assert(dyn_cast<UndefValue>(theArg) == NULL);
args[idx] = theArg;
idx++;
idx++; i++;
}
CallInst *call = ctx.builder.CreateCall(f.decl, ArrayRef<Value*>(&args[0], nfargs));
call->setAttributes(f.decl->getAttributes());
Expand Down Expand Up @@ -4595,9 +4603,9 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, const jl_returnin
return w;
}

static bool uses_specsig(jl_value_t *sig, jl_value_t *rettype, bool needsparam, bool va, jl_code_info_t *src, bool prefer_specsig)
static bool uses_specsig(jl_value_t *sig, size_t nreq, jl_value_t *rettype, bool needsparam, bool va, jl_code_info_t *src, bool prefer_specsig)
{
if (va || needsparam)
if (needsparam)
return false;
if (!src || !jl_ast_flag_inferred((jl_array_t*)src))
return false;
Expand All @@ -4607,6 +4615,16 @@ static bool uses_specsig(jl_value_t *sig, jl_value_t *rettype, bool needsparam,
return false;
if (jl_nparams(sig) == 0)
return false;
if (va) {
if (jl_is_vararg_type(jl_tparam(sig, jl_nparams(sig)-1)))
return false;
// For now we can only handle va tuples that will end up being
// leaf types
for (size_t i = nreq; i < jl_nparams(sig); i++) {
if (!isbits_spec(jl_tparam(sig, i)))
return false;
}
}
// not invalid, consider if specialized signature is worthwhile
if (prefer_specsig)
return true;
Expand All @@ -4620,7 +4638,7 @@ static bool uses_specsig(jl_value_t *sig, jl_value_t *rettype, bool needsparam,
return true; // some elements of the union could be returned unboxed avoiding allocation
}
for (size_t i = 0; i < jl_nparams(sig); i++) {
if (isbits_spec(jl_tparam(sig, i), false)) { // assumes !va
if (isbits_spec(jl_tparam(sig, i), false)) {
return true;
}
}
Expand Down Expand Up @@ -4811,6 +4829,19 @@ static std::unique_ptr<Module> emit_function(
ctx.ssavalue_assigned.assign(n_ssavalues, false);
ctx.SAvalues.assign(n_ssavalues, jl_cgval_t());

bool needsparams = false;
if (jl_is_method(lam->def.method)) {
if (jl_svec_len(lam->def.method->sparam_syms) != jl_svec_len(lam->sparam_vals))
needsparams = true;
for (int i = 0; i < jl_svec_len(lam->sparam_vals); ++i) {
if (jl_is_typevar(jl_svecref(lam->sparam_vals, i)))
needsparams = true;
}
}

jl_value_t *jlrettype = lam->rettype;
bool specsig = uses_specsig(lam->specTypes, nreq, jlrettype, needsparams, va, src, params->prefer_specsig);

// step 3. some variable analysis
size_t i;
for (i = 0; i < nreq; i++) {
Expand All @@ -4825,7 +4856,18 @@ static std::unique_ptr<Module> emit_function(
if (va && ctx.vaSlot != -1) {
jl_varinfo_t &varinfo = ctx.slots[ctx.vaSlot];
varinfo.isArgument = true;
varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, jl_tuple_type);
if (specsig) {
size_t nvargs = jl_nparams(lam->specTypes)-nreq;
jl_svec_t *tupargs = jl_alloc_svec(nvargs);
for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) {
jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
jl_svecset(tupargs, i-nreq, argType);
}
jl_value_t *typ = (jl_value_t*)jl_apply_tuple_type(tupargs);
varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, typ);
} else {
varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, jl_tuple_type);
}
}

for (i = 0; i < vinfoslen; i++) {
Expand All @@ -4852,17 +4894,12 @@ static std::unique_ptr<Module> emit_function(
mark_volatile_vars(stmts, ctx.slots);

// step 4. determine function signature
bool needsparams = jl_is_method(lam->def.method)
? jl_svec_len(lam->def.method->sparam_syms) != jl_svec_len(lam->sparam_vals)
: false;
for (i = 0; !needsparams && i < jl_svec_len(lam->sparam_vals); i++) {
jl_value_t *e = jl_svecref(lam->sparam_vals, i);
if (jl_is_typevar(e))
needsparams = true;
}

jl_value_t *jlrettype = lam->rettype;
bool specsig = uses_specsig(lam->specTypes, jlrettype, needsparams, va, src, params->prefer_specsig);
if (!specsig)
ctx.nReqArgs--; // function not part of argArray in jlcall

Expand Down Expand Up @@ -5096,7 +5133,7 @@ static std::unique_ptr<Module> emit_function(
alloc_def_flag(ctx, varinfo);
continue;
}
else if (varinfo.isArgument) {
else if (varinfo.isArgument && !(specsig && i == (size_t)ctx.vaSlot)) {
// if we can unbox it, just use the input pointer
if (i != (size_t)ctx.vaSlot && isbits_spec(jt, false))
continue;
Expand Down Expand Up @@ -5180,6 +5217,27 @@ static std::unique_ptr<Module> emit_function(

// step 9. move args into local variables
Function::arg_iterator AI = f->arg_begin();

auto get_specsig_arg = [&](jl_value_t *argType, Type *llvmArgType, bool isboxed) {
jl_cgval_t theArg;
if (type_is_ghost(llvmArgType)) { // this argument is not actually passed
theArg = ghostValue(argType);
}
else if (llvmArgType->isAggregateType()) {
Argument *Arg = &*AI++;
maybe_mark_argument_dereferenceable(Arg, argType);
theArg = mark_julia_slot(Arg, argType, NULL, tbaa_const); // this argument is by-pointer
theArg.isimmutable = true;
}
else {
Argument *Arg = &*AI++;
if (isboxed)
maybe_mark_argument_dereferenceable(Arg, argType);
theArg = mark_julia_type(ctx, Arg, isboxed, argType);
}
return theArg;
};

if (ctx.has_sret)
AI++; // skip sret slot
for (i = 0; i < nreq; i++) {
Expand All @@ -5201,21 +5259,7 @@ static std::unique_ptr<Module> emit_function(
}
else {
if (specsig) {
if (type_is_ghost(llvmArgType)) { // this argument is not actually passed
theArg = ghostValue(argType);
}
else if (llvmArgType->isAggregateType()) {
Argument *Arg = &*AI++;
maybe_mark_argument_dereferenceable(Arg, argType);
theArg = mark_julia_slot(Arg, argType, NULL, tbaa_const); // this argument is by-pointer
theArg.isimmutable = true;
}
else {
Argument *Arg = &*AI++;
if (isboxed)
maybe_mark_argument_dereferenceable(Arg, argType);
theArg = mark_julia_type(ctx, Arg, isboxed, argType);
}
theArg = get_specsig_arg(argType, llvmArgType, isboxed);
}
else {
if (i == 0) {
Expand Down Expand Up @@ -5279,7 +5323,18 @@ static std::unique_ptr<Module> emit_function(
if (vi.value.constant || !vi.used) {
assert(vi.boxroot == NULL);
}
else {
else if (specsig) {
size_t nvargs = jl_nparams(lam->specTypes)-nreq;
jl_cgval_t vargs[nvargs];
for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) {
jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
bool isboxed;
Type *llvmArgType = julia_type_to_llvm(argType, &isboxed);
vargs[i-nreq] = get_specsig_arg(argType, llvmArgType, isboxed);
}
jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, nvargs, vargs);
emit_vi_assignment_unboxed(ctx, vi, NULL, tuple);
} else {
// restarg = jl_f_tuple(NULL, &args[nreq], nargs - nreq)
CallInst *restTuple =
ctx.builder.CreateCall(prepare_call(jltuple_func),
Expand Down

0 comments on commit adf8e0b

Please sign in to comment.