Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve life with large tuples a little #16460

Merged
merged 4 commits into from
May 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion base/inference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ const MAX_TYPE_DEPTH = 7
const MAX_TUPLETYPE_LEN = 8
const MAX_TUPLE_DEPTH = 4

const MAX_TUPLE_SPLAT = 16

# alloc_elim_pass! relies on `Slot_AssignedOnce | Slot_UsedUndef` being
# SSA. This should be true now but can break if we start to track conditional
# constants. e.g.
Expand Down Expand Up @@ -2960,7 +2962,7 @@ function inlining_pass(e::Expr, sv, linfo)
newargs[i-2] = aarg.args[2:end]
elseif isa(aarg, Tuple)
newargs[i-2] = Any[ QuoteNode(x) for x in aarg ]
elseif isa(t,DataType) && t.name===Tuple.name && !isvatuple(t) && effect_free(aarg,sv,true)
elseif isa(t,DataType) && t.name===Tuple.name && !isvatuple(t) && effect_free(aarg,sv,true) && length(t.parameters) <= MAX_TUPLE_SPLAT
# apply(f,t::(x,y)) => f(t[1],t[2])
tp = t.parameters
newargs[i-2] = Any[ mk_getfield(aarg,j,tp[j]) for j=1:length(tp) ]
Expand Down
12 changes: 11 additions & 1 deletion base/tuple.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ map(f, t::Tuple{Any,}) = (f(t[1]),)
map(f, t::Tuple{Any, Any}) = (f(t[1]), f(t[2]))
map(f, t::Tuple{Any, Any, Any}) = (f(t[1]), f(t[2]), f(t[3]))
map(f, t::Tuple) = (f(t[1]), map(f,tail(t))...)
# stop inlining after some number of arguments to avoid code blowup
#
# This method matches tuples with 16 or more elements (16 `Any` slots followed
# by `Vararg{Any}`). It applies `f` to each element in index order, collecting
# the results in a temporary `Any` array, and then splats that array back into
# a tuple.
function map(f, t::Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
Any,Any,Any,Any,Any,Any,Any,Any,Vararg{Any}})
n = length(t)
# scratch buffer with one untyped slot per tuple element
A = Array(Any,n)
for i=1:n
A[i] = f(t[i])
end
# convert the buffer back into a tuple
(A...,)
end
# 2 argument function
map(f, t::Tuple{}, s::Tuple{}) = ()
map(f, t::Tuple{Any,}, s::Tuple{Any,}) = (f(t[1],s[1]),)
Expand All @@ -96,7 +106,7 @@ heads(t::Tuple, ts::Tuple...) = (t[1], heads(ts...)...)
tails() = ()
tails(t::Tuple, ts::Tuple...) = (tail(t), tails(ts...)...)
map(f, ::Tuple{}, ts::Tuple...) = ()
map(f, t::Tuple, ts::Tuple...) = (f(heads(t, ts...)...), map(f, tails(t, ts...)...)...)
map(f, t1::Tuple, t2::Tuple, ts::Tuple...) = (f(heads(t1, t2, ts...)...), map(f, tails(t1, t2, ts...)...)...)

# type-stable padding
fill_to_length{N}(t::Tuple, val, ::Type{Val{N}}) = _ftl((), val, Val{N}, t...)
Expand Down
109 changes: 79 additions & 30 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ static LoadInst *build_load (Value *ptr, jl_value_t *jltype) {
return builder.CreateAlignedLoad(ptr, julia_alignment(ptr, jltype, 0));
}

static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt);
static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL, bool volatile_store = false);

static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
jl_codectx_t *ctx, MDNode *tbaa, unsigned alignment = 0)
Expand Down Expand Up @@ -1343,12 +1343,29 @@ static Value *emit_array_nd_index(const jl_cgval_t &ainfo, jl_value_t *ex, size_
// --- boxing ---

static Value *emit_allocobj(size_t static_size);
// Store the type tag `jt` into the location returned by emit_typeptr_addr(v),
// marking the store with the `tbaa_tag` alias metadata.
static void init_tag(Value *v, Value *jt)
{
tbaa_decorate(tbaa_tag, builder.CreateStore(jt, emit_typeptr_addr(v)));
}
static Value *init_bits_value(Value *newv, Value *jt, Value *v, MDNode *tbaa)
{
tbaa_decorate(tbaa_tag, builder.CreateStore(jt, emit_typeptr_addr(newv)));
init_tag(newv, jt);
tbaa_decorate(tbaa, builder.CreateAlignedStore(v, builder.CreateBitCast(newv, PointerType::get(v->getType(),0)), sizeof(void*))); // min alignment in julia's gc is pointer-aligned
return newv;
}
static Value *as_value(Type *t, const jl_cgval_t&);
// Initialize the freshly allocated object `newv` from the codegen value `v`
// and return `newv`.
//   newv - destination object; receives the type tag and the payload
//   v    - source value; its `typ` supplies the tag and the payload size
//   tbaa - alias metadata, used only on the by-value path below
//   t    - LLVM type of the payload; used to form the pointer type handed to
//          data_pointer() on the by-pointer path
//   ctx  - codegen context, forwarded to data_pointer()
static Value *init_bits_cgval(Value *newv, const jl_cgval_t& v, MDNode *tbaa, Type *t, jl_codectx_t *ctx)
{
Value *jt = literal_pointer_val(v.typ);
if (v.ispointer()) {
// `v` is held by pointer: write the tag, then copy jl_datatype_size(v.typ)
// bytes with memcpy instead of loading the value and storing it again.
// NOTE(review): `tbaa` is not applied to this memcpy, and sizeof(void*) is
// presumably the alignment argument -- confirm against the LLVM version in use.
init_tag(newv, jt);
builder.CreateMemCpy(newv, data_pointer(v,ctx,PointerType::get(t,0)), jl_datatype_size(v.typ), sizeof(void*));
return newv;
}
else {
// `v.V` is passed on directly: init_bits_value receives the tag, the value
// and `tbaa`, and its result is returned.
return init_bits_value(newv, jt, v.V, tbaa);
}
}

static jl_value_t *static_constant_instance(Constant *constant, jl_value_t *jt)
{
Expand Down Expand Up @@ -1431,6 +1448,14 @@ static Value *call_with_unsigned(Function *ufunc, Value *v)

static void jl_add_linfo_root(jl_lambda_info_t *li, jl_value_t *val);

static Value *as_value(Type *t, const jl_cgval_t &v)
{
assert(!v.isboxed);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is faulty. the code-generator can't meet this condition.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, nvm. i see you only use this after checking.

if (v.ispointer())
return tbaa_decorate(v.tbaa, build_load(builder.CreatePointerCast(v.V, t->getPointerTo()), v.typ));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should use data_pointer() instead of v.V (in case v is a constant)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doesn't matter either then (since you already checked for isconstant)

return v.V;
}

// this is used to wrap values for generic contexts, where a
// dynamically-typed value is required (e.g. argument to unknown function).
// if it's already a pointer it's left alone.
Expand All @@ -1449,13 +1474,11 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
Type *t = julia_type_to_llvm(vinfo.typ);
assert(!type_is_ghost(t)); // should have been handled by isghost above!

if (vinfo.ispointer())
v = tbaa_decorate(vinfo.tbaa, build_load(builder.CreatePointerCast(v, t->getPointerTo()), vinfo.typ));

if (t == T_int1)
return julia_bool(v);
return julia_bool(as_value(t,vinfo));

if (ctx->linfo && ctx->linfo->def) { // don't bother codegen pre-boxing for toplevel
if (ctx->linfo && ctx->linfo->def && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel
if (Constant *c = dyn_cast<Constant>(v)) {
jl_value_t *s = static_constant_instance(c, jt);
if (s) {
Expand All @@ -1469,30 +1492,31 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
assert(jl_is_datatype(jb));
Value *box = NULL;
if (jb == jl_int8_type)
box = call_with_signed(box_int8_func, v);
box = call_with_signed(box_int8_func, as_value(t, vinfo));
else if (jb == jl_int16_type)
box = call_with_signed(box_int16_func, v);
box = call_with_signed(box_int16_func, as_value(t,vinfo));
else if (jb == jl_int32_type)
box = call_with_signed(box_int32_func, v);
box = call_with_signed(box_int32_func, as_value(t,vinfo));
else if (jb == jl_int64_type)
box = call_with_signed(box_int64_func, v);
box = call_with_signed(box_int64_func, as_value(t,vinfo));
else if (jb == jl_float32_type)
box = builder.CreateCall(prepare_call(box_float32_func), v);
box = builder.CreateCall(prepare_call(box_float32_func), as_value(t,vinfo));
//if (jb == jl_float64_type)
// box = builder.CreateCall(box_float64_func, v);
// box = builder.CreateCall(box_float64_func, as_value(t,vinfo));
// for Float64, fall through to generic case below, to inline alloc & init of Float64 box. cheap, I know.
else if (jb == jl_uint8_type)
box = call_with_unsigned(box_uint8_func, v);
box = call_with_unsigned(box_uint8_func, as_value(t,vinfo));
else if (jb == jl_uint16_type)
box = call_with_unsigned(box_uint16_func, v);
box = call_with_unsigned(box_uint16_func, as_value(t,vinfo));
else if (jb == jl_uint32_type)
box = call_with_unsigned(box_uint32_func, v);
box = call_with_unsigned(box_uint32_func, as_value(t,vinfo));
else if (jb == jl_uint64_type)
box = call_with_unsigned(box_uint64_func, v);
box = call_with_unsigned(box_uint64_func, as_value(t,vinfo));
else if (jb == jl_char_type)
box = call_with_unsigned(box_char_func, v);
box = call_with_unsigned(box_char_func, as_value(t,vinfo));
else if (jb == jl_ssavalue_type) {
unsigned zero = 0;
v = as_value(t, vinfo);
assert(v->getType() == jl_ssavalue_type->struct_decl);
v = builder.CreateExtractValue(v, makeArrayRef(&zero, 1));
box = call_with_unsigned(box_ssavalue_func, v);
Expand All @@ -1506,7 +1530,7 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
return literal_pointer_val(jb->instance);
}
else {
box = init_bits_value(emit_allocobj(jl_datatype_size(jt)), literal_pointer_val(jt), v, jb->mutabl ? tbaa_mutab : tbaa_immut);
box = init_bits_cgval(emit_allocobj(jl_datatype_size(jt)), vinfo, jb->mutabl ? tbaa_mutab : tbaa_immut, t, ctx);
}

if (gcrooted) {
Expand Down Expand Up @@ -1650,8 +1674,21 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg
if (nf > 0) {
if (jl_isbits(sty)) {
Type *lt = julia_type_to_llvm(ty);
// whether we should perform the initialization with the struct as a IR value
// or instead initialize the stack buffer with stores
bool init_as_value = false;
if (lt->isVectorTy() ||
is_vecelement_type(ty) ||
type_is_ghost(lt)) // maybe also check the size ?
init_as_value = true;

size_t na = nargs-1 < nf ? nargs-1 : nf;
Value *strct = UndefValue::get(lt == T_void ? NoopType : lt);
Value *strct;
if (init_as_value)
strct = UndefValue::get(lt == T_void ? NoopType : lt);
else
strct = emit_static_alloca(lt);

unsigned idx = 0;
for (size_t i=0; i < na; i++) {
jl_value_t *jtype = jl_svecref(sty->types,i);
Expand All @@ -1660,22 +1697,34 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg
if (!jl_subtype(fval_info.typ, jtype, 0))
emit_typecheck(fval_info, jtype, "new", ctx);
if (!type_is_ghost(fty)) {
Value *fval = emit_unbox(fty, fval_info, jtype);
Value *fval = NULL, *dest = NULL;
if (!init_as_value) {
// avoid unboxing the argument explicitly
// and use memcpy instead
dest = builder.CreateConstInBoundsGEP2_32(lt, strct, 0, i);
}
if (fty == T_int1)
fval = builder.CreateZExt(fval, T_int8);
if (lt->isVectorTy())
strct = builder.CreateInsertElement(strct, fval, ConstantInt::get(T_int32,idx));
else if (lt->isAggregateType())
strct = builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(&idx,1));
else {
// Must be a VecElement type, which comes unwrapped in LLVM.
assert(is_vecelement_type(ty));
strct = fval;
fty = T_int8;
fval = emit_unbox(fty, fval_info, jtype, dest);

if (init_as_value) {
if (lt->isVectorTy())
strct = builder.CreateInsertElement(strct, fval, ConstantInt::get(T_int32,idx));
else if (lt->isAggregateType())
strct = builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(&idx,1));
else {
// Must be a VecElement type, which comes unwrapped in LLVM.
assert(is_vecelement_type(ty));
strct = fval;
}
}
}
idx++;
}
return mark_julia_type(strct, false, ty, ctx);
if (init_as_value)
return mark_julia_type(strct, false, ty, ctx);
else
return mark_julia_slot(strct, ty, tbaa_stack);
}
Value *f1 = NULL;
size_t j = 0;
Expand Down
22 changes: 11 additions & 11 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2966,9 +2966,9 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx)
if (!slot.isboxed && !slot.isimmutable) { // emit a copy of values stored in mutable slots
Type *vtype = julia_type_to_llvm(slot.typ);
assert(vtype != T_pjlvalue);
slot = mark_julia_type(
emit_unbox(vtype, slot, slot.typ),
false, slot.typ, ctx);
Value *dest = emit_static_alloca(vtype);
emit_unbox(vtype, slot, slot.typ, dest);
slot = mark_julia_slot(dest, slot.typ, tbaa_stack);
}
if (slot.isboxed && slot.isimmutable) {
// see if inference had a better type for the ssavalue than the expression (after inlining getfield on a Tuple)
Expand Down Expand Up @@ -3045,9 +3045,7 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx)
else {
// store unboxed
assert(vi.value.ispointer());
builder.CreateStore(
emit_unbox(julia_type_to_llvm(vi.value.typ), rval_info, vi.value.typ),
vi.value.V, vi.isVolatile);
emit_unbox(julia_type_to_llvm(vi.value.typ), rval_info, vi.value.typ, vi.value.V, vi.isVolatile);
}
}

Expand Down Expand Up @@ -4754,16 +4752,18 @@ static std::unique_ptr<Module> emit_function(jl_lambda_info_t *lam, jl_llvm_func
retboxed = true;
}
jl_cgval_t retvalinfo = emit_expr(jl_exprarg(ex,0), &ctx);
if (retboxed)
if (retboxed) {
retval = boxed(retvalinfo, &ctx, false); // skip the gcroot on the return path
else if (!type_is_ghost(retty))
retval = emit_unbox(retty, retvalinfo, jlrettype);
assert(!ctx.sret);
}
else if (!type_is_ghost(retty)) {
retval = emit_unbox(retty, retvalinfo, jlrettype,
ctx.sret ? &*ctx.f->arg_begin() : NULL);
}
else // undef return type
retval = NULL;
if (do_malloc_log && lno != -1)
mallocVisitLine(filename, lno);
if (ctx.sret)
builder.CreateStore(retval, &*ctx.f->arg_begin());
if (type_is_ghost(retty) || ctx.sret)
builder.CreateRetVoid();
else
Expand Down
72 changes: 52 additions & 20 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,8 @@ static Constant *julia_const_to_llvm(jl_value_t *e, bool nested=false)

static jl_cgval_t ghostValue(jl_value_t *ty);

// emit code to unpack a raw value from a box into registers
static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt)
// emit code to unpack a raw value from a box into registers or a stack slot
static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *dest, bool volatile_store)
{
assert(to != T_pjlvalue);
// TODO: fully validate that x.typ == jt?
Expand All @@ -276,47 +276,79 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt)
Type *ty = unboxed->getType();
// bools are stored internally as int8 (for now)
if (ty == T_int1 && to == T_int8)
return builder.CreateZExt(unboxed, T_int8);
if (ty->isPointerTy() && !to->isPointerTy())
return builder.CreatePtrToInt(unboxed, to);
if (!ty->isPointerTy() && to->isPointerTy())
return builder.CreateIntToPtr(unboxed, to);
if (ty->isPointerTy() && to->isPointerTy())
unboxed = builder.CreateZExt(unboxed, T_int8);
else if (ty->isPointerTy() && !to->isPointerTy())
unboxed = builder.CreatePtrToInt(unboxed, to);
else if (!ty->isPointerTy() && to->isPointerTy())
unboxed = builder.CreateIntToPtr(unboxed, to);
else if (ty->isPointerTy() && to->isPointerTy())
// pointer types are going away anyways, and this can come up in ccall argument conversion
return builder.CreatePointerCast(unboxed, to);
if (ty != to) {
unboxed = builder.CreatePointerCast(unboxed, to);
else if (ty != to) {
// this can happen when a branch yielding a different type ends
// up being dead code, and type inference knows that the other
// branch's type is the only one that matters.
// assert(ty == T_void);
//emit_error("emit_unbox: a type mismatch error in occurred during codegen", ctx);
return UndefValue::get(to); // type mismatch error
unboxed = UndefValue::get(to); // type mismatch error
}
return unboxed;
if (!dest)
return unboxed;
builder.CreateStore(unboxed, dest, volatile_store);
return NULL;
}

// bools stored as int8, so an extra Trunc is needed to get an int1
Value *p = x.constant ? literal_pointer_val(x.constant) : x.V;
Type *ptype = (to == T_int1 ? T_pint8 : to->getPointerTo());
if (p->getType() != ptype)
p = builder.CreateBitCast(p, ptype);

Value *unboxed = NULL;
if (to == T_int1)
return builder.CreateTrunc(tbaa_decorate(x.tbaa, builder.CreateLoad(p)), T_int1);
if (jt == (jl_value_t*)jl_bool_type)
return builder.CreateZExt(builder.CreateTrunc(tbaa_decorate(x.tbaa, builder.CreateLoad(p)), T_int1), to);
unboxed = builder.CreateTrunc(tbaa_decorate(x.tbaa, builder.CreateLoad(p)), T_int1);
else if (jt == (jl_value_t*)jl_bool_type)
unboxed = builder.CreateZExt(builder.CreateTrunc(tbaa_decorate(x.tbaa, builder.CreateLoad(p)), T_int1), to);
if (unboxed) {
if (!dest)
return unboxed;
builder.CreateStore(unboxed, dest);
return NULL;
}

Instruction *load;
int alignment;
if (x.isboxed) {
load = builder.CreateAlignedLoad(p, 16); // julia's gc gives 16-byte aligned addresses
// julia's gc gives 16-byte aligned addresses
alignment = 16;
}
else if (jt) {
load = build_load(p, jt);
alignment = julia_alignment(p, jt, 0);
}
else {
// stack has default alignment
load = builder.CreateLoad(p);
alignment = 0;
}
if (dest) {
// callers using the dest argument only use it for a stack slot for now
alignment = 0;
MDNode *tbaa = x.tbaa;
// the memcpy intrinsic does not allow specifying different alias tags
// for the load part (x.tbaa) and the store part (tbaa_stack).
// since the tbaa lattice has to be a tree, we unfortunately have
// x.tbaa ∪ tbaa_stack = tbaa_root if x.tbaa != tbaa_stack
if (tbaa != tbaa_stack)
tbaa = NULL;
builder.CreateMemCpy(dest, p, jl_datatype_size(jt), alignment, volatile_store, tbaa);
return NULL;
}
else {
Instruction *load;
if (alignment)
load = builder.CreateAlignedLoad(p, alignment);
else
load = builder.CreateLoad(p);
return tbaa_decorate(x.tbaa, load);
}
return tbaa_decorate(x.tbaa, load);
}

// unbox, trying to determine correct bitstype automatically
Expand Down