From 71f75ce6d9214971ed4c7f4cc17bd50d8318043a Mon Sep 17 00:00:00 2001
From: Oscar Blumberg
Date: Thu, 19 May 2016 15:55:14 -0400
Subject: [PATCH 1/4] improve life with large tuples a little

---
 base/inference.jl  |   4 +-
 base/tuple.jl      |  12 ++++-
 src/cgutils.cpp    | 109 ++++++++++++++++++++++++++++++++-------------
 src/codegen.cpp    |  12 ++---
 src/intrinsics.cpp |  63 +++++++++++++++++---------
 5 files changed, 143 insertions(+), 57 deletions(-)

diff --git a/base/inference.jl b/base/inference.jl
index 84b1d5a8d3781..1379d6ee2587d 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -6,6 +6,8 @@ const MAX_TYPE_DEPTH = 7
 const MAX_TUPLETYPE_LEN = 8
 const MAX_TUPLE_DEPTH = 4
 
+const MAX_TUPLE_SPLAT = 16
+
 # alloc_elim_pass! relies on `Slot_AssignedOnce | Slot_UsedUndef` being
 # SSA. This should be true now but can break if we start to track conditional
 # constants. e.g.
@@ -2960,7 +2962,7 @@ function inlining_pass(e::Expr, sv, linfo)
                 newargs[i-2] = aarg.args[2:end]
             elseif isa(aarg, Tuple)
                 newargs[i-2] = Any[ QuoteNode(x) for x in aarg ]
-            elseif isa(t,DataType) && t.name===Tuple.name && !isvatuple(t) && effect_free(aarg,sv,true)
+            elseif isa(t,DataType) && t.name===Tuple.name && !isvatuple(t) && effect_free(aarg,sv,true) && length(t.parameters) <= MAX_TUPLE_SPLAT
                 # apply(f,t::(x,y)) => f(t[1],t[2])
                 tp = t.parameters
                 newargs[i-2] = Any[ mk_getfield(aarg,j,tp[j]) for j=1:length(tp) ]
diff --git a/base/tuple.jl b/base/tuple.jl
index f48df7b0d0516..65240d64af48e 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -86,6 +86,16 @@ map(f, t::Tuple{Any,}) = (f(t[1]),)
 map(f, t::Tuple{Any, Any}) = (f(t[1]), f(t[2]))
 map(f, t::Tuple{Any, Any, Any}) = (f(t[1]), f(t[2]), f(t[3]))
 map(f, t::Tuple) = (f(t[1]), map(f,tail(t))...)
+# stop inlining after some number of arguments to avoid code blowup
+function map(f, t::Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
+                         Any,Any,Any,Any,Any,Any,Any,Any,Vararg{Any}})
+    n = length(t)
+    A = Array(Any,n)
+    for i=1:n
+        A[i] = f(t[i])
+    end
+    (A...,)
+end
 # 2 argument function
 map(f, t::Tuple{}, s::Tuple{}) = ()
 map(f, t::Tuple{Any,}, s::Tuple{Any,}) = (f(t[1],s[1]),)
@@ -96,7 +106,7 @@ heads(t::Tuple, ts::Tuple...) = (t[1], heads(ts...)...)
 tails() = ()
 tails(t::Tuple, ts::Tuple...) = (tail(t), tails(ts...)...)
 map(f, ::Tuple{}, ts::Tuple...) = ()
-map(f, t::Tuple, ts::Tuple...) = (f(heads(t, ts...)...), map(f, tails(t, ts...)...)...)
+map(f, t1::Tuple, t2::Tuple, ts::Tuple...) = (f(heads(t1, t2, ts...)...), map(f, tails(t1, t2, ts...)...)...)
 
 # type-stable padding
 fill_to_length{N}(t::Tuple, val, ::Type{Val{N}}) = _ftl((), val, Val{N}, t...)
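[Annotation, not part of the patch series: the Base changes above bound how
much code a long tuple can generate. A minimal Julia sketch of the resulting
behavior, assuming the 0.4-era `Array(Any, n)` constructor the patch itself
uses:

    t = ntuple(i -> i, 20)   # >= 16 elements: dispatches to the new Vararg
    map(x -> x + 1, t)       # method, which collects through an Array{Any}
                             # and re-splats instead of inlining 20 levels deep

    s = ntuple(i -> i, 3)    # short tuples keep the type-stable,
    map(x -> x + 1, s)       # fully inlined recursive definitions

Likewise, `inlining_pass` now rewrites a splat `f(t...)` into
`f(t[1], ..., t[n])` only when the tuple type has at most MAX_TUPLE_SPLAT (16)
parameters; longer splats stay on the generic `_apply` path.]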
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 10c67a21b5435..10a303f7c9331 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -834,7 +834,7 @@ static LoadInst *build_load (Value *ptr, jl_value_t *jltype) {
     return builder.CreateAlignedLoad(ptr, julia_alignment(ptr, jltype, 0));
 }
 
-static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt);
+static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL);
 
 static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
                              jl_codectx_t *ctx, MDNode *tbaa, unsigned alignment = 0)
@@ -1343,12 +1343,29 @@ static Value *emit_array_nd_index(const jl_cgval_t &ainfo, jl_value_t *ex, size_
 // --- boxing ---
 
 static Value *emit_allocobj(size_t static_size);
+static void init_tag(Value *v, Value *jt)
+{
+    tbaa_decorate(tbaa_tag, builder.CreateStore(jt, emit_typeptr_addr(v)));
+}
 static Value *init_bits_value(Value *newv, Value *jt, Value *v, MDNode *tbaa)
 {
-    tbaa_decorate(tbaa_tag, builder.CreateStore(jt, emit_typeptr_addr(newv)));
+    init_tag(newv, jt);
     tbaa_decorate(tbaa, builder.CreateAlignedStore(v, builder.CreateBitCast(newv, PointerType::get(v->getType(),0)), sizeof(void*))); // min alignment in julia's gc is pointer-aligned
     return newv;
 }
+static Value *as_value(Type *t, const jl_cgval_t&);
+static Value *init_bits_cgval(Value *newv, const jl_cgval_t& v, MDNode *tbaa, Type *t)
+{
+    Value *jt = literal_pointer_val(v.typ);
+    if (v.ispointer()) {
+        init_tag(newv, jt);
+        builder.CreateMemCpy(newv, v.V, jl_datatype_size(v.typ), sizeof(void*));
+        return newv;
+    }
+    else {
+        return init_bits_value(newv, jt, as_value(t,v), tbaa);
+    }
+}
 
 static jl_value_t *static_constant_instance(Constant *constant, jl_value_t *jt)
 {
@@ -1431,6 +1448,14 @@ static Value *call_with_unsigned(Function *ufunc, Value *v)
 
 static void jl_add_linfo_root(jl_lambda_info_t *li, jl_value_t *val);
 
+static Value *as_value(Type *t, const jl_cgval_t &v)
+{
+    assert(!v.isboxed);
+    if (v.ispointer())
+        return tbaa_decorate(v.tbaa, build_load(builder.CreatePointerCast(v.V, t->getPointerTo()), v.typ));
+    return v.V;
+}
+
 // this is used to wrap values for generic contexts, where a
 // dynamically-typed value is required (e.g. argument to unknown function).
 // if it's already a pointer it's left alone.
@@ -1449,13 +1474,11 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
     Type *t = julia_type_to_llvm(vinfo.typ);
     assert(!type_is_ghost(t)); // should have been handled by isghost above!
     Value *v = vinfo.V;
 
-    if (vinfo.ispointer())
-        v = tbaa_decorate(vinfo.tbaa, build_load(builder.CreatePointerCast(v, t->getPointerTo()), vinfo.typ));
     if (t == T_int1)
-        return julia_bool(v);
+        return julia_bool(as_value(t,vinfo));
 
-    if (ctx->linfo && ctx->linfo->def) { // don't bother codegen pre-boxing for toplevel
+    if (ctx->linfo && ctx->linfo->def && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel
         if (Constant *c = dyn_cast<Constant>(v)) {
             jl_value_t *s = static_constant_instance(c, jt);
             if (s) {
@@ -1469,30 +1492,31 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
     assert(jl_is_datatype(jb));
     Value *box = NULL;
     if (jb == jl_int8_type)
-        box = call_with_signed(box_int8_func, v);
+        box = call_with_signed(box_int8_func, as_value(t, vinfo));
     else if (jb == jl_int16_type)
-        box = call_with_signed(box_int16_func, v);
+        box = call_with_signed(box_int16_func, as_value(t,vinfo));
     else if (jb == jl_int32_type)
-        box = call_with_signed(box_int32_func, v);
+        box = call_with_signed(box_int32_func, as_value(t,vinfo));
     else if (jb == jl_int64_type)
-        box = call_with_signed(box_int64_func, v);
+        box = call_with_signed(box_int64_func, as_value(t,vinfo));
     else if (jb == jl_float32_type)
-        box = builder.CreateCall(prepare_call(box_float32_func), v);
+        box = builder.CreateCall(prepare_call(box_float32_func), as_value(t,vinfo));
     //if (jb == jl_float64_type)
-    //  box = builder.CreateCall(box_float64_func, v);
+    //  box = builder.CreateCall(box_float64_func, as_value(t,vinfo);
     // for Float64, fall through to generic case below, to inline alloc & init of Float64 box. cheap, I know.
     else if (jb == jl_uint8_type)
-        box = call_with_unsigned(box_uint8_func, v);
+        box = call_with_unsigned(box_uint8_func, as_value(t,vinfo));
     else if (jb == jl_uint16_type)
-        box = call_with_unsigned(box_uint16_func, v);
+        box = call_with_unsigned(box_uint16_func, as_value(t,vinfo));
     else if (jb == jl_uint32_type)
-        box = call_with_unsigned(box_uint32_func, v);
+        box = call_with_unsigned(box_uint32_func, as_value(t,vinfo));
     else if (jb == jl_uint64_type)
-        box = call_with_unsigned(box_uint64_func, v);
+        box = call_with_unsigned(box_uint64_func, as_value(t,vinfo));
     else if (jb == jl_char_type)
-        box = call_with_unsigned(box_char_func, v);
+        box = call_with_unsigned(box_char_func, as_value(t,vinfo));
     else if (jb == jl_ssavalue_type) {
         unsigned zero = 0;
+        v = as_value(t, vinfo);
         assert(v->getType() == jl_ssavalue_type->struct_decl);
         v = builder.CreateExtractValue(v, makeArrayRef(&zero, 1));
         box = call_with_unsigned(box_ssavalue_func, v);
@@ -1506,7 +1530,7 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
         return literal_pointer_val(jb->instance);
     }
     else {
-        box = init_bits_value(emit_allocobj(jl_datatype_size(jt)), literal_pointer_val(jt), v, jb->mutabl ? tbaa_mutab : tbaa_immut);
+        box = init_bits_cgval(emit_allocobj(jl_datatype_size(jt)), vinfo, jb->mutabl ? tbaa_mutab : tbaa_immut, t);
     }
 
     if (gcrooted) {
@@ -1650,8 +1674,21 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg
     if (nf > 0) {
         if (jl_isbits(sty)) {
             Type *lt = julia_type_to_llvm(ty);
+            // whether we should perform the initialization with the struct as a IR value
+            // or instead initialize the stack buffer with stores
+            bool init_as_value = false;
+            if (lt->isVectorTy() ||
+                is_vecelement_type(ty) ||
+                type_is_ghost(lt)) // maybe also check the size ?
+                init_as_value = true;
+
             size_t na = nargs-1 < nf ? nargs-1 : nf;
-            Value *strct = UndefValue::get(lt == T_void ? NoopType : lt);
+            Value *strct;
+            if (init_as_value)
+                strct = UndefValue::get(lt == T_void ? NoopType : lt);
+            else
+                strct = emit_static_alloca(lt);
+
             unsigned idx = 0;
             for (size_t i=0; i < na; i++) {
                 jl_value_t *jtype = jl_svecref(sty->types,i);
@@ -1660,22 +1697,34 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg
                 if (!jl_subtype(fval_info.typ, jtype, 0))
                     emit_typecheck(fval_info, jtype, "new", ctx);
                 if (!type_is_ghost(fty)) {
-                    Value *fval = emit_unbox(fty, fval_info, jtype);
+                    Value *fval = NULL, *dest = NULL;
+                    if (!init_as_value) {
+                        // avoid unboxing the argument explicitely
+                        // and use memcpy instead
+                        dest = builder.CreateConstInBoundsGEP2_32(lt, strct, 0, i);
+                    }
                     if (fty == T_int1)
-                        fval = builder.CreateZExt(fval, T_int8);
-                    if (lt->isVectorTy())
-                        strct = builder.CreateInsertElement(strct, fval, ConstantInt::get(T_int32,idx));
-                    else if (lt->isAggregateType())
-                        strct = builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(&idx,1));
-                    else {
-                        // Must be a VecElement type, which comes unwrapped in LLVM.
-                        assert(is_vecelement_type(ty));
-                        strct = fval;
+                        fty = T_int8;
+                    fval = emit_unbox(fty, fval_info, jtype, dest);
+
+                    if (init_as_value) {
+                        if (lt->isVectorTy())
+                            strct = builder.CreateInsertElement(strct, fval, ConstantInt::get(T_int32,idx));
+                        else if (lt->isAggregateType())
+                            strct = builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(&idx,1));
+                        else {
+                            // Must be a VecElement type, which comes unwrapped in LLVM.
+                            assert(is_vecelement_type(ty));
+                            strct = fval;
+                        }
                     }
                 }
                 idx++;
             }
-            return mark_julia_type(strct, false, ty, ctx);
+            if (init_as_value)
+                return mark_julia_type(strct, false, ty, ctx);
+            else
+                return mark_julia_slot(strct, ty, tbaa_stack);
         }
         Value *f1 = NULL;
         size_t j = 0;
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 24f395059e586..c438484b9998c 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4754,16 +4754,18 @@ static std::unique_ptr<Module> emit_function(jl_lambda_info_t *lam, jl_llvm_func
                 retboxed = true;
             }
             jl_cgval_t retvalinfo = emit_expr(jl_exprarg(ex,0), &ctx);
-            if (retboxed)
+            if (retboxed) {
                 retval = boxed(retvalinfo, &ctx, false); // skip the gcroot on the return path
-            else if (!type_is_ghost(retty))
-                retval = emit_unbox(retty, retvalinfo, jlrettype);
+                assert(!ctx.sret);
+            }
+            else if (!type_is_ghost(retty)) {
+                retval = emit_unbox(retty, retvalinfo, jlrettype,
+                                    ctx.sret ? &*ctx.f->arg_begin() : NULL);
+            }
             else // undef return type
                 retval = NULL;
             if (do_malloc_log && lno != -1)
                 mallocVisitLine(filename, lno);
-            if (ctx.sret)
-                builder.CreateStore(retval, &*ctx.f->arg_begin());
             if (type_is_ghost(retty) || ctx.sret)
                 builder.CreateRetVoid();
             else
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 17a9c5f7f994f..fa1a06c02b786 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -257,8 +257,8 @@ static Constant *julia_const_to_llvm(jl_value_t *e, bool nested=false)
 
 static jl_cgval_t ghostValue(jl_value_t *ty);
 
-// emit code to unpack a raw value from a box into registers
-static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt)
+// emit code to unpack a raw value from a box into registers or a stack slot
+static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *dest)
 {
     assert(to != T_pjlvalue);
     // TODO: fully validate that x.typ == jt?
@@ -276,23 +276,26 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt)
         Type *ty = unboxed->getType();
         // bools are stored internally as int8 (for now)
         if (ty == T_int1 && to == T_int8)
-            return builder.CreateZExt(unboxed, T_int8);
-        if (ty->isPointerTy() && !to->isPointerTy())
-            return builder.CreatePtrToInt(unboxed, to);
-        if (!ty->isPointerTy() && to->isPointerTy())
-            return builder.CreateIntToPtr(unboxed, to);
-        if (ty->isPointerTy() && to->isPointerTy())
+            unboxed = builder.CreateZExt(unboxed, T_int8);
+        else if (ty->isPointerTy() && !to->isPointerTy())
+            unboxed = builder.CreatePtrToInt(unboxed, to);
+        else if (!ty->isPointerTy() && to->isPointerTy())
+            unboxed = builder.CreateIntToPtr(unboxed, to);
+        else if (ty->isPointerTy() && to->isPointerTy())
             // pointer types are going away anyways, and this can come up in ccall argument conversion
-            return builder.CreatePointerCast(unboxed, to);
-        if (ty != to) {
+            unboxed = builder.CreatePointerCast(unboxed, to);
+        else if (ty != to) {
            // this can happen when a branch yielding a different type ends
            // up being dead code, and type inference knows that the other
            // branch's type is the only one that matters.
            // assert(ty == T_void);
            //emit_error("emit_unbox: a type mismatch error in occurred during codegen", ctx);
-            return UndefValue::get(to); // type mismatch error
+            unboxed = UndefValue::get(to); // type mismatch error
         }
-        return unboxed;
+        if (!dest)
+            return unboxed;
+        builder.CreateStore(unboxed, dest);
+        return NULL;
     }
 
     // bools stored as int8, so an extra Trunc is needed to get an int1
@@ -300,23 +303,43 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt)
     Type *ptype = (to == T_int1 ? T_pint8 : to->getPointerTo());
     if (p->getType() != ptype)
         p = builder.CreateBitCast(p, ptype);
+
+    Value *unboxed = NULL;
     if (to == T_int1)
-        return builder.CreateTrunc(tbaa_decorate(x.tbaa, builder.CreateLoad(p)), T_int1);
-    if (jt == (jl_value_t*)jl_bool_type)
-        return builder.CreateZExt(builder.CreateTrunc(tbaa_decorate(x.tbaa, builder.CreateLoad(p)), T_int1), to);
+        unboxed = builder.CreateTrunc(tbaa_decorate(x.tbaa, builder.CreateLoad(p)), T_int1);
+    else if (jt == (jl_value_t*)jl_bool_type)
+        unboxed = builder.CreateZExt(builder.CreateTrunc(tbaa_decorate(x.tbaa, builder.CreateLoad(p)), T_int1), to);
+    if (unboxed) {
+        if (!dest)
+            return unboxed;
+        builder.CreateStore(unboxed, dest);
+        return NULL;
+    }
 
-    Instruction *load;
+    int alignment;
     if (x.isboxed) {
-        load = builder.CreateAlignedLoad(p, 16); // julia's gc gives 16-byte aligned addresses
+        // julia's gc gives 16-byte aligned addresses
+        alignment = 16;
     }
     else if (jt) {
-        load = build_load(p, jt);
+        alignment = julia_alignment(p, jt, 0);
     }
     else {
         // stack has default alignment
-        load = builder.CreateLoad(p);
+        alignment = 0;
+    }
+    if (dest) {
+        builder.CreateMemCpy(dest, p, jl_datatype_size(jt), alignment);
+        return NULL;
+    }
+    else {
+        Instruction *load;
+        if (alignment)
+            load = builder.CreateAlignedLoad(p, alignment);
+        else
+            load = builder.CreateLoad(p);
+        return tbaa_decorate(x.tbaa, load);
     }
-    return tbaa_decorate(x.tbaa, load);
 }
 
 // unbox, trying to determine correct bitstype automatically

From 04bba52e1dbe84458595766deea88e6cf5358b50 Mon Sep 17 00:00:00 2001
From: Oscar Blumberg
Date: Thu, 19 May 2016 21:22:33 -0400
Subject: [PATCH 2/4] fix align

---
 src/cgutils.cpp    | 8 ++++----
 src/intrinsics.cpp | 4 +++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 10a303f7c9331..a33136b695245 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1354,16 +1354,16 @@ static Value *init_bits_value(Value *newv, Value *jt, Value *v, MDNode *tbaa)
     return newv;
 }
 static Value *as_value(Type *t, const jl_cgval_t&);
-static Value *init_bits_cgval(Value *newv, const jl_cgval_t& v, MDNode *tbaa, Type *t)
+static Value *init_bits_cgval(Value *newv, const jl_cgval_t& v, MDNode *tbaa, Type *t, jl_codectx_t *ctx)
 {
     Value *jt = literal_pointer_val(v.typ);
     if (v.ispointer()) {
         init_tag(newv, jt);
-        builder.CreateMemCpy(newv, v.V, jl_datatype_size(v.typ), sizeof(void*));
+        builder.CreateMemCpy(newv, data_pointer(v,ctx,PointerType::get(t,0)), jl_datatype_size(v.typ), sizeof(void*));
         return newv;
     }
     else {
-        return init_bits_value(newv, jt, as_value(t,v), tbaa);
+        return init_bits_value(newv, jt, v.V, tbaa);
     }
 }
 
@@ -1530,7 +1530,7 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
         return literal_pointer_val(jb->instance);
     }
     else {
-        box = init_bits_cgval(emit_allocobj(jl_datatype_size(jt)), vinfo, jb->mutabl ? tbaa_mutab : tbaa_immut, t);
+        box = init_bits_cgval(emit_allocobj(jl_datatype_size(jt)), vinfo, jb->mutabl ? tbaa_mutab : tbaa_immut, t, ctx);
     }
 
     if (gcrooted) {
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index fa1a06c02b786..5d3d8e2b0deea 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -329,7 +329,9 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d
         alignment = 0;
     }
     if (dest) {
-        builder.CreateMemCpy(dest, p, jl_datatype_size(jt), alignment);
+        // callers using the dest argument only use it for a stack slot for now
+        alignment = 0;
+        builder.CreateMemCpy(dest, p, jl_datatype_size(jt), alignment, false, x.tbaa);
         return NULL;
     }
     else {

From 64e38b9276aae230fc48b6a2652c3d3a579d4e28 Mon Sep 17 00:00:00 2001
From: Oscar Blumberg
Date: Thu, 19 May 2016 21:58:23 -0400
Subject: [PATCH 3/4] var slots too

---
 src/cgutils.cpp    |  2 +-
 src/codegen.cpp    | 10 ++++------
 src/intrinsics.cpp |  6 +++---
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index a33136b695245..701d28781a3d1 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -834,7 +834,7 @@ static LoadInst *build_load (Value *ptr, jl_value_t *jltype) {
     return builder.CreateAlignedLoad(ptr, julia_alignment(ptr, jltype, 0));
 }
 
-static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL);
+static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL, bool volatile_store = false);
 
 static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
                              jl_codectx_t *ctx, MDNode *tbaa, unsigned alignment = 0)
diff --git a/src/codegen.cpp b/src/codegen.cpp
index c438484b9998c..941fe441c6cb5 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -2966,9 +2966,9 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx)
     if (!slot.isboxed && !slot.isimmutable) { // emit a copy of values stored in mutable slots
         Type *vtype = julia_type_to_llvm(slot.typ);
         assert(vtype != T_pjlvalue);
-        slot = mark_julia_type(
-            emit_unbox(vtype, slot, slot.typ),
-            false, slot.typ, ctx);
+        Value *dest = emit_static_alloca(vtype);
+        emit_unbox(vtype, slot, slot.typ, dest);
+        slot = mark_julia_slot(dest, slot.typ, tbaa_stack);
     }
     if (slot.isboxed && slot.isimmutable) {
         // see if inference had a better type for the ssavalue than the expression (after inlining getfield on a Tuple)
@@ -3045,9 +3045,7 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx)
     else {
         // store unboxed
         assert(vi.value.ispointer());
-        builder.CreateStore(
-            emit_unbox(julia_type_to_llvm(vi.value.typ), rval_info, vi.value.typ),
-            vi.value.V, vi.isVolatile);
+        emit_unbox(julia_type_to_llvm(vi.value.typ), rval_info, vi.value.typ, vi.value.V, vi.isVolatile);
     }
 }
 
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 5d3d8e2b0deea..974284b94d97e 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -258,7 +258,7 @@ static Constant *julia_const_to_llvm(jl_value_t *e, bool nested=false)
 static jl_cgval_t ghostValue(jl_value_t *ty);
 
 // emit code to unpack a raw value from a box into registers or a stack slot
-static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *dest)
+static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *dest, bool volatile_store)
 {
     assert(to != T_pjlvalue);
     // TODO: fully validate that x.typ == jt?
@@ -294,7 +294,7 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d
         }
         if (!dest)
             return unboxed;
-        builder.CreateStore(unboxed, dest);
+        builder.CreateStore(unboxed, dest, volatile_store);
         return NULL;
     }
 
@@ -331,7 +331,7 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d
     if (dest) {
         // callers using the dest argument only use it for a stack slot for now
         alignment = 0;
-        builder.CreateMemCpy(dest, p, jl_datatype_size(jt), alignment, false, x.tbaa);
+        builder.CreateMemCpy(dest, p, jl_datatype_size(jt), alignment, volatile_store, x.tbaa);
         return NULL;
     }
     else {

From b0d9687582d750485e4cbfbd1b63e26b1a67715d Mon Sep 17 00:00:00 2001
From: Oscar Blumberg
Date: Fri, 20 May 2016 09:45:12 -0400
Subject: [PATCH 4/4] fix fail test

---
 src/intrinsics.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 974284b94d97e..2ac9c4c91dd6e 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -331,7 +331,14 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d
     if (dest) {
         // callers using the dest argument only use it for a stack slot for now
         alignment = 0;
-        builder.CreateMemCpy(dest, p, jl_datatype_size(jt), alignment, volatile_store, x.tbaa);
+        MDNode *tbaa = x.tbaa;
+        // the memcpy intrinsic does not allow to specify different alias tags
+        // for the load part (x.tbaa) and the store part (tbaa_stack).
+        // since the tbaa lattice has to be a tree we have unfortunately
+        // x.tbaa ∪ tbaa_stack = tbaa_root if x.tbaa != tbaa_stack
+        if (tbaa != tbaa_stack)
+            tbaa = NULL;
+        builder.CreateMemCpy(dest, p, jl_datatype_size(jt), alignment, volatile_store, tbaa);
         return NULL;
     }
     else {
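[Annotation, not part of the patch series: the C++ changes let codegen unbox a
value directly into a destination buffer (a struct field, a mutable variable
slot, or the sret return slot) via the new `dest`/`volatile_store` parameters
of `emit_unbox`, rather than materializing large isbits values as first-class
LLVM aggregates. A rough Julia-level illustration of code that benefits,
using hypothetical type and field names and 0.4-era `immutable` syntax:

    immutable Pixel        # any isbits struct; large ones benefit most
        r::Int; g::Int; b::Int; a::Int
    end
    p = Pixel(1, 2, 3, 4)  # `new` now writes each field into a stack buffer
                           # (store/memcpy through emit_unbox's `dest`) instead
                           # of chaining insertvalue on one SSA struct value

The fourth patch additionally drops the alias tag on such memcpys when the
source and destination tags differ, since the memcpy intrinsic accepts only a
single tag and the TBAA lattice must remain a tree.]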