From 775bdc003cf7f4c9f8cc2a86c4f77a9fdb475aea Mon Sep 17 00:00:00 2001
From: Jameson Nash
Date: Mon, 20 Apr 2020 12:44:46 -0400
Subject: [PATCH] threading: update codegen to use atomic annotations also

Also add load/store alignment annotations, since LLVM now prefers that
we specify those explicitly, even though it's not required.

This does not yet include correct load/store behaviors for objects with
inlined references (see the recent #34126 PR).
---
 src/atomics.h                     |   3 +-
 src/ccall.cpp                     |  37 +++++----
 src/cgutils.cpp                   | 118 ++++++++++++++--------------
 src/codegen.cpp                   | 125 ++++++++++++++++++------------
 src/gf.c                          |   2 +-
 src/julia.h                       |   2 +-
 src/llvm-alloc-opt.cpp            |  37 ++++++---
 src/llvm-final-gc-lowering.cpp    |  36 +++++----
 src/llvm-late-gc-lowering.cpp     |  28 ++++---
 test/llvmpasses/alloc-opt.jl      |  10 +--
 test/llvmpasses/final-lower-gc.ll |  18 ++---
 test/llvmpasses/late-lower-gc.ll  |   4 +-
 12 files changed, 242 insertions(+), 178 deletions(-)

diff --git a/src/atomics.h b/src/atomics.h
index 0af087038da034..c61eb8ee183e65 100644
--- a/src/atomics.h
+++ b/src/atomics.h
@@ -75,7 +75,7 @@
 // TODO: Maybe add jl_atomic_compare_exchange_weak for spin lock
 # define jl_atomic_store(obj, val)                  \
     __atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
-# define jl_atomic_store_relaxed(obj, val) \
+# define jl_atomic_store_relaxed(obj, val)          \
     __atomic_store_n(obj, val, __ATOMIC_RELAXED)
 # if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
     !(defined(_CPU_X86_) || defined(_CPU_X86_64_))
@@ -271,6 +271,7 @@ static inline void jl_atomic_store_release(volatile T *obj, T2 val)
     jl_signal_fence();
     *obj = (T)val;
 }
+template<typename T, typename T2>
 static inline void jl_atomic_store_relaxed(volatile T *obj, T2 val)
 {
     *obj = (T)val;
diff --git a/src/ccall.cpp b/src/ccall.cpp
index b8d068d2381e91..49870ff0128f97 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -87,7 +87,7 @@ static Value *runtime_sym_lookup(
     BasicBlock *dlsym_lookup = BasicBlock::Create(jl_LLVMContext, "dlsym");
     BasicBlock *ccall_bb = BasicBlock::Create(jl_LLVMContext, "ccall");
     Constant *initnul = ConstantPointerNull::get((PointerType*)T_pvoidfunc);
-    LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(llvmgv, sizeof(void*));
+    LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(T_pvoidfunc, llvmgv, sizeof(void*));
     // This in principle needs a consume ordering so that load from
     // this pointer sees a valid value. However, this is not supported by
     // LLVM (or agreed on in the C/C++ standard FWIW) and should be
     // almost impossible to happen on every platform we support since this
     // ordering is enforced by the hardware and LLVM has to speculate an
     // invalid load from the `cglobal` but doesn't depend on the `cglobal`
     // value for this to happen.
-    // llvmf_orig->setAtomic(AtomicOrdering::Consume);
+    llvmf_orig->setAtomic(AtomicOrdering::Unordered);
     irbuilder.CreateCondBr(
             irbuilder.CreateICmpNE(llvmf_orig, initnul),
             ccall_bb,
@@ -114,7 +114,7 @@
     }
     Value *llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func),
             { libname, stringConstPtr(emission_context, irbuilder, f_name), libptrgv });
-    auto store = irbuilder.CreateAlignedStore(llvmf, llvmgv, sizeof(void*));
+    StoreInst *store = irbuilder.CreateAlignedStore(llvmf, llvmgv, sizeof(void*));
     store->setAtomic(AtomicOrdering::Release);
     irbuilder.CreateBr(ccall_bb);
@@ -169,7 +169,7 @@ static GlobalVariable *emit_plt_thunk(
     IRBuilder<> irbuilder(b0);
     Value *ptr = runtime_sym_lookup(emission_context, irbuilder, funcptype, f_lib, f_name, plt, libptrgv,
             llvmgv, runtime_lib);
-    auto store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, sizeof(void*));
+    StoreInst *store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, sizeof(void*));
     store->setAtomic(AtomicOrdering::Release);
     SmallVector<Value*, 16> args;
     for (Function::arg_iterator arg = plt->arg_begin(), arg_e = plt->arg_end(); arg != arg_e; ++arg)
@@ -234,7 +234,7 @@ static Value *emit_plt(
     // consume ordering too. This is even less likely to cause issues though
     // since the only thing we do to this loaded pointer is to call it
     // immediately.
-    // got_val->setAtomic(AtomicOrdering::Consume);
+    got_val->setAtomic(AtomicOrdering::Unordered);
     return ctx.builder.CreateBitCast(got_val, funcptype);
 }
 
@@ -349,17 +349,19 @@ static Value *llvm_type_rewrite(
     Value *from;
     Value *to;
     const DataLayout &DL = jl_data_layout;
+    unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type));
     if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) {
         to = emit_static_alloca(ctx, target_type);
+        cast<AllocaInst>(to)->setAlignment(Align(align));
         from = emit_bitcast(ctx, to, from_type->getPointerTo());
     }
     else {
         from = emit_static_alloca(ctx, from_type);
+        cast<AllocaInst>(from)->setAlignment(Align(align));
         to = emit_bitcast(ctx, from, target_type->getPointerTo());
     }
-    // XXX: deal with possible alignment issues
-    ctx.builder.CreateStore(v, from);
-    return ctx.builder.CreateLoad(to);
+    ctx.builder.CreateAlignedStore(v, from, align);
+    return ctx.builder.CreateAlignedLoad(to, align);
 }
 
 // --- argument passing and scratch space utilities ---
@@ -1576,9 +1578,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         Value *ptls_i16 = emit_bitcast(ctx, ctx.ptlsStates, T_pint16);
         const int tid_offset = offsetof(jl_tls_states_t, tid);
         Value *ptid = ctx.builder.CreateGEP(ptls_i16, ConstantInt::get(T_size, tid_offset / 2));
-        return mark_or_box_ccall_result(ctx,
-                tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(ptid)),
-                retboxed, rt, unionall, static_rt);
+        LoadInst *tid = ctx.builder.CreateAlignedLoad(ptid, sizeof(int16_t));
+        tbaa_decorate(tbaa_const, tid);
+        return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_get_current_task)) {
         assert(lrt == T_prjlvalue);
@@ -1587,9 +1589,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_pprjlvalue);
         const int ct_offset = offsetof(jl_tls_states_t, current_task);
         Value *pct = ctx.builder.CreateGEP(ptls_pv, ConstantInt::get(T_size, ct_offset / sizeof(void*)));
-        return mark_or_box_ccall_result(ctx,
-
tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(pct)), - retboxed, rt, unionall, static_rt); + LoadInst *ct = ctx.builder.CreateAlignedLoad(pct, sizeof(void*)); + tbaa_decorate(tbaa_const, ct); + return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt); } else if (is_libjulia_func(jl_set_next_task)) { assert(lrt == T_void); @@ -1608,8 +1610,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); Value *pdefer_sig = emit_defer_signal(ctx); Value *defer_sig = ctx.builder.CreateLoad(pdefer_sig); - defer_sig = ctx.builder.CreateAdd(defer_sig, - ConstantInt::get(T_sigatomic, 1)); + defer_sig = ctx.builder.CreateAdd(defer_sig, ConstantInt::get(T_sigatomic, 1)); ctx.builder.CreateStore(defer_sig, pdefer_sig); emit_signal_fence(ctx); return ghostValue(jl_nothing_type); @@ -1671,7 +1672,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ((jl_datatype_t*)ety)->layout->first_ptr)); } Value *slot_addr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, arrayptr, idx); - Value *load = tbaa_decorate(tbaa_ptrarraybuf, ctx.builder.CreateLoad(T_prjlvalue, slot_addr)); + LoadInst *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, slot_addr, sizeof(void*)); + load->setAtomic(AtomicOrdering::Unordered); + tbaa_decorate(tbaa_ptrarraybuf, load); Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(T_prjlvalue)), T_int32); JL_GC_POP(); return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index c68db711e341fd..c220e84761235f 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -400,7 +400,8 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) return literal_static_pointer_val(p); Value *pgv = literal_pointer_val_slot(ctx, p); return tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable( - ctx.builder.CreateLoad(T_pjlvalue, pgv), false, jl_typeof(p))); + ctx.builder.CreateAlignedLoad(T_pjlvalue, pgv, sizeof(void*)), + false, jl_typeof(p))); } static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p) @@ -413,8 +414,8 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p) // bindings are prefixed with jl_bnd# Value *pgv = julia_pgv(ctx, "jl_bnd#", p->name, p->owner, p); return tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable( - ctx.builder.CreateLoad(T_pjlvalue, pgv), false, - sizeof(jl_binding_t), alignof(jl_binding_t))); + ctx.builder.CreateAlignedLoad(T_pjlvalue, pgv, sizeof(void*)), + false, sizeof(jl_binding_t), alignof(jl_binding_t))); } // bitcast a value, but preserve its address space when dealing with pointer types @@ -453,7 +454,7 @@ static Value *julia_binding_gv(jl_codectx_t &ctx, jl_binding_t *b) if (imaging_mode) bv = emit_bitcast(ctx, tbaa_decorate(tbaa_const, - ctx.builder.CreateLoad(T_pjlvalue, julia_pgv(ctx, "*", b->name, b->owner, b))), + ctx.builder.CreateAlignedLoad(T_pjlvalue, julia_pgv(ctx, "*", b->name, b->owner, b), sizeof(void*))), T_pprjlvalue); else bv = ConstantExpr::getBitCast(literal_static_pointer_val(b), T_pprjlvalue); @@ -808,13 +809,6 @@ static Value *emit_nthptr_addr(jl_codectx_t &ctx, Value *v, Value *idx) idx); } -static Value *emit_nthptr(jl_codectx_t &ctx, Value *v, ssize_t n, MDNode *tbaa) -{ - // p = (jl_value_t**)v; p[n] - Value *vptr = emit_nthptr_addr(ctx, v, n); - return tbaa_decorate(tbaa, 
ctx.builder.CreateLoad(T_prjlvalue, vptr)); -} - static Value *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *ptype) { // p = (jl_value_t**)v; *(ptype)&p[n] @@ -879,7 +873,7 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p) auto emit_unboxty = [&] () -> Value* { if (imaging_mode) return maybe_decay_untracked( - tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_pjlvalue, datatype_or_p))); + tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_pjlvalue, datatype_or_p, sizeof(void*)))); return datatype_or_p; }; Value *res; @@ -920,20 +914,21 @@ static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt) { Value *Ptr = emit_bitcast(ctx, decay_derived(dt), T_ppjlvalue); Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, types) / sizeof(void*)); - return tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_pjlvalue, ctx.builder.CreateInBoundsGEP(T_pjlvalue, Ptr, Idx))); + return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad( + T_pjlvalue, ctx.builder.CreateInBoundsGEP(T_pjlvalue, Ptr, Idx), sizeof(void*))); } static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt) { Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), T_psize); - return tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_size, type_svec)); + return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_size, type_svec, sizeof(void*))); } static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) { Value *Ptr = emit_bitcast(ctx, decay_derived(dt), T_pint32); Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, size) / sizeof(int)); - return tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_int32, ctx.builder.CreateInBoundsGEP(T_int32, Ptr, Idx))); + return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_int32, ctx.builder.CreateInBoundsGEP(T_int32, Ptr, Idx), sizeof(int32_t))); } /* this is valid code, it's simply unused @@ -991,7 +986,7 @@ static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt) Value *Ptr = emit_bitcast(ctx, decay_derived(dt), T_pint8); Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, mutabl)); Value *mutabl = tbaa_decorate(tbaa_const, - ctx.builder.CreateLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Ptr, Idx))); + ctx.builder.CreateAlignedLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Ptr, Idx), 1)); return ctx.builder.CreateTrunc(mutabl, T_int1); } @@ -1002,7 +997,7 @@ static Value *emit_datatype_abstract(jl_codectx_t &ctx, Value *dt) Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, abstract)); Value *abstract = tbaa_decorate(tbaa_const, - ctx.builder.CreateLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Ptr, Idx))); + ctx.builder.CreateAlignedLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Ptr, Idx), 1)); return ctx.builder.CreateTrunc(abstract, T_int1); } */ @@ -1017,10 +1012,8 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *dt) static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) { - return emit_nthptr( - ctx, dt, - (ssize_t)(offsetof(jl_datatype_t, name) / sizeof(char*)), - tbaa_const); + Value *vptr = emit_nthptr_addr(ctx, dt, (ssize_t)(offsetof(jl_datatype_t, name) / sizeof(char*))); + return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, vptr, sizeof(void*))); } // --- generating various error checks --- @@ -1213,7 +1206,7 @@ static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) { Value *isconcrete; isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, 
emit_bitcast(ctx, decay_derived(typ), T_pint8), offsetof(jl_datatype_t, isconcretetype));
-    isconcrete = ctx.builder.CreateLoad(T_int8, isconcrete, tbaa_const);
+    isconcrete = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_int8, isconcrete, 1));
     isconcrete = ctx.builder.CreateTrunc(isconcrete, T_int1);
     return isconcrete;
 }
@@ -1343,13 +1336,17 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
     //    elt = data;
     //}
     //else {
-        load = ctx.builder.CreateAlignedLoad(data,
-            isboxed || alignment ? alignment : julia_alignment(jltype),
-            false);
+        if (isboxed)
+            alignment = sizeof(void*);
+        else if (!alignment)
+            alignment = julia_alignment(jltype);
+        load = ctx.builder.CreateAlignedLoad(data, alignment, false);
         if (aliasscope)
             load->setMetadata("alias.scope", aliasscope);
-        if (isboxed)
+        if (isboxed) {
+            cast<LoadInst>(load)->setOrdering(AtomicOrdering::Unordered);
             load = maybe_mark_load_dereferenceable(load, true, jltype);
+        }
         if (tbaa)
             load = tbaa_decorate(tbaa, load);
         if (maybe_null_if_boxed) {
@@ -1613,10 +1610,10 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
                 T_prjlvalue,
                 maybe_decay_tracked(emit_bitcast(ctx, data_pointer(ctx, strct), T_pprjlvalue)),
                 idx0());
-        Value *fld = tbaa_decorate(strct.tbaa,
-                maybe_mark_load_dereferenceable(
-                    ctx.builder.CreateLoad(T_prjlvalue, fldptr),
-                    maybe_null, minimum_field_size, minimum_align));
+        LoadInst *fld = ctx.builder.CreateAlignedLoad(T_prjlvalue, fldptr, sizeof(void*));
+        fld->setOrdering(AtomicOrdering::Unordered);
+        tbaa_decorate(strct.tbaa, fld);
+        maybe_mark_load_dereferenceable(fld, maybe_null, minimum_field_size, minimum_align);
         if (maybe_null)
             null_pointer_check(ctx, fld);
         *ret = mark_julia_type(ctx, fld, true, jl_any_type);
@@ -1687,9 +1684,9 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
             addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx);
     }
     if (jl_field_isptr(jt, idx)) {
-        Instruction *Load = maybe_mark_load_dereferenceable(
-                ctx.builder.CreateLoad(T_prjlvalue, maybe_bitcast(ctx, addr, T_pprjlvalue)),
-                maybe_null, jl_field_type(jt, idx));
+        LoadInst *Load = ctx.builder.CreateAlignedLoad(T_prjlvalue, maybe_bitcast(ctx, addr, T_pprjlvalue), sizeof(void*));
+        Load->setOrdering(AtomicOrdering::Unordered);
+        maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx));
         Value *fldv = tbaa_decorate(strct.tbaa, Load);
         if (maybe_null)
             null_pointer_check(ctx, fldv);
@@ -1707,7 +1704,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
         else {
             ptindex = emit_struct_gep(ctx, cast<StructType>(lt), staddr, byte_offset + fsz);
         }
-        Instruction *tindex0 = tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateLoad(T_int8, ptindex));
+        Instruction *tindex0 = tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedLoad(T_int8, ptindex, 1));
         //tindex0->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
@@ -1759,7 +1756,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 unsigned fld = st_idx + i;
                 Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(fld));
                 Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
-                ctx.builder.CreateStore(fldv, fldp);
+                ctx.builder.CreateAlignedStore(fldv, fldp, align);
             }
             // emit remaining bytes up to tindex
             if (i < ptindex - st_idx) {
@@ -1768,7 +1765,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t
&ctx, const jl_cgval_t &st for (; i < ptindex - st_idx; i++) { Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx + i)); Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, staddr, i); - ctx.builder.CreateStore(fldv, fldp); + ctx.builder.CreateAlignedStore(fldv, fldp, 1); } } } @@ -1847,11 +1844,12 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo) jl_value_t *ty = tinfo.typ; #ifdef STORE_ARRAY_LEN Value *addr = ctx.builder.CreateStructGEP(jl_array_llvmt, - emit_bitcast(ctx, decay_derived(t), jl_parray_llvmt), - 1); //index (not offset) of length field in jl_parray_llvmt - + emit_bitcast(ctx, decay_derived(t), jl_parray_llvmt), + 1); //index (not offset) of length field in jl_parray_llvmt MDNode *tbaa = arraytype_constshape(ty) ? tbaa_const : tbaa_arraylen; - return tbaa_decorate(tbaa, ctx.builder.CreateLoad(addr, false)); + LoadInst *len = ctx.builder.CreateAlignedLoad(addr, sizeof(size_t)); + len->setOrdering(AtomicOrdering::NotAtomic); + return tbaa_decorate(tbaa, len); #else jl_value_t *p1 = jl_tparam1(ty); // FIXME: check that ty is an array type if (jl_is_long(p1)) { @@ -1896,7 +1894,8 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, PointerType::get(PPT->getElementType(), AS), PT->getAddressSpace())); } - auto LI = ctx.builder.CreateLoad(addr); + LoadInst *LI = ctx.builder.CreateAlignedLoad(addr, sizeof(char*)); + LI->setOrdering(AtomicOrdering::NotAtomic); LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(jl_LLVMContext, None)); tbaa_decorate(tbaa, LI); return LI; @@ -1937,7 +1936,7 @@ static Value *emit_arrayflags(jl_codectx_t &ctx, const jl_cgval_t &tinfo) jl_array_llvmt, emit_bitcast(ctx, decay_derived(t), jl_parray_llvmt), arrayflag_field); - return tbaa_decorate(tbaa_arrayflags, ctx.builder.CreateLoad(addr)); + return tbaa_decorate(tbaa_arrayflags, ctx.builder.CreateAlignedLoad(T_int16, addr, sizeof(int16_t))); } static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary) @@ -1958,9 +1957,9 @@ static Value *emit_arrayelsize(jl_codectx_t &ctx, const jl_cgval_t &tinfo) int elsize_field = 2; #endif Value *addr = ctx.builder.CreateStructGEP(jl_array_llvmt, - emit_bitcast(ctx, decay_derived(t), jl_parray_llvmt), - elsize_field); - return tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(addr)); + emit_bitcast(ctx, decay_derived(t), jl_parray_llvmt), + elsize_field); + return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_int16, addr, sizeof(int16_t))); } static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int nd) @@ -1974,10 +1973,11 @@ static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int n int offset_field = 3; #endif - Value *addr = ctx.builder.CreateStructGEP(jl_array_llvmt, - emit_bitcast(ctx, decay_derived(t), jl_parray_llvmt), - offset_field); - return tbaa_decorate(tbaa_arrayoffset, ctx.builder.CreateLoad(addr)); + Value *addr = ctx.builder.CreateStructGEP( + jl_array_llvmt, + emit_bitcast(ctx, decay_derived(t), jl_parray_llvmt), + offset_field); + return tbaa_decorate(tbaa_arrayoffset, ctx.builder.CreateAlignedLoad(T_int32, addr, sizeof(int32_t))); } // Returns the size of the array represented by `tinfo` for the given dimension `dim` if @@ -2068,7 +2068,7 @@ static Value *emit_array_nd_index( // CreateAlloca is OK here since we are on an error branch Value *tmp = ctx.builder.CreateAlloca(T_size, ConstantInt::get(T_size, nidxs)); for (size_t k = 0; k < nidxs; k++) { - ctx.builder.CreateStore(idxs[k], 
ctx.builder.CreateInBoundsGEP(T_size, tmp, ConstantInt::get(T_size, k))); + ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(T_size, tmp, ConstantInt::get(T_size, k)), sizeof(size_t)); } ctx.builder.CreateCall(prepare_call(jlboundserrorv_func), { mark_callee_rooted(a), tmp, ConstantInt::get(T_size, nidxs) }); @@ -2633,7 +2633,7 @@ static void emit_setfield(jl_codectx_t &ctx, Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty); tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1)); Value *ptindex = ctx.builder.CreateInBoundsGEP(T_int8, emit_bitcast(ctx, maybe_decay_tracked(addr), T_pint8), ConstantInt::get(T_size, fsz)); - tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateStore(tindex, ptindex)); + tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, 1)); // copy data if (!rhs.isghost) { emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr); @@ -2744,7 +2744,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg unsigned i = 0; for (; i < fsz / al; i++) { Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); - Value *fldv = tbaa_decorate(tbaa_stack, ctx.builder.CreateLoad(ET, fldp)); + Value *fldv = tbaa_decorate(tbaa_stack, ctx.builder.CreateAlignedLoad(ET, fldp, al)); strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i)); } // emit remaining bytes up to tindex @@ -2753,7 +2753,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg staddr = ctx.builder.CreateBitCast(staddr, T_pint8); for (; i < ptindex - llvm_idx; i++) { Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, staddr, i); - Value *fldv = tbaa_decorate(tbaa_stack, ctx.builder.CreateLoad(T_int8, fldp)); + Value *fldv = tbaa_decorate(tbaa_stack, ctx.builder.CreateAlignedLoad(T_int8, fldp, 1)); strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i)); } } @@ -2765,7 +2765,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } else { Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz); - tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateStore(tindex, ptindex)); + tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, 1)); if (!rhs_union.isghost) emit_unionmove(ctx, dest, tbaa_stack, fval_info, nullptr); } @@ -2793,9 +2793,10 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (init_as_value) strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(T_int8, 0), makeArrayRef(llvm_idx)); else - tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateStore( + tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore( ConstantInt::get(T_int8, 0), - ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx))); + ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx), + 1)); } } if (type_is_ghost(lt)) @@ -2812,10 +2813,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg undef_derived_strct(ctx.builder, strct, sty, strctinfo.tbaa); for (size_t i = nargs; i < nf; i++) { if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { - tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateStore( + tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore( ConstantInt::get(T_int8, 0), ctx.builder.CreateInBoundsGEP(emit_bitcast(ctx, strct, T_pint8), - ConstantInt::get(T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)))); + ConstantInt::get(T_size, jl_field_offset(sty, 
i) + jl_field_size(sty, i) - 1)),
+                1));
         }
     }
     // TODO: verify that nargs <= nf (currently handled by front-end)
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 05cf45bd610f12..b63b2acf5a1934 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -2223,7 +2223,10 @@ static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *
         if (bnd->constp) {
             return mark_julia_const(bnd->value);
         }
-        return mark_julia_type(ctx, tbaa_decorate(tbaa_binding, ctx.builder.CreateLoad(bp)), true, (jl_value_t*)jl_any_type);
+        LoadInst *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, sizeof(void*));
+        v->setOrdering(AtomicOrdering::Unordered);
+        tbaa_decorate(tbaa_binding, v);
+        return mark_julia_type(ctx, v, true, (jl_value_t*)jl_any_type);
     }
     // todo: use type info to avoid undef check
     return emit_checked_var(ctx, bp, name, false, tbaa_binding);
@@ -2400,10 +2403,14 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
             else
                 fld2 = ctx.builder.CreateExtractValue(varg2, llvm_idx);
             if (jl_field_isptr(sty, i)) {
-                if (arg1.ispointer())
-                    fld1 = ctx.builder.CreateLoad(T_prjlvalue, fld1);
-                if (arg2.ispointer())
-                    fld2 = ctx.builder.CreateLoad(T_prjlvalue, fld2);
+                if (arg1.ispointer()) {
+                    fld1 = ctx.builder.CreateAlignedLoad(T_prjlvalue, fld1, sizeof(void*));
+                    cast<LoadInst>(fld1)->setOrdering(AtomicOrdering::Unordered);
+                }
+                if (arg2.ispointer()) {
+                    fld2 = ctx.builder.CreateAlignedLoad(T_prjlvalue, fld2, sizeof(void*));
+                    cast<LoadInst>(fld2)->setOrdering(AtomicOrdering::Unordered);
+                }
                 subAns = emit_box_compare(ctx,
                         mark_julia_type(ctx, fld1, true, fldty),
                         mark_julia_type(ctx, fld2, true, fldty));
@@ -2414,7 +2421,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
                 jl_cgval_t fld2_info;
                 if (arg1.ispointer()) {
                     Value *tindex1 = ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1),
-                            ctx.builder.CreateLoad(T_int8, emit_struct_gep(ctx, at, varg1, tindex_offset)));
+                            ctx.builder.CreateAlignedLoad(T_int8, emit_struct_gep(ctx, at, varg1, tindex_offset), 1));
                     fld1_info = mark_julia_slot(fld1, fldty, tindex1, arg1.tbaa);
                 }
                 else {
@@ -2422,7 +2429,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
                 }
                 if (arg2.ispointer()) {
                     Value *tindex2 = ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1),
-                            ctx.builder.CreateLoad(T_int8, emit_struct_gep(ctx, at, varg2, tindex_offset)));
+                            ctx.builder.CreateAlignedLoad(T_int8, emit_struct_gep(ctx, at, varg2, tindex_offset), 1));
                     fld2_info = mark_julia_slot(fld2, fldty, tindex2, arg2.tbaa);
                 }
                 else {
@@ -2700,7 +2707,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 ptindex = emit_bitcast(ctx, ptindex, T_pint8);
                 ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
                 ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
-                Instruction *tindex = tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateLoad(T_int8, ptindex));
+                Instruction *tindex = tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateAlignedLoad(T_int8, ptindex, 1));
                 tindex->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
                     ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
                     ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
@@ -2766,10 +2773,11 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     // load owner pointer
                     Instruction *own_ptr;
                     if (jl_is_long(ndp)) {
-                        own_ptr = ctx.builder.CreateLoad(T_prjlvalue,
+                        own_ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue,
                             ctx.builder.CreateConstGEP1_32(T_prjlvalue,
                                emit_bitcast(ctx, decay_derived(aryv), T_pprjlvalue),
-                                jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)));
+                                jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
+                            sizeof(void*));
                         tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false,
                             (jl_value_t*)jl_array_any_type));
                     }
                     else {
@@ -2846,7 +2854,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
             idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck);
             idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ctx.nReqArgs));
-            Instruction *v = ctx.builder.CreateLoad(T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.argArray, idx));
+            Instruction *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.argArray, idx), sizeof(void*));
             // if we know the result type of this load, we will mark that information here too
             tbaa_decorate(tbaa_value, maybe_mark_load_dereferenceable(v, false, rt));
             *ret = mark_julia_type(ctx, v, /*boxed*/ true, rt);
@@ -2989,7 +2997,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
             emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck);
             Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(T_prjlvalue, decay_derived(emit_bitcast(ctx, types_svec, T_pprjlvalue)), idx);
-            Value *fieldtyp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, fieldtyp_p));
+            Value *fieldtyp = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, fieldtyp_p, sizeof(void*)));
             *ret = mark_julia_type(ctx, fieldtyp, true, (jl_value_t*)jl_type_type);
             return true;
         }
@@ -3003,7 +3011,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         if (sty == jl_string_type || sty == jl_simplevector_type) {
             // String and SimpleVector's length fields have the same layout
             auto ptr = emit_bitcast(ctx, boxed(ctx, obj), T_psize);
-            Value *len = tbaa_decorate(tbaa_mutab, ctx.builder.CreateLoad(T_size, ptr));
+            Value *len = tbaa_decorate(tbaa_mutab, ctx.builder.CreateAlignedLoad(T_size, ptr, sizeof(size_t)));
             if (sty == jl_simplevector_type) {
                 len = ctx.builder.CreateMul(len, ConstantInt::get(T_size, sizeof(void*)));
                 len = ctx.builder.CreateAdd(len, ConstantInt::get(T_size, sizeof(void*)));
@@ -3112,7 +3120,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, offs);
                     // emit this using the same type as emit_getfield_knownidx
                     // so that LLVM may be able to load-load forward them and fold the result
-                    fldv = tbaa_decorate(obj.tbaa, ctx.builder.CreateLoad(T_prjlvalue, addr));
+                    fldv = tbaa_decorate(obj.tbaa, ctx.builder.CreateAlignedLoad(T_prjlvalue, addr, sizeof(size_t)));
+                    cast<LoadInst>(fldv)->setOrdering(AtomicOrdering::Unordered);
                 }
                 else {
                     fldv = ctx.builder.CreateExtractValue(obj.V, offs);
@@ -3469,9 +3478,10 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t
         Constant *initnul = V_null;
         GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), T_pjlvalue,
                 false, GlobalVariable::PrivateLinkage, initnul);
-        Value *cachedval = ctx.builder.CreateLoad(T_pjlvalue, bindinggv);
-        BasicBlock *have_val = BasicBlock::Create(jl_LLVMContext, "found"),
-                *not_found = BasicBlock::Create(jl_LLVMContext, "notfound");
+        LoadInst *cachedval = ctx.builder.CreateAlignedLoad(T_pjlvalue, bindinggv,
sizeof(void*)); + cachedval->setOrdering(AtomicOrdering::Unordered); + BasicBlock *have_val = BasicBlock::Create(jl_LLVMContext, "found"); + BasicBlock *not_found = BasicBlock::Create(jl_LLVMContext, "notfound"); BasicBlock *currentbb = ctx.builder.GetInsertBlock(); ctx.builder.CreateCondBr(ctx.builder.CreateICmpNE(cachedval, initnul), have_val, not_found); ctx.f->getBasicBlockList().push_back(not_found); @@ -3479,7 +3489,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t Value *bval = ctx.builder.CreateCall(prepare_call(jlgetbindingorerror_func), { literal_pointer_val(ctx, (jl_value_t*)m), literal_pointer_val(ctx, (jl_value_t*)s) }); - ctx.builder.CreateStore(bval, bindinggv); + ctx.builder.CreateAlignedStore(bval, bindinggv, sizeof(void*))->setOrdering(AtomicOrdering::Release); ctx.builder.CreateBr(have_val); ctx.f->getBasicBlockList().push_back(have_val); ctx.builder.SetInsertPoint(have_val); @@ -3498,10 +3508,10 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa) { - assert(bp->getType() == T_pprjlvalue); - LoadInst *v = ctx.builder.CreateLoad(T_prjlvalue, bp); + LoadInst *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, sizeof(void*)); if (isvol) v->setVolatile(true); + v->setOrdering(AtomicOrdering::Unordered); if (tbaa) tbaa_decorate(tbaa, v); undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name); @@ -3521,7 +3531,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) T_prjlvalue, ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, bp)); + Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, sizeof(void*))); Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp), maybe_decay_untracked(literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig; @@ -3544,7 +3554,10 @@ static jl_cgval_t emit_global(jl_codectx_t &ctx, jl_sym_t *sym) // double-check that a global variable is actually defined. this // can be a problem in parallel when a definition is missing on // one machine. 
- return mark_julia_type(ctx, tbaa_decorate(tbaa_binding, ctx.builder.CreateLoad(T_prjlvalue, bp)), true, jl_any_type); + LoadInst *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, sizeof(void*)); + v->setOrdering(AtomicOrdering::Unordered); + tbaa_decorate(tbaa_binding, v); + return mark_julia_type(ctx, v, true, jl_any_type); } return emit_checked_var(ctx, bp, sym, false, tbaa_binding); } @@ -3559,15 +3572,15 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) return mark_julia_const(jl_true); if (vi.boxroot == NULL || vi.pTIndex != NULL) { assert(vi.defFlag); - isnull = ctx.builder.CreateLoad(T_int1, vi.defFlag, vi.isVolatile); + isnull = ctx.builder.CreateAlignedLoad(T_int1, vi.defFlag, 1, vi.isVolatile); } if (vi.boxroot != NULL) { - Value *boxed = ctx.builder.CreateLoad(T_prjlvalue, vi.boxroot, vi.isVolatile); + Value *boxed = ctx.builder.CreateAlignedLoad(T_prjlvalue, vi.boxroot, sizeof(void*), vi.isVolatile); Value *box_isnull = ctx.builder.CreateICmpNE(boxed, maybe_decay_untracked(V_null)); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value // as indicated by testing (pTIndex & 0x80) - Value *tindex = ctx.builder.CreateLoad(T_int8, vi.pTIndex, vi.isVolatile); + Value *tindex = ctx.builder.CreateAlignedLoad(T_int8, vi.pTIndex, sizeof(void*), vi.isVolatile); Value *load_unbox = ctx.builder.CreateICmpEQ( ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)), ConstantInt::get(T_int8, 0)); @@ -3592,7 +3605,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) T_prjlvalue, ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, bp)); + Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, sizeof(void*))); isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp), maybe_decay_untracked(literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); } @@ -3613,8 +3626,9 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) if (bnd->value != NULL) return mark_julia_const(jl_true); Value *bp = julia_binding_gv(ctx, bnd); - Instruction *v = ctx.builder.CreateLoad(T_prjlvalue, bp); + LoadInst *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, sizeof(void*)); tbaa_decorate(tbaa_binding, v); + v->setOrdering(AtomicOrdering::Unordered); isnull = ctx.builder.CreateICmpNE(v, maybe_decay_untracked(V_null)); } else { @@ -3636,7 +3650,7 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !vi.value.V) { v = vi.value; if (vi.pTIndex) - v.TIndex = ctx.builder.CreateLoad(T_int8, vi.pTIndex); + v.TIndex = ctx.builder.CreateAlignedLoad(T_int8, vi.pTIndex, 1); } else { // copy value to a non-mutable (non-volatile SSA) location @@ -3656,18 +3670,18 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va } Value *tindex = NULL; if (vi.pTIndex) - tindex = ctx.builder.CreateLoad(T_int8, vi.pTIndex, vi.isVolatile); + tindex = ctx.builder.CreateAlignedLoad(T_int8, vi.pTIndex, 1, vi.isVolatile); v = mark_julia_slot(ssaslot, vi.value.typ, tindex, tbaa_stack); } if (vi.boxroot == NULL) v = update_julia_type(ctx, v, typ); if (vi.usedUndef) { assert(vi.defFlag); - isnull = ctx.builder.CreateLoad(T_int1, vi.defFlag, vi.isVolatile); + isnull = ctx.builder.CreateAlignedLoad(T_int1, vi.defFlag, 1, vi.isVolatile); } } if (vi.boxroot != NULL) { - Instruction *boxed = 
ctx.builder.CreateLoad(T_prjlvalue, vi.boxroot, vi.isVolatile); + Instruction *boxed = ctx.builder.CreateAlignedLoad(T_prjlvalue, vi.boxroot, sizeof(void*), vi.isVolatile); Value *box_isnull = NULL; if (vi.usedUndef) box_isnull = ctx.builder.CreateICmpNE(boxed, maybe_decay_untracked(V_null)); @@ -4074,9 +4088,10 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) } else { if (!jl_is_method(ctx.linfo->def.method)) { - // TODO: inference is invalid if this has an effect - Value *world = ctx.builder.CreateLoad(prepare_global_in(jl_Module, jlgetworld_global)); - ctx.builder.CreateStore(world, ctx.world_age_field); + // TODO: inference is invalid if this has any effect (which it often does) + Value *world = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), sizeof(size_t)); + // TODO: world->setOrdering(AtomicOrdering::Monotonic); + ctx.builder.CreateAlignedStore(world, ctx.world_age_field, sizeof(size_t)); } assert(ssaval_result != -1); emit_ssaval_assign(ctx, ssaval_result, expr); @@ -4655,20 +4670,22 @@ static Function* gen_cfun_wrapper( // TODO: in the future, try to initialize a full TLS context here // for now, just use a dummy field to avoid a branch in this function ctx.world_age_field = ctx.builder.CreateSelect(have_tls, ctx.world_age_field, dummy_world); - Value *last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateLoad(ctx.world_age_field)); + Value *last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateAlignedLoad(ctx.world_age_field, sizeof(size_t))); Value *valid_tls = ctx.builder.CreateIsNotNull(last_age); have_tls = ctx.builder.CreateAnd(have_tls, valid_tls); ctx.world_age_field = ctx.builder.CreateSelect(valid_tls, ctx.world_age_field, dummy_world); - Value *world_v = ctx.builder.CreateLoad(prepare_global_in(jl_Module, jlgetworld_global)); + Value *world_v = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), sizeof(size_t)); + // TODO: cast(world_v)->setOrdering(AtomicOrdering::Monotonic); Value *age_ok = NULL; if (calltype) { - Value *lam_max = ctx.builder.CreateLoad( + LoadInst *lam_max = ctx.builder.CreateAlignedLoad( T_size, ctx.builder.CreateConstInBoundsGEP1_32( T_size, emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), T_psize), - offsetof(jl_code_instance_t, max_world) / sizeof(size_t))); + offsetof(jl_code_instance_t, max_world) / sizeof(size_t)), + sizeof(size_t)); // XXX: age is always OK if we don't have a TLS. This is a hack required due to `@threadcall` abuse. 
// and adds quite a bit of complexity here, even though it's still wrong // (anything that tries to interact with the runtime will fault) @@ -4689,7 +4706,7 @@ static Function* gen_cfun_wrapper( } else { assert(nest && nestPtr); - Value *ff = ctx.builder.CreateLoad(T_prjlvalue, nestPtr); + Value *ff = ctx.builder.CreateAlignedLoad(T_prjlvalue, nestPtr, sizeof(void*)); inputargs[0] = mark_julia_type(ctx, ff, true, jl_any_type); } // XXX: these values may need to be rooted until the end of the function @@ -4725,7 +4742,7 @@ static Function* gen_cfun_wrapper( if (aref) { if (jargty == (jl_value_t*)jl_any_type) { inputarg = mark_julia_type(ctx, - ctx.builder.CreateLoad(T_prjlvalue, emit_bitcast(ctx, val, T_pprjlvalue)), + ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_bitcast(ctx, val, T_pprjlvalue), sizeof(void*)), true, jl_any_type); } else if (static_at && jl_is_concrete_immutable(jargty)) { // anything that could be stored unboxed @@ -4753,8 +4770,9 @@ static Function* gen_cfun_wrapper( if (!*closure_types) *closure_types = jl_alloc_vec_any(0); jl_array_ptr_1d_push(*closure_types, jargty); - Value *runtime_dt = ctx.builder.CreateLoad(T_prjlvalue, - ctx.builder.CreateConstGEP1_32(T_prjlvalue, nestPtr, jl_array_len(*closure_types))); + Value *runtime_dt = ctx.builder.CreateAlignedLoad(T_prjlvalue, + ctx.builder.CreateConstGEP1_32(T_prjlvalue, nestPtr, jl_array_len(*closure_types)), + sizeof(void*)); BasicBlock *boxedBB = BasicBlock::Create(jl_LLVMContext, "isboxed", cw); BasicBlock *loadBB = BasicBlock::Create(jl_LLVMContext, "need-load", cw); BasicBlock *unboxedBB = BasicBlock::Create(jl_LLVMContext, "maybe-unboxed", cw); @@ -4772,7 +4790,7 @@ static Function* gen_cfun_wrapper( ctx.builder.CreateBitCast(val, T_pjlvalue)); ctx.builder.CreateCondBr(isrtany, isanyBB, unboxedBB); ctx.builder.SetInsertPoint(isanyBB); - Value *p2 = ctx.builder.CreateLoad(T_prjlvalue, ctx.builder.CreateBitCast(val, T_pprjlvalue)); + Value *p2 = ctx.builder.CreateAlignedLoad(T_prjlvalue, ctx.builder.CreateBitCast(val, T_pprjlvalue), sizeof(void*)); ctx.builder.CreateBr(afterBB); ctx.builder.SetInsertPoint(unboxedBB); Value *p3 = emit_new_bits(ctx, runtime_dt, val); @@ -4820,8 +4838,9 @@ static Function* gen_cfun_wrapper( if (!*closure_types) *closure_types = jl_alloc_vec_any(0); jl_array_ptr_1d_push(*closure_types, jargty); - Value *runtime_dt = ctx.builder.CreateLoad(T_prjlvalue, - ctx.builder.CreateConstGEP1_32(T_prjlvalue, nestPtr, jl_array_len(*closure_types))); + Value *runtime_dt = ctx.builder.CreateAlignedLoad(T_prjlvalue, + ctx.builder.CreateConstGEP1_32(T_prjlvalue, nestPtr, jl_array_len(*closure_types)), + sizeof(void*)); Value *strct = box_ccall_result(ctx, val, runtime_dt, jargty); inputarg = mark_julia_type(ctx, strct, true, jargty_proper); } @@ -5345,7 +5364,10 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret } else { Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, i - 1); - theArg = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(T_prjlvalue, argPtr), false, ty); + theArg = maybe_mark_load_dereferenceable( + ctx.builder.CreateAlignedLoad(T_prjlvalue, argPtr, sizeof(void*)), + false, + ty); } if (!isboxed) { theArg = decay_derived(emit_bitcast(ctx, theArg, PointerType::get(lty, 0))); @@ -5365,7 +5387,9 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret if (retarg == 0) theArg = funcArg; else - theArg = ctx.builder.CreateLoad(T_prjlvalue, ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, 
argArray, retarg - 1)); + theArg = ctx.builder.CreateAlignedLoad(T_prjlvalue, + ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, retarg - 1), + sizeof(void*)); retval = mark_julia_type(ctx, theArg, true, jl_any_type); } else { @@ -5915,7 +5939,7 @@ static std::pair, jl_llvm_functions_t> Value *last_age = NULL; if (toplevel) { emit_last_age_field(ctx); - last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateLoad(ctx.world_age_field)); + last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateAlignedLoad(ctx.world_age_field, sizeof(size_t))); } // step 8. allocate local variables slots @@ -6097,8 +6121,9 @@ static std::pair, jl_llvm_functions_t> } else { Value *argPtr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, argArray, ConstantInt::get(T_size, i-1)); - auto load = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(T_prjlvalue, argPtr), - false, vi.value.typ); + Value *load = maybe_mark_load_dereferenceable( + ctx.builder.CreateAlignedLoad(T_prjlvalue, argPtr, sizeof(void*)), + false, vi.value.typ); theArg = mark_julia_type(ctx, load, true, vi.value.typ); if (ctx.debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { SmallVector addr; diff --git a/src/gf.c b/src/gf.c index 9dfb028e7a7707..fa238157eb555f 100644 --- a/src/gf.c +++ b/src/gf.c @@ -24,7 +24,7 @@ extern "C" { #endif -JL_DLLEXPORT size_t jl_world_counter = 1; +JL_DLLEXPORT size_t jl_world_counter = 1; // TODO: should this be atomic release/consume? JL_DLLEXPORT size_t jl_get_world_counter(void) { return jl_world_counter; diff --git a/src/julia.h b/src/julia.h index e30380dc9046b5..02a1533985290a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -120,7 +120,7 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT { // Do not call this on a value that is already initialized. jl_taggedvalue_t *tag = jl_astaggedvalue(v); - tag->type = (jl_value_t*)t; + jl_atomic_store_relaxed(&tag->type, (jl_value_t*)t); } #define jl_typeis(v,t) (jl_typeof(v)==(jl_value_t*)(t)) diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 605cae027db68d..f17298efb8dc33 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -1276,16 +1276,22 @@ void Optimizer::splitOnStack(CallInst *orig_inst) assert(slot.offset <= offset && slot.offset + slot.size >= offset); IRBuilder<> builder(load); Value *val; - auto load_ty = load->getType(); + Type *load_ty = load->getType(); + LoadInst *newload; if (slot.isref) { assert(slot.offset == offset); - val = builder.CreateLoad(pass.T_prjlvalue, slot.slot); + newload = builder.CreateLoad(pass.T_prjlvalue, slot.slot); // Assume the addrspace is correct. - val = builder.CreateBitCast(val, load_ty); + val = builder.CreateBitCast(newload, load_ty); } else { - val = builder.CreateLoad(load_ty, slot_gep(slot, offset, load_ty, builder)); + newload = builder.CreateLoad(load_ty, slot_gep(slot, offset, load_ty, builder)); + val = newload; } + // TODO: should we use `load->clone()`, or manually copy any other metadata? 
+        newload->setAlignment(load->getAlignment());
+        // since we're moving heap-to-stack, it is safe to downgrade the atomic level to NotAtomic
+        newload->setOrdering(AtomicOrdering::NotAtomic);
         load->replaceAllUsesWith(val);
         load->eraseFromParent();
         return;
@@ -1302,6 +1308,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
         IRBuilder<> builder(store);
         auto store_val = store->getValueOperand();
         auto store_ty = store_val->getType();
+        StoreInst *newstore;
         if (slot.isref) {
             assert(slot.offset == offset);
             if (!isa<PointerType>(store_ty)) {
@@ -1316,11 +1323,15 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
             }
             if (cast<PointerType>(store_ty)->getAddressSpace() != AddressSpace::Tracked)
                 store_val = builder.CreateAddrSpaceCast(store_val, pass.T_prjlvalue);
-            builder.CreateStore(store_val, slot.slot);
+            newstore = builder.CreateStore(store_val, slot.slot);
         }
         else {
-            builder.CreateStore(store_val, slot_gep(slot, offset, store_ty, builder));
+            newstore = builder.CreateStore(store_val, slot_gep(slot, offset, store_ty, builder));
         }
+        // TODO: should we use `store->clone()`, or manually copy any other metadata?
+        newstore->setAlignment(store->getAlignment());
+        // since we're moving heap-to-stack, it is safe to downgrade the atomic level to NotAtomic
+        newstore->setOrdering(AtomicOrdering::NotAtomic);
         store->eraseFromParent();
         return;
     }
@@ -1352,7 +1363,8 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 val = ConstantExpr::getIntToPtr(val, pass.T_pjlvalue);
                 ptr = ConstantExpr::getAddrSpaceCast(val, pass.T_prjlvalue);
             }
-            builder.CreateStore(ptr, slot.slot);
+            StoreInst *store = builder.CreateAlignedStore(ptr, slot.slot, sizeof(void*));
+            store->setOrdering(AtomicOrdering::NotAtomic);
             continue;
         }
         auto ptr8 = builder.CreateBitCast(slot.slot, pass.T_pint8);
@@ -1361,6 +1373,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                                   offset - slot.offset);
         auto sub_size = std::min(slot.offset + slot.size, offset + size) -
             std::max(offset, slot.offset);
+        // TODO: alignment computation
 #if JL_LLVM_VERSION >= 100000
         builder.CreateMemSet(ptr8, val_arg, sub_size, MaybeAlign(0));
 #else
@@ -1394,7 +1407,10 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
         for (auto &slot: slots) {
             if (!slot.isref)
                 continue;
-            operands.push_back(builder.CreateLoad(pass.T_prjlvalue, slot.slot));
+            LoadInst *ref = builder.CreateAlignedLoad(pass.T_prjlvalue, slot.slot, sizeof(void*));
+            // since we're moving heap-to-stack, it is safe to downgrade the atomic level to NotAtomic
+            ref->setOrdering(AtomicOrdering::NotAtomic);
+            operands.push_back(ref);
         }
         auto new_call = builder.CreateCall(pass.gc_preserve_begin, operands);
         new_call->takeName(call);
@@ -1422,7 +1438,10 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
         for (auto &slot: slots) {
             if (!slot.isref)
                 continue;
-            operands.push_back(builder.CreateLoad(pass.T_prjlvalue, slot.slot));
+            LoadInst *ref = builder.CreateAlignedLoad(pass.T_prjlvalue, slot.slot, sizeof(void*));
+            // since we're moving heap-to-stack, it is safe to downgrade the atomic level to NotAtomic
+            ref->setOrdering(AtomicOrdering::NotAtomic);
+            operands.push_back(ref);
         }
         bundle = OperandBundleDef("jl_roots", std::move(operands));
         break;
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 2bf41bbba7eb20..ad4cdf0b4d1ede 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -104,22 +104,25 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
     IRBuilder<> builder(target->getContext());
     builder.SetInsertPoint(&*(++BasicBlock::iterator(target)));
-    Instruction *inst =
-        builder.CreateStore(
-            ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
-            builder.CreateBitCast(
-                builder.CreateConstGEP1_32(gcframe, 0),
-                T_size->getPointerTo()));
+    StoreInst *inst = builder.CreateAlignedStore(
+        ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
+        builder.CreateBitCast(
+            builder.CreateConstGEP1_32(gcframe, 0),
+            T_size->getPointerTo()),
+        sizeof(void*));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     Value *pgcstack = builder.Insert(getPgcstack(ptlsStates));
-    inst = builder.CreateStore(
-        builder.CreateLoad(pgcstack),
-        builder.CreatePointerCast(
-            builder.CreateConstGEP1_32(gcframe, 1),
-            PointerType::get(T_ppjlvalue, 0)));
+    inst = builder.CreateAlignedStore(
+        builder.CreateAlignedLoad(pgcstack, sizeof(void*)),
+        builder.CreatePointerCast(
+            builder.CreateConstGEP1_32(gcframe, 1),
+            PointerType::get(T_ppjlvalue, 0)),
+        sizeof(void*));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
-    builder.CreateStore(gcframe, builder.CreateBitCast(pgcstack,
-        PointerType::get(PointerType::get(T_prjlvalue, 0), 0)));
+    inst = builder.CreateAlignedStore(
+        gcframe,
+        builder.CreateBitCast(pgcstack, PointerType::get(PointerType::get(T_prjlvalue, 0), 0)),
+        sizeof(void*));
 }
 
 void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
@@ -131,13 +134,14 @@
     builder.SetInsertPoint(target);
     Instruction *gcpop = cast<Instruction>(builder.CreateConstGEP1_32(gcframe, 1));
-    Instruction *inst = builder.CreateLoad(gcpop);
+    Instruction *inst = builder.CreateAlignedLoad(gcpop, sizeof(void*));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
-    inst = builder.CreateStore(
+    inst = builder.CreateAlignedStore(
         inst,
         builder.CreateBitCast(
             builder.Insert(getPgcstack(ptlsStates)),
-            PointerType::get(T_prjlvalue, 0)));
+            PointerType::get(T_prjlvalue, 0)),
+        sizeof(void*));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
 }
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 9b4c283827f2ba..d078d70c8d0d09 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -1560,8 +1560,13 @@ static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned>
             IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]);
         }
         Value *GEP = irbuilder.CreateGEP(VTy, V, IdxList);
-        V = irbuilder.CreateLoad(GEP);
-    } else if (isa<PointerType>(V->getType())) {
+        Type *T = GetElementPtrInst::getIndexedType(VTy, IdxList);
+        assert(T->isPointerTy());
+        V = irbuilder.CreateAlignedLoad(T, GEP, sizeof(void*));
+        // since we're doing stack operations, it should be safe to do this non-atomically
+        cast<LoadInst>(V)->setOrdering(AtomicOrdering::NotAtomic);
+    }
+    else if (isa<PointerType>(V->getType())) {
         assert(Idxs.empty());
     }
     else if (!Idxs.empty()) {
@@ -1600,9 +1605,10 @@ unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilde
     auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
     for (unsigned i = 0; i < Ptrs.size(); ++i) {
         Value *Elem = Ptrs[i];
+        assert(Elem->getType()->isPointerTy());
         Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(Elem->getType(), Dst, i);
-        Value *shadowStore = irbuilder.CreateStore(Elem, Slot);
-        (void)shadowStore;
+        StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, sizeof(void*));
+        shadowStore->setOrdering(AtomicOrdering::NotAtomic);
         // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     }
     return Ptrs.size();
@@ -1975,7 +1981,8 @@ Value
*LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Value *V) { auto addr = EmitTagPtr(builder, T_size, V); - auto load = builder.CreateLoad(T_size, addr); + LoadInst *load = builder.CreateAlignedLoad(T_size, addr, sizeof(size_t)); + load->setOrdering(AtomicOrdering::Unordered); load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); MDBuilder MDB(load->getContext()); auto *NullInt = ConstantInt::get(T_size, 0); @@ -2099,9 +2106,11 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) { newI->takeName(CI); // Set the tag. - auto store = builder.CreateStore( + StoreInst *store = builder.CreateAlignedStore( CI->getArgOperand(2), - EmitTagPtr(builder, T_prjlvalue, newI)); + EmitTagPtr(builder, T_prjlvalue, newI), + sizeof(size_t)); + store->setOrdering(AtomicOrdering::Unordered); store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); // Replace uses of the call to `julia.gc_alloc_obj` with the call to @@ -2150,8 +2159,9 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) { int slot = 0; IRBuilder<> Builder (CI); for (; arg_it != CI->arg_end(); ++arg_it) { - Builder.CreateStore(*arg_it, Builder.CreateGEP(T_prjlvalue, Frame, - ConstantInt::get(T_int32, slot++))); + Builder.CreateAlignedStore(*arg_it, + Builder.CreateGEP(T_prjlvalue, Frame, ConstantInt::get(T_int32, slot++)), + sizeof(void*)); } ReplacementArgs.push_back(nframeargs == 0 ? (llvm::Value*)ConstantPointerNull::get(T_pprjlvalue) : diff --git a/test/llvmpasses/alloc-opt.jl b/test/llvmpasses/alloc-opt.jl index b9122ba6817c92..4a41fb7a5539fe 100644 --- a/test/llvmpasses/alloc-opt.jl +++ b/test/llvmpasses/alloc-opt.jl @@ -11,7 +11,7 @@ println(""" # CHECK-LABEL: @return_obj # CHECK-NOT: @julia.gc_alloc_obj # CHECK: %v = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc -# CHECK: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, !tbaa !0 +# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0 println(""" define {} addrspace(10)* @return_obj() { %ptls = call {}*** @julia.ptls_states() @@ -48,7 +48,7 @@ define i64 @return_load(i64 %i) { # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK: @jl_gc_pool_alloc -# CHECK: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, !tbaa !0 +# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0 println(""" define void @ccall_obj(i8* %fptr) { %ptls = call {}*** @julia.ptls_states() @@ -90,7 +90,7 @@ define void @ccall_ptr(i8* %fptr) { # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK: @jl_gc_pool_alloc -# CHECK: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, !tbaa !0 +# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0 println(""" define void @ccall_unknown_bundle(i8* %fptr) { %ptls = call {}*** @julia.ptls_states() @@ -152,7 +152,7 @@ L3: # CHECK: call {}*** @julia.ptls_states() # CHECK-NOT: @julia.gc_alloc_obj # CHECK-NOT: @jl_gc_pool_alloc -# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, !tbaa !0 +# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !0 println(""" define void @object_field({} addrspace(10)* %field) { %ptls = call {}*** @julia.ptls_states() @@ -160,7 +160,7 @@ define void @object_field({} addrspace(10)* %field) { %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, 
{} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %vab = bitcast {} addrspace(11)* %va to {} addrspace(10)* addrspace(11)* - store {} addrspace(10)* %field, {} addrspace(10)* addrspace(11)* %vab + store {} addrspace(10)* %field, {} addrspace(10)* addrspace(11)* %vab, align 8 ret void } """) diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll index 0570370dd83f13..8d3113b1cc8e4b 100644 --- a/test/llvmpasses/final-lower-gc.ll +++ b/test/llvmpasses/final-lower-gc.ll @@ -25,30 +25,30 @@ top: %ptls = call {}*** @julia.ptls_states() ; CHECK-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 ; CHECK-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* -; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], !tbaa !0 +; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 ; CHECK-DAG: [[GCFRAME_SLOT:%.*]] = getelementptr {}**, {}*** %ptls, i32 0 ; CHECK-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 ; CHECK-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** -; CHECK-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]] -; CHECK-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], !tbaa !0 +; CHECK-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 +; CHECK-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0 ; CHECK-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)*** -; CHECK-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]] +; CHECK-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8 call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) ; CHECK: %frame_slot_1 = getelementptr {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3 %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) - store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1 + store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8 %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK: %frame_slot_2 = getelementptr {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) - store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2 + store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8 ; CHECK: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) ; CHECK-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], !tbaa !0 +; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 ; CHECK-NEXT: [[GCFRAME_SLOT3:%.*]] = getelementptr {}**, {}*** %ptls, i32 0 ; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT3]] to {} addrspace(10)** -; CHECK-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], !tbaa !0 +; CHECK-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} 
addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0 call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) ; CHECK-NEXT: ret void ret void @@ -63,7 +63,7 @@ top: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8) %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 - store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, !tbaa !0 + store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0 ret {} addrspace(10)* %v } diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index a12ba9e74df62e..ae2dfc9c5794c6 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -41,7 +41,7 @@ top: ; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8) ; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* ; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]], !tbaa !0 +; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !0 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) ; CHECK-NEXT: ret {} addrspace(10)* %v ret {} addrspace(10)* %v @@ -60,7 +60,7 @@ top: ; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8) ; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* ; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]], !tbaa !0 +; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !0 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) ; CHECK-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
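
For reference, the IRBuilder pattern that this patch applies throughout codegen can be sketched in isolation as follows. This is not part of the patch: the helper names (emitPtrSlotLoad, emitPtrSlotStore) are invented for illustration, and it assumes the LLVM 10 era C++ API used above, plus the patch's own assumption that host and target pointer sizes agree (hence `sizeof(void*)` as the alignment).

// Illustrative sketch only -- not part of the patch.
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/AtomicOrdering.h"

using namespace llvm;

// Load a pointer-sized, GC-visible slot: explicit alignment, plus `unordered`,
// the weakest LLVM atomic ordering, which rules out load tearing for a racing
// collector thread without introducing any fences.
static LoadInst *emitPtrSlotLoad(IRBuilder<> &builder, Type *elty, Value *slot)
{
    LoadInst *load = builder.CreateAlignedLoad(elty, slot, sizeof(void*));
    load->setOrdering(AtomicOrdering::Unordered);
    return load;
}

// The matching store, which prints as `store atomic ... unordered, align 8`,
// the form the updated FileCheck lines in the tests above now expect.
static StoreInst *emitPtrSlotStore(IRBuilder<> &builder, Value *val, Value *slot)
{
    StoreInst *store = builder.CreateAlignedStore(val, slot, sizeof(void*));
    store->setOrdering(AtomicOrdering::Unordered);
    return store;
}

Note the complementary design choice visible in llvm-alloc-opt and llvm-late-gc-lowering: once an allocation has been proven thread-local and moved from heap to stack, the same accesses are deliberately downgraded to AtomicOrdering::NotAtomic, since no other thread can observe those slots.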