From 97ffa7fce9df65944244afa47fbcd068220fc89f Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 29 Apr 2022 17:34:27 -0400 Subject: [PATCH] codegen: add handling for undefined phinode values The optimization pass often uses values for phi values (and thus by extension, also for pi, phic and upsilon values) that are invalid. We make sure that these have a null pointer, so that we can detect that case at runtime (at the cost of slightly worse code generation for them), but it means we need to be very careful to check for that. This is identical to #39747, which added the equivalent code to the other side of the conditional there, but missed some additional relevant, but rare, cases that are observed to be possible. The `emit_isa_and_defined` is derived from the LLVM name for this operation: `isa_and_nonnull`. Secondly, we also optimize `emit_unionmove` to change a bad IR case to a better IR form. Fix #44501 --- src/cgutils.cpp | 120 ++++++++++++++++++---------------- src/codegen.cpp | 30 +++++---- src/llvm-late-gc-lowering.cpp | 3 +- 3 files changed, 83 insertions(+), 70 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 6f346b32728b3..655fe33eff294 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -847,7 +847,7 @@ static bool is_uniontype_allunboxed(jl_value_t *typ) return for_each_uniontype_small([&](unsigned, jl_datatype_t*) {}, typ, counter); } -static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p); +static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull=false); static unsigned get_box_tindex(jl_datatype_t *jt, jl_value_t *ut) { @@ -902,16 +902,9 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, ssize_t n, MDNo } static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v); +static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull); -// Returns ctx.types().T_prjlvalue -static Value *emit_typeof(jl_codectx_t &ctx, Value *tt) -{ - ++EmittedTypeof; - assert(tt != NULL && !isa(tt) && "expected a conditionally boxed value"); - return ctx.builder.CreateCall(prepare_call(jl_typeof_func), {tt}); -} - -static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p) +static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) { // given p, compute its type if (p.constant) @@ -924,7 +917,7 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p) return mark_julia_const(ctx, jl_typeof(tp)); } } - return mark_julia_type(ctx, emit_typeof(ctx, p.V), true, jl_datatype_type); + return mark_julia_type(ctx, emit_typeof(ctx, p.V, maybenull), true, jl_datatype_type); } if (p.TIndex) { Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); @@ -959,7 +952,7 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p) BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge", ctx.f); ctx.builder.CreateCondBr(isnull, boxBB, unboxBB); ctx.builder.SetInsertPoint(boxBB); - auto boxTy = emit_typeof(ctx, p.Vboxed); + auto boxTy = emit_typeof(ctx, p.Vboxed, maybenull); ctx.builder.CreateBr(mergeBB); boxBB = ctx.builder.GetInsertBlock(); // could have changed ctx.builder.SetInsertPoint(unboxBB); @@ -981,9 +974,9 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p) } // Returns ctx.types().T_prjlvalue -static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p) +static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) { - return boxed(ctx, emit_typeof(ctx, p)); + return boxed(ctx, emit_typeof(ctx, p, maybenull)); } static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt) @@ -1224,6 +1217,24 @@ static Value *emit_nullcheck_guard2(jl_codectx_t &ctx, Value *nullcheck1, }); } +// Returns typeof(v), or null if v is a null pointer at run time and maybenull is true. +// This is used when the value might have come from an undefined value (a PhiNode), +// yet we try to read its type to compute a union index when moving the value (a PiNode). +// Returns a ctx.types().T_prjlvalue typed Value +static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull) +{ + ++EmittedTypeof; + assert(v != NULL && !isa(v) && "expected a conditionally boxed value"); + Function *typeof = prepare_call(jl_typeof_func); + if (maybenull) + return emit_guarded_test(ctx, null_pointer_cmp(ctx, v), Constant::getNullValue(typeof->getReturnType()), [&] { + // e.g. emit_typeof(ctx, v) + return ctx.builder.CreateCall(typeof, {v}); + }); + return ctx.builder.CreateCall(typeof, {v}); +} + + static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg) { Value *msg_val = stringConstPtr(ctx.emission_context, ctx.builder, msg); @@ -1353,7 +1364,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f); ctx.builder.CreateCondBr(isboxed, isaBB, postBB); ctx.builder.SetInsertPoint(isaBB); - Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, x.Vboxed), + Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, x.Vboxed, false), track_pjlvalue(ctx, literal_pointer_val(ctx, intersected_type))); ctx.builder.CreateBr(postBB); isaBB = ctx.builder.GetInsertBlock(); // could have changed @@ -1409,6 +1420,20 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)), false); } +// If this might have been sourced from a PhiNode object, it is possible our +// Vboxed pointer itself is null (undef) at runtime even if we thought we should +// know exactly the type of the bytes that should have been inside. +// +// n.b. It is also possible the value is a ghost of some sort, and we will +// declare that the pointer is legal (for zero bytes) even though it might be undef. +static Value *emit_isa_and_defined(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ) +{ + return emit_nullcheck_guard(ctx, val.ispointer() ? val.V : nullptr, [&] { + return emit_isa(ctx, val, typ, nullptr).first; + }); +} + + static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string &msg) { Value *istype; @@ -3005,42 +3030,16 @@ static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype, jl_value_t return tindex; } -// Returns typeof(v), or null if v is a null pointer at run time. -// This is used when the value might have come from an undefined variable, -// yet we try to read its type to compute a union index when moving the value. -static Value *emit_typeof_or_null(jl_codectx_t &ctx, Value *v) -{ - BasicBlock *nonnull = BasicBlock::Create(ctx.builder.getContext(), "nonnull", ctx.f); - BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "postnull", ctx.f); - Value *isnull = ctx.builder.CreateICmpEQ(v, Constant::getNullValue(v->getType())); - ctx.builder.CreateCondBr(isnull, postBB, nonnull); - BasicBlock *entry = ctx.builder.GetInsertBlock(); - ctx.builder.SetInsertPoint(nonnull); - Value *typof = emit_typeof(ctx, v); - ctx.builder.CreateBr(postBB); - nonnull = ctx.builder.GetInsertBlock(); // could have changed - ctx.builder.SetInsertPoint(postBB); - PHINode *ti = ctx.builder.CreatePHI(typof->getType(), 2); - ti->addIncoming(Constant::getNullValue(typof->getType()), entry); - ti->addIncoming(typof, nonnull); - return ti; -} - // get the runtime tindex value, assuming val is already converted to type typ if it has a TIndex -static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ) +static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ, bool maybenull=false) { if (val.typ == jl_bottom_type) return UndefValue::get(getInt8Ty(ctx.builder.getContext())); if (val.constant) return ConstantInt::get(getInt8Ty(ctx.builder.getContext()), get_box_tindex((jl_datatype_t*)jl_typeof(val.constant), typ)); - if (val.TIndex) return ctx.builder.CreateAnd(val.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); - Value *typof; - if (val.isboxed && !jl_is_concrete_type(val.typ) && !jl_is_type_type(val.typ)) - typof = emit_typeof_or_null(ctx, val.V); - else - typof = emit_typeof_boxed(ctx, val); + Value *typof = emit_typeof_boxed(ctx, val, maybenull); return compute_box_tindex(ctx, typof, val.typ, typ); } @@ -3222,14 +3221,17 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con Value *src_ptr = data_pointer(ctx, src); unsigned nb = jl_datatype_size(typ); unsigned alignment = julia_alignment(typ); - Value *nbytes = ConstantInt::get(getSizeTy(ctx.builder.getContext()), nb); - if (skip) { - // TODO: this Select is very bad for performance, but is necessary to work around LLVM bugs with the undef option that we want to use: - // select copy dest -> dest to simulate an undef value / conditional copy - // src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr); - nbytes = ctx.builder.CreateSelect(skip, Constant::getNullValue(getSizeTy(ctx.builder.getContext())), nbytes); - } - emit_memcpy(ctx, dest, tbaa_dst, src_ptr, src.tbaa, nbytes, alignment, isVolatile); + // TODO: this branch may be bad for performance, but is necessary to work around LLVM bugs with the undef option that we want to use: + // select copy dest -> dest to simulate an undef value / conditional copy + // if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr); + auto f = [&] { + (void)emit_memcpy(ctx, dest, tbaa_dst, src_ptr, src.tbaa, nb, alignment, isVolatile); + return nullptr; + }; + if (skip) + emit_guarded_test(ctx, skip, nullptr, f); + else + f(); } } } @@ -3282,12 +3284,16 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con } else { assert(src.isboxed && "expected boxed value for sizeof/alignment computation"); - Value *datatype = emit_typeof_boxed(ctx, src); - Value *copy_bytes = emit_datatype_size(ctx, datatype); - if (skip) { - copy_bytes = ctx.builder.CreateSelect(skip, ConstantInt::get(copy_bytes->getType(), 0), copy_bytes); - } - emit_memcpy(ctx, dest, tbaa_dst, src, copy_bytes, /*TODO: min-align*/1, isVolatile); + auto f = [&] { + Value *datatype = emit_typeof_boxed(ctx, src); + Value *copy_bytes = emit_datatype_size(ctx, datatype); + emit_memcpy(ctx, dest, tbaa_dst, src, copy_bytes, /*TODO: min-align*/1, isVolatile); + return nullptr; + }; + if (skip) + emit_guarded_test(ctx, skip, nullptr, f); + else + f(); } } diff --git a/src/codegen.cpp b/src/codegen.cpp index fa7485a8448af..a95631962e93d 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1816,7 +1816,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & if (!union_isaBB) { union_isaBB = BasicBlock::Create(ctx.builder.getContext(), "union_isa", ctx.f); ctx.builder.SetInsertPoint(union_isaBB); - union_box_dt = emit_typeof_or_null(ctx, v.Vboxed); + union_box_dt = emit_typeof(ctx, v.Vboxed, skip != NULL); post_union_isaBB = ctx.builder.GetInsertBlock(); } }; @@ -1915,7 +1915,6 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ return ghostValue(ctx, typ); Value *new_tindex = NULL; if (jl_is_concrete_type(typ)) { - assert(skip == nullptr && "skip only valid for union type return"); if (v.TIndex && !jl_is_pointerfree(typ)) { // discovered that this union-split type must actually be isboxed if (v.Vboxed) { @@ -1923,14 +1922,20 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ } else { // type mismatch: there weren't any boxed values in the union - CreateTrap(ctx.builder); + if (skip) + *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); + else + CreateTrap(ctx.builder); return jl_cgval_t(ctx.builder.getContext()); } } if (jl_is_concrete_type(v.typ) && !jl_is_kind(v.typ)) { if (jl_is_concrete_type(typ) && !jl_is_kind(typ)) { // type mismatch: changing from one leaftype to another - CreateTrap(ctx.builder); + if (skip) + *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); + else + CreateTrap(ctx.builder); return jl_cgval_t(ctx.builder.getContext()); } } @@ -2938,7 +2943,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } else if (f == jl_builtin_typeof && nargs == 1) { - *ret = emit_typeof(ctx, argv[1]); + *ret = emit_typeof(ctx, argv[1], false); return true; } @@ -4129,7 +4134,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); Value *sp = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); - Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp), + Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false), track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig; for (size_t j = 0; j < i; j++) { @@ -4204,7 +4209,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); Value *sp = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); - isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp), + isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false), track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); } else { @@ -4378,7 +4383,7 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu } } else { - emit_unionmove(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, isboxed, vi.isVolatile); + emit_unionmove(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, /*skip*/isboxed, vi.isVolatile); } } } @@ -4840,7 +4845,8 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval) jl_error("GotoIfNot in value position"); } if (jl_is_pinode(expr)) { - return convert_julia_type(ctx, emit_expr(ctx, jl_fieldref_noalloc(expr, 0)), jl_fieldref_noalloc(expr, 1)); + Value *skip = NULL; + return convert_julia_type(ctx, emit_expr(ctx, jl_fieldref_noalloc(expr, 0)), jl_fieldref_noalloc(expr, 1), &skip); } if (!jl_is_expr(expr)) { int needroot = true; @@ -7717,7 +7723,7 @@ static jl_llvm_functions_t else { // must be careful to emit undef here (rather than a bitcast or // load of val) if the runtime type of val isn't phiType - Value *isvalid = emit_isa(ctx, val, phiType, NULL).first; + Value *isvalid = emit_isa_and_defined(ctx, val, phiType); V = emit_guarded_test(ctx, isvalid, undef_value_for_type(VN->getType()), [&] { return emit_unbox(ctx, VN->getType(), val, phiType); }); @@ -7729,7 +7735,7 @@ static jl_llvm_functions_t // must be careful to emit undef here (rather than a bitcast or // load of val) if the runtime type of val isn't phiType assert(lty != ctx.types().T_prjlvalue); - Value *isvalid = emit_isa(ctx, val, phiType, NULL).first; + Value *isvalid = emit_isa_and_defined(ctx, val, phiType); emit_guarded_test(ctx, isvalid, nullptr, [&] { (void)emit_unbox(ctx, lty, val, phiType, maybe_decay_tracked(ctx, dest), ctx.tbaa().tbaa_stack); return nullptr; @@ -7771,7 +7777,7 @@ static jl_llvm_functions_t RTindex = new_union.TIndex; if (!RTindex) { assert(new_union.isboxed && new_union.Vboxed && "convert_julia_type failed"); - RTindex = compute_tindex_unboxed(ctx, new_union, phiType); + RTindex = compute_tindex_unboxed(ctx, new_union, phiType, true); if (dest) { // If dest is not set, this is a ghost union, the recipient of which // is often not prepared to handle a boxed representation of the ghost. diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index cf4d771f02a89..335d0803b8638 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1071,8 +1071,9 @@ void RecursivelyVisit(callback f, Value *V) { if (isa(TheUser)) f(VU); if (isa(TheUser) || isa(TheUser) || - isa(TheUser) || isa(TheUser) || + isa(TheUser) || isa(TheUser) || // TODO: should these be removed from this list? isa(TheUser) || isa(TheUser) || + isa(TheUser) || // ICmpEQ/ICmpNE can be used with ptr types isa(TheUser) || isa(TheUser)) continue; if (isa(TheUser) || isa(TheUser) || isa(TheUser)) {