Skip to content

Commit

Permalink
Merge branch 'v1.9.2+RAI' into nhd-snapshot-4-streaming
Browse files Browse the repository at this point in the history
  • Loading branch information
NHDaly authored Oct 13, 2023
2 parents 1dc3bcf + 3f99734 commit fca5304
Show file tree
Hide file tree
Showing 13 changed files with 164 additions and 52 deletions.
3 changes: 3 additions & 0 deletions base/reflection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1092,6 +1092,7 @@ struct CodegenParams
prefer_specsig::Cint
gnu_pubnames::Cint
debug_info_kind::Cint
safepoint_on_entry::Cint

lookup::Ptr{Cvoid}

Expand All @@ -1100,12 +1101,14 @@ struct CodegenParams
function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true,
prefer_specsig::Bool=false,
gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(),
safepoint_on_entry::Bool=true,
lookup::Ptr{Cvoid}=cglobal(:jl_rettype_inferred),
generic_context = nothing)
return new(
Cint(track_allocations), Cint(code_coverage),
Cint(prefer_specsig),
Cint(gnu_pubnames), debug_info_kind,
Cint(safepoint_on_entry),
lookup, generic_context)
end
end
Expand Down
1 change: 0 additions & 1 deletion src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3933,7 +3933,6 @@ static Value *emit_defer_signal(jl_codectx_t &ctx)
return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef<Value*>(offset), "jl_defer_signal");
}


#ifndef JL_NDEBUG
static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b)
{
Expand Down
7 changes: 6 additions & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1279,6 +1279,7 @@ extern "C" {
1,
#endif
(int) DICompileUnit::DebugEmissionKind::FullDebug,
1,
jl_rettype_inferred, NULL };
}

Expand Down Expand Up @@ -7805,7 +7806,11 @@ static jl_llvm_functions_t
ctx.builder.CreateAlignedStore(load_world, world_age_field, Align(sizeof(size_t)));
}

// step 11b. Do codegen in control flow order
// step 11b. Emit the entry safepoint
if (JL_FEAT_TEST(ctx, safepoint_on_entry))
emit_gc_safepoint(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const);

// step 11c. Do codegen in control flow order
std::vector<int> workstack;
std::map<int, BasicBlock*> BB;
std::map<size_t, BasicBlock*> come_from_bb;
Expand Down
48 changes: 34 additions & 14 deletions src/codegen_shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <utility>
#include <llvm/ADT/ArrayRef.h>
#include <llvm/Support/Debug.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/DebugLoc.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/MDBuilder.h>
Expand Down Expand Up @@ -233,20 +234,39 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder)
builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread);
}

static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa)
static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false)
{
using namespace llvm;
llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa);
emit_signal_fence(builder);
builder.CreateLoad(getSizeTy(builder.getContext()), get_current_signal_page_from_ptls(builder, ptls, tbaa), true);
Module *M = builder.GetInsertBlock()->getModule();
LLVMContext &C = builder.getContext();
// inline jlsafepoint_func->realize(M)
if (final) {
auto T_size = getSizeTy(builder.getContext());
builder.CreateLoad(T_size, signal_page, true);
}
else {
Function *F = M->getFunction("julia.safepoint");
if (!F) {
auto T_size = getSizeTy(builder.getContext());
auto T_psize = T_size->getPointerTo();
FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false);
F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M);
F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
builder.CreateCall(F, {signal_page});
}
emit_signal_fence(builder);
}

static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state)
static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final)
{
using namespace llvm;
Type *T_int8 = state->getType();
ptls = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy());
llvm::Value *ptls_i8 = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy());
Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state));
Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef<Value*>(offset), "gc_state");
Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls_i8, ArrayRef<Value*>(offset), "gc_state");
if (old_state == nullptr) {
old_state = builder.CreateLoad(T_int8, gc_state);
cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic);
Expand All @@ -266,38 +286,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::V
passBB, exitBB);
builder.SetInsertPoint(passBB);
MDNode *tbaa = get_tbaa_const(builder.getContext());
emit_gc_safepoint(builder, ptls, tbaa);
emit_gc_safepoint(builder, ptls, tbaa, final);
builder.CreateBr(exitBB);
builder.SetInsertPoint(exitBB);
return old_state;
}

static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls)
static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls, bool final)
{
using namespace llvm;
Value *state = builder.getInt8(0);
return emit_gc_state_set(builder, ptls, state, nullptr);
return emit_gc_state_set(builder, ptls, state, nullptr, final);
}

static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state)
static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, bool final)
{
using namespace llvm;
Value *old_state = builder.getInt8(0);
return emit_gc_state_set(builder, ptls, state, old_state);
return emit_gc_state_set(builder, ptls, state, old_state, final);
}

static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls)
static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls, bool final)
{
using namespace llvm;
Value *state = builder.getInt8(JL_GC_STATE_SAFE);
return emit_gc_state_set(builder, ptls, state, nullptr);
return emit_gc_state_set(builder, ptls, state, nullptr, final);
}

static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state)
static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, bool final)
{
using namespace llvm;
Value *old_state = builder.getInt8(JL_GC_STATE_SAFE);
return emit_gc_state_set(builder, ptls, state, old_state);
return emit_gc_state_set(builder, ptls, state, old_state, final);
}

// Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14.
Expand Down
6 changes: 3 additions & 3 deletions src/gc-pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,17 @@ static uint64_t poolmem_bytes_allocated = 0;
static uint64_t poolmem_blocks_allocated_total = 0;


JL_DLLEXPORT uint64_t jl_poolmem_blocks_allocated_total()
JL_DLLEXPORT uint64_t jl_poolmem_blocks_allocated_total(void)
{
return poolmem_blocks_allocated_total;
}

JL_DLLEXPORT uint64_t jl_poolmem_bytes_allocated()
JL_DLLEXPORT uint64_t jl_poolmem_bytes_allocated(void)
{
return poolmem_bytes_allocated;
}

JL_DLLEXPORT uint64_t jl_current_pg_count()
JL_DLLEXPORT uint64_t jl_current_pg_count(void)
{
return (uint64_t)jl_atomic_load(&current_pg_count);
}
Expand Down
52 changes: 42 additions & 10 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2061,7 +2061,8 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v
// the first young object before starting this chunk
// (this also would be valid for young objects, but probably less beneficial)
for (; obj_begin < obj_end; obj_begin += step) {
new_obj = *obj_begin;
jl_value_t **slot = obj_begin;
new_obj = *slot;
if (new_obj != NULL) {
verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)",
gc_slot_to_arrayidx(obj_parent, obj_begin));
Expand All @@ -2070,7 +2071,7 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v
nptr |= 1;
if (!gc_marked(o->header))
break;
gc_heap_snapshot_record_array_edge(obj_parent, &new_obj);
gc_heap_snapshot_record_array_edge(obj_parent, slot);
}
}
}
Expand All @@ -2092,12 +2093,13 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v
}
}
for (; obj_begin < scan_end; obj_begin += step) {
jl_value_t **slot = obj_begin;
new_obj = *obj_begin;
if (new_obj != NULL) {
verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)",
gc_slot_to_arrayidx(obj_parent, obj_begin));
gc_try_claim_and_push(mq, new_obj, &nptr);
gc_heap_snapshot_record_array_edge(obj_parent, &new_obj);
gc_heap_snapshot_record_array_edge(obj_parent, slot);
}
}
if (too_big) {
Expand Down Expand Up @@ -2128,7 +2130,8 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
for (; ary8_begin < ary8_end; ary8_begin += elsize) {
int early_end = 0;
for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) {
new_obj = ary8_begin[*pindex];
jl_value_t **slot = &ary8_begin[*pindex];
new_obj = *slot;
if (new_obj != NULL) {
verify_parent2("array", ary8_parent, &new_obj, "elem(%d)",
gc_slot_to_arrayidx(ary8_parent, ary8_begin));
Expand All @@ -2139,7 +2142,7 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
early_end = 1;
break;
}
gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj);
gc_heap_snapshot_record_array_edge(ary8_parent, slot);
}
}
if (early_end)
Expand All @@ -2165,12 +2168,13 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
}
for (; ary8_begin < ary8_end; ary8_begin += elsize) {
for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) {
new_obj = ary8_begin[*pindex];
jl_value_t **slot = &ary8_begin[*pindex];
new_obj = *slot;
if (new_obj != NULL) {
verify_parent2("array", ary8_parent, &new_obj, "elem(%d)",
gc_slot_to_arrayidx(ary8_parent, ary8_begin));
gc_try_claim_and_push(mq, new_obj, &nptr);
gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj);
gc_heap_snapshot_record_array_edge(ary8_parent, slot);
}
}
}
Expand All @@ -2193,7 +2197,34 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_
jl_gc_markqueue_t *mq = &ptls->mark_queue;
jl_value_t *new_obj;
size_t elsize = ((jl_array_t *)ary16_parent)->elsize / sizeof(jl_value_t *);
// Decide whether need to chunk ary16
assert(elsize > 0);
// Decide whether need to chunk objary
if ((nptr & 0x2) == 0x2) {
// pre-scan this object: most of this object should be old, so look for
// the first young object before starting this chunk
// (this also would be valid for young objects, but probably less beneficial)
for (; ary16_begin < ary16_end; ary16_begin += elsize) {
int early_end = 0;
for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) {
jl_value_t **slot = &ary16_begin[*pindex];
new_obj = *slot;
if (new_obj != NULL) {
verify_parent2("array", ary16_parent, &new_obj, "elem(%d)",
gc_slot_to_arrayidx(ary16_parent, ary16_begin));
jl_taggedvalue_t *o = jl_astaggedvalue(new_obj);
if (!gc_old(o->header))
nptr |= 1;
if (!gc_marked(o->header)){
early_end = 1;
break;
}
gc_heap_snapshot_record_array_edge(ary16_parent, slot);
}
}
if (early_end)
break;
}
}
size_t too_big = (ary16_end - ary16_begin) / GC_CHUNK_BATCH_SIZE > elsize; // use this order of operations to avoid idiv
jl_value_t **scan_end = ary16_end;
int pushed_chunk = 0;
Expand All @@ -2213,12 +2244,13 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_
}
for (; ary16_begin < scan_end; ary16_begin += elsize) {
for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) {
new_obj = ary16_begin[*pindex];
jl_value_t **slot = &ary16_begin[*pindex];
new_obj = *slot;
if (new_obj != NULL) {
verify_parent2("array", ary16_parent, &new_obj, "elem(%d)",
gc_slot_to_arrayidx(ary16_parent, ary16_begin));
gc_try_claim_and_push(mq, new_obj, &nptr);
gc_heap_snapshot_record_array_edge(ary16_parent, &new_obj);
gc_heap_snapshot_record_array_edge(ary16_parent, slot);
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2247,9 +2247,11 @@ typedef struct {

// controls the emission of debug-info. mirrors the clang options
int gnu_pubnames; // can we emit the gnu pubnames debuginfo
int debug_info_kind; // Enum for line-table-only, line-directives-only,
int debug_info_kind; // Enum for line-table-only, line-directives-only,
// limited, standalone

int safepoint_on_entry; // Emit a safepoint on entry to each function

// Cache access. Default: jl_rettype_inferred.
jl_codeinstance_lookup_t lookup;

Expand Down
32 changes: 29 additions & 3 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ STATISTIC(GetGCFrameSlotCount, "Number of lowered getGCFrameSlotFunc intrinsics"
STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics");
STATISTIC(QueueGCBindingCount, "Number of lowered queueGCBindingFunc intrinsics");
STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics");

using namespace llvm;

Expand Down Expand Up @@ -72,6 +73,9 @@ struct FinalLowerGC: private JuliaPassContext {

// Lowers a `julia.queue_gc_binding` intrinsic.
Value *lowerQueueGCBinding(CallInst *target, Function &F);

// Lowers a `julia.safepoint` intrinsic.
Value *lowerSafepoint(CallInst *target, Function &F);
};

Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
Expand Down Expand Up @@ -202,6 +206,18 @@ Value *FinalLowerGC::lowerQueueGCBinding(CallInst *target, Function &F)
return target;
}

// Lowers a `julia.safepoint` intrinsic call.
//
// `target` is the intrinsic call; its single operand is used as the address
// to load from. A volatile, size_t-wide load of that address is emitted
// immediately before `target` and returned. The call itself is left in place
// here; removing it is up to the caller.
Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
{
    ++SafepointCount;
    assert(target->arg_size() == 1);

    // Constructing the builder from an instruction inserts right before it.
    IRBuilder<> irb(target);
    auto *sizeTy = getSizeTy(irb.getContext());
    Value *pagePtr = target->getOperand(0);

    // The load is volatile so that it is kept even though its result is unused.
    return irb.CreateLoad(sizeTy, pagePtr, /*isVolatile=*/true);
}

Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
{
++GCAllocBytesCount;
Expand Down Expand Up @@ -317,16 +333,20 @@ static void replaceInstruction(

bool FinalLowerGC::runOnFunction(Function &F)
{
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
// Check availability of functions again since they might have been deleted.
initFunctions(*F.getParent());
if (!pgcstack_getter && !adoptthread_func)
if (!pgcstack_getter && !adoptthread_func) {
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n");
return false;
}

// Look for a call to 'julia.get_pgcstack'.
pgcstack = getPGCstack(F);
if (!pgcstack)
if (!pgcstack) {
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n");
return false;
}
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");

// Acquire intrinsic functions.
auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame);
Expand All @@ -336,6 +356,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes);
auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot);
auto queueGCBindingFunc = getOrNull(jl_intrinsics::queueGCBinding);
auto safepointFunc = getOrNull(jl_intrinsics::safepoint);

// Lower all calls to supported intrinsics.
for (BasicBlock &BB : F) {
Expand All @@ -347,6 +368,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
}

Value *callee = CI->getCalledOperand();
assert(callee);

if (callee == newGCFrameFunc) {
replaceInstruction(CI, lowerNewGCFrame(CI, F), it);
Expand All @@ -371,6 +393,10 @@ bool FinalLowerGC::runOnFunction(Function &F)
else if (callee == queueGCBindingFunc) {
replaceInstruction(CI, lowerQueueGCBinding(CI, F), it);
}
else if (callee == safepointFunc) {
lowerSafepoint(CI, F);
it = CI->eraseFromParent();
}
else {
++it;
}
Expand Down
Loading

0 comments on commit fca5304

Please sign in to comment.