Skip to content

Commit

Permalink
fixup! Reland "gc: avoid cpu stalls when starting"
Browse files Browse the repository at this point in the history
  • Loading branch information
vtjnash committed Dec 12, 2023
1 parent bf0cd8c commit e0568e2
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 53 deletions.
12 changes: 1 addition & 11 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -966,17 +966,7 @@ JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t
extern pthread_mutex_t in_signal_lock;
#endif

static inline void jl_set_gc_and_wait(void) // n.b. not used on _OS_DARWIN_
{
jl_task_t *ct = jl_current_task;
// reading own gc state doesn't need atomic ops since no one else
// should store to it.
int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
jl_safepoint_wait_gc();
jl_atomic_store_release(&ct->ptls->gc_state, state);
jl_safepoint_wait_thread_resume(); // block in thread-suspend now if requested, after clearing the gc_state
}
void jl_set_gc_and_wait(void); // n.b. not used on _OS_DARWIN_

// Query if a Julia object is if a permalloc region (due to part of a sys- pkg-image)
STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
Expand Down
4 changes: 2 additions & 2 deletions src/llvm-codegen-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,8 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T
BasicBlock *passBB = BasicBlock::Create(builder.getContext(), "safepoint", builder.GetInsertBlock()->getParent());
BasicBlock *exitBB = BasicBlock::Create(builder.getContext(), "after_safepoint", builder.GetInsertBlock()->getParent());
Constant *zero8 = ConstantInt::get(T_int8, 0);
builder.CreateCondBr(builder.CreateAnd(builder.CreateICmpNE(old_state, zero8), // if (old_state && !state)
builder.CreateICmpEQ(state, zero8)),
builder.CreateCondBr(builder.CreateOr(builder.CreateICmpEQ(old_state, zero8), // if (!old_state || !state)
builder.CreateICmpEQ(state, zero8)),
passBB, exitBB);
builder.SetInsertPoint(passBB);
MDNode *tbaa = get_tbaa_const(builder.getContext());
Expand Down
4 changes: 2 additions & 2 deletions src/rtutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
// This function should **NOT** have any safepoint before the ones at the
// end.
sig_atomic_t old_defer_signal = ct->ptls->defer_signal;
int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
ct->eh = eh->prev;
ct->gcstack = eh->gcstack;
small_arraylist_t *locks = &ct->ptls->locks;
Expand All @@ -293,9 +292,10 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
}
ct->world_age = eh->world_age;
ct->ptls->defer_signal = eh->defer_signal;
int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
if (old_gc_state != eh->gc_state)
jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state);
if (!eh->gc_state)
if (!old_gc_state || !eh->gc_state) // it was or is unsafe now
jl_gc_safepoint_(ct->ptls);
if (old_defer_signal && !eh->defer_signal)
jl_sigint_safepoint(ct->ptls);
Expand Down
77 changes: 42 additions & 35 deletions src/safepoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,43 +145,32 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
// We're currently also using atomic store release in mutator threads
// (in jl_gc_state_set), but we may want to use signals to flush the
// memory operations on those threads lazily instead.
while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state))
jl_cpu_pause(); // yield?
}
}
}

void jl_gc_wait_for_the_world(void)
{
assert(jl_n_threads);
if (jl_n_threads > 1)
jl_wake_libuv();
for (int i = 0; i < jl_n_threads; i++) {
jl_ptls_t ptls2 = jl_all_tls_states[i];
// This acquire load pairs with the release stores
// in the signal handler of safepoint so we are sure that
// all the stores on those threads are visible.
// We're currently also using atomic store release in mutator threads
// (in jl_gc_state_set), but we may want to use signals to flush the
// memory operations on those threads lazily instead.
while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) {
// Use system mutexes rather than spin locking to minimize wasted CPU time
// while we wait for other threads reach a safepoint.
// This is particularly important when run under rr.
uv_mutex_lock(&safepoint_lock);
if (!jl_atomic_load_relaxed(&ptls2->gc_state))
uv_cond_wait(&safepoint_cond_begin, &safepoint_lock);
uv_mutex_unlock(&safepoint_lock);
while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) {
// Use system mutexes rather than spin locking to minimize wasted CPU time
// while we wait for other threads reach a safepoint.
// This is particularly important when run under rr.
uv_mutex_lock(&safepoint_lock);
if (!jl_atomic_load_relaxed(&ptls2->gc_state))
uv_cond_wait(&safepoint_cond_begin, &safepoint_lock);
uv_mutex_unlock(&safepoint_lock);
}
}
}
}

int jl_safepoint_start_gc(void)
{
// The thread should have set this already
// The thread should have just set this before entry
assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) == JL_GC_STATE_WAITING);
jl_safepoint_wait_thread_resume(); // make sure we are permitted to run GC now (we might be required to stop instead)
uv_mutex_lock(&safepoint_lock);
uv_cond_broadcast(&safepoint_cond_begin);
// make sure we are permitted to run GC now (we might be required to stop instead)
jl_task_t *ct = jl_current_task;
while (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) {
uv_mutex_unlock(&safepoint_lock);
jl_safepoint_wait_thread_resume();
uv_mutex_lock(&safepoint_lock);
}
// In case multiple threads enter the GC at the same time, only allow
// one of them to actually run the collection. We can't just let the
// master thread do the GC since it might be running unmanaged code
Expand Down Expand Up @@ -224,14 +213,28 @@ void jl_safepoint_end_gc(void)
uv_cond_broadcast(&safepoint_cond_end);
}

void jl_set_gc_and_wait(void) // n.b. not used on _OS_DARWIN_
{
jl_task_t *ct = jl_current_task;
// reading own gc state doesn't need atomic ops since no one else
// should store to it.
int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
uv_mutex_lock(&safepoint_lock);
uv_cond_broadcast(&safepoint_cond_begin);
uv_mutex_unlock(&safepoint_lock);
jl_safepoint_wait_gc();
jl_atomic_store_release(&ct->ptls->gc_state, state);
jl_safepoint_wait_thread_resume(); // block in thread-suspend now if requested, after clearing the gc_state
}

// this is the core of jl_set_gc_and_wait
void jl_safepoint_wait_gc(void) JL_NOTSAFEPOINT
{
jl_task_t *ct = jl_current_task; (void)ct;
JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct);
// The thread should have set this is already
assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0);
uv_cond_broadcast(&safepoint_cond_begin);
// Use normal volatile load in the loop for speed until GC finishes.
// Then use an acquire load to make sure the GC result is visible on this thread.
while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) {
Expand All @@ -257,6 +260,14 @@ void jl_safepoint_wait_thread_resume(void)
int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
uv_mutex_lock(&ct->ptls->sleep_lock);
if (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) {
// defer this broadcast until we determine whether uv_cond_wait is really going to be needed
uv_mutex_unlock(&ct->ptls->sleep_lock);
uv_mutex_lock(&safepoint_lock);
uv_cond_broadcast(&safepoint_cond_begin);
uv_mutex_unlock(&safepoint_lock);
uv_mutex_lock(&ct->ptls->sleep_lock);
}
while (jl_atomic_load_relaxed(&ct->ptls->suspend_count))
uv_cond_wait(&ct->ptls->wake_signal, &ct->ptls->sleep_lock);
// must while still holding the mutex_unlock, so we know other threads in
Expand Down Expand Up @@ -302,7 +313,7 @@ int jl_safepoint_suspend_thread(int tid, int waitstate)
break;
if (waitstate == 3 && state2 == JL_GC_STATE_WAITING)
break;
jl_cpu_pause(); // yield?
jl_cpu_pause(); // yield (wait for safepoint_cond_begin, for example)?
}
}
return suspend_count;
Expand All @@ -315,9 +326,7 @@ int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT
if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads))
return 0;
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
# ifdef _OS_DARWIN_
uv_mutex_lock(&safepoint_lock);
# endif
uv_mutex_lock(&ptls2->sleep_lock);
int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count);
if (suspend_count == 1) { // last to unsuspend
Expand All @@ -336,9 +345,7 @@ int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT
jl_safepoint_disable(3);
}
uv_mutex_unlock(&ptls2->sleep_lock);
# ifdef _OS_DARWIN_
uv_mutex_unlock(&safepoint_lock);
# endif
return suspend_count;
}

Expand Down
5 changes: 2 additions & 3 deletions src/signals-mach.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,10 @@ static void attach_exception_port(thread_port_t thread, int segv_only);
// low 16 bits are the thread id, the next 8 bits are the original gc_state
static arraylist_t suspended_threads;
extern uv_mutex_t safepoint_lock;
extern uv_cond_t safepoint_cond_begin;

// see jl_safepoint_wait_thread_resume
void jl_safepoint_resume_thread_mach(jl_ptls_t ptls2, int16_t tid2)
extern uv_cond_t safepoint_cond_begin;
void jl_mach_gc_end(void)
{
// must be called with uv_mutex_lock(&safepoint_lock) and uv_mutex_lock(&ptls2->sleep_lock) held (in that order)
for (size_t i = 0; i < suspended_threads.len; i++) {
Expand Down Expand Up @@ -124,8 +123,8 @@ static void jl_mach_gc_wait(jl_ptls_t ptls2, mach_port_t thread, int16_t tid)
}
if (relaxed_suspend_count)
uv_mutex_unlock(&ptls2->sleep_lock);
uv_mutex_unlock(&safepoint_lock);
uv_cond_broadcast(&safepoint_cond_begin);
uv_mutex_unlock(&safepoint_lock);
return 1;
}

Expand Down

0 comments on commit e0568e2

Please sign in to comment.