add support for async backtraces of Tasks on any thread (JuliaLang#51430)
vtjnash authored and RAI CI (GitHub Action Automation) committed Nov 14, 2023
1 parent 48eae29 commit db4cec8
Showing 13 changed files with 532 additions and 328 deletions.
2 changes: 1 addition & 1 deletion src/Makefile
@@ -43,7 +43,7 @@ endif
SRCS := \
jltypes gf typemap smallintset ast builtins module interpreter symbol \
dlload sys init task array staticdata toplevel jl_uv datatype \
simplevector runtime_intrinsics precompile jloptions \
simplevector runtime_intrinsics precompile jloptions mtarraylist \
threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \
jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall
80 changes: 54 additions & 26 deletions src/gc-stacks.c
@@ -119,7 +119,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
unsigned pool_id = select_pool(bufsz);
if (pool_sizes[pool_id] == bufsz) {
arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
return;
}
}
@@ -148,7 +148,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
#ifdef _COMPILER_ASAN_ENABLED_
__asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
#endif
arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
}
}
}
@@ -163,9 +163,9 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
unsigned pool_id = select_pool(ssize);
ssize = pool_sizes[pool_id];
arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
small_arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
if (pool->len > 0) {
stk = arraylist_pop(pool);
stk = small_arraylist_pop(pool);
}
}
else {
@@ -184,8 +184,8 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
}
*bufsz = ssize;
if (owner) {
arraylist_t *live_tasks = &ptls->heap.live_tasks;
arraylist_push(live_tasks, owner);
small_arraylist_t *live_tasks = &ptls->heap.live_tasks;
mtarraylist_push(live_tasks, owner);
}
return stk;
}
@@ -209,7 +209,7 @@ void sweep_stack_pools(void)

// free half of stacks that remain unused since last sweep
for (int p = 0; p < JL_N_STACK_POOLS; p++) {
arraylist_t *al = &ptls2->heap.free_stacks[p];
small_arraylist_t *al = &ptls2->heap.free_stacks[p];
size_t n_to_free;
if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
n_to_free = al->len / 2;
@@ -220,12 +220,12 @@ void sweep_stack_pools(void)
n_to_free = 0;
}
for (int n = 0; n < n_to_free; n++) {
void *stk = arraylist_pop(al);
void *stk = small_arraylist_pop(al);
free_stack(stk, pool_sizes[p]);
}
}

arraylist_t *live_tasks = &ptls2->heap.live_tasks;
small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
size_t n = 0;
size_t ndel = 0;
size_t l = live_tasks->len;
@@ -268,24 +268,52 @@ void sweep_stack_pools(void)

JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
{
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
arraylist_t *live_tasks = &ptls->heap.live_tasks;
size_t i, j, l;
jl_array_t *a;
do {
l = live_tasks->len;
a = jl_alloc_vec_any(l + 1); // may gc, changing the number of tasks
} while (l + 1 < live_tasks->len);
l = live_tasks->len;
void **lst = live_tasks->items;
j = 0;
((void**)jl_array_data(a))[j++] = ptls->root_task;
for (i = 0; i < l; i++) {
if (((jl_task_t*)lst[i])->stkbuf != NULL)
((void**)jl_array_data(a))[j++] = lst[i];
size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
size_t l = 0; // l is not reset on restart, so we keep getting more aggressive at making a big enough list every time it fails
restart:
for (size_t i = 0; i < nthreads; i++) {
// skip GC threads since they don't have tasks
if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
continue;
}
jl_ptls_t ptls2 = allstates[i];
if (ptls2 == NULL)
continue;
small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
size_t n = mtarraylist_length(live_tasks);
l += n + (ptls2->root_task->stkbuf != NULL);
}
l += l / 20; // add 5% for margin of estimation error
jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything
nthreads = jl_atomic_load_acquire(&jl_n_threads);
allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
size_t j = 0;
for (size_t i = 0; i < nthreads; i++) {
// skip GC threads since they don't have tasks
if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
continue;
}
jl_ptls_t ptls2 = allstates[i];
if (ptls2 == NULL)
continue;
jl_task_t *t = ptls2->root_task;
if (t->stkbuf != NULL) {
if (j == l)
goto restart;
((void**)jl_array_data(a))[j++] = t;
}
small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
size_t n = mtarraylist_length(live_tasks);
for (size_t i = 0; i < n; i++) {
jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
if (t->stkbuf != NULL) {
if (j == l)
goto restart;
((void**)jl_array_data(a))[j++] = t;
}
}
}
l = jl_array_len(a);
if (j < l) {
JL_GC_PUSH1(&a);
jl_array_del_end(a, l - j);
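
jl_live_tasks now walks every thread's live-task list without stopping the world, so the totals can change while it copies. A minimal standalone sketch of the estimate-then-retry pattern it uses (stand-in list type, not Julia's actual structures; the real code also reserves a slot per root task):

#include <stdlib.h>

typedef struct { void **items; size_t len; } list_t; // stand-in for a per-thread task list

// Estimate the total, allocate with ~5% headroom, copy, and restart if the
// lists outgrew the buffer; `cap` is deliberately not reset, so each retry
// gets more aggressive about allocating a big enough buffer.
static void **snapshot_all(list_t *lists, size_t nlists, size_t *outlen)
{
    size_t cap = 0;
restart:
    for (size_t i = 0; i < nlists; i++)
        cap += lists[i].len;          // estimate the current total
    cap += cap / 20;                  // add 5% margin for concurrent growth
    void **buf = (void**)malloc(cap * sizeof(void*));
    size_t j = 0;
    for (size_t i = 0; i < nlists; i++) {
        size_t n = lists[i].len;      // re-read: may have grown since the estimate
        for (size_t k = 0; k < n; k++) {
            if (j == cap) {           // out of room: grow the estimate and retry
                free(buf);
                goto restart;
            }
            buf[j++] = lists[i].items[k];
        }
    }
    *outlen = j;                      // caller trims the buffer to the actual count
    return buf;
}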
8 changes: 5 additions & 3 deletions src/gc.c
@@ -948,7 +948,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*),
jl_weakref_type);
wr->value = value; // NOTE: wb not needed here
arraylist_push(&ptls->heap.weak_refs, wr);
small_arraylist_push(&ptls->heap.weak_refs, wr);
return wr;
}

@@ -3536,8 +3536,10 @@ void jl_init_thread_heap(jl_ptls_t ptls)
p[i].freelist = NULL;
p[i].newpages = NULL;
}
arraylist_new(&heap->weak_refs, 0);
arraylist_new(&heap->live_tasks, 0);
small_arraylist_new(&heap->weak_refs, 0);
small_arraylist_new(&heap->live_tasks, 0);
for (int i = 0; i < JL_N_STACK_POOLS; i++)
small_arraylist_new(&heap->free_stacks[i], 0);
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
heap->big_objects = NULL;
3 changes: 2 additions & 1 deletion src/interpreter.c
@@ -65,7 +65,8 @@ extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT;
// we define this separately so that we can populate the frame before we add it to the backtrace
// it's recommended to mark the containing function with NOINLINE, though not essential
#define JL_GC_ENABLEFRAME(frame) \
((void**)&frame[1])[0] = __builtin_frame_address(0);
jl_signal_fence(); \
((void**)&frame[1])[0] = __builtin_frame_address(0);

#endif
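
The added jl_signal_fence matters because backtraces can now be taken asynchronously from another thread: the interpreter frame must be fully populated before its address is published to the stack walker. A generic sketch of that publish ordering (hypothetical frame type; atomic_signal_fence is assumed to match what jl_signal_fence provides, and __builtin_frame_address is a GCC/Clang builtin):

#include <stdatomic.h>

struct frame { void *slot; void *fp; }; // hypothetical two-word frame

static void enable_frame(struct frame *f, void *data)
{
    f->slot = data;                            // populate the frame first
    atomic_signal_fence(memory_order_seq_cst); // forbid compiler reordering
    f->fp = __builtin_frame_address(0);        // then publish it for walkers
}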

5 changes: 5 additions & 0 deletions src/julia.h
@@ -1049,6 +1049,11 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
int isaligned, jl_value_t *owner);
JL_DLLEXPORT void jl_gc_safepoint(void);

void *mtarraylist_get(small_arraylist_t *_a, size_t idx) JL_NOTSAFEPOINT;
size_t mtarraylist_length(small_arraylist_t *_a) JL_NOTSAFEPOINT;
void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx) JL_NOTSAFEPOINT;
void mtarraylist_push(small_arraylist_t *_a, void *elt) JL_NOTSAFEPOINT;

// object accessors -----------------------------------------------------------

#define jl_svec_len(t) (((jl_svec_t*)(t))->length)
5 changes: 5 additions & 0 deletions src/julia_internal.h
@@ -203,6 +203,8 @@ JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
int jl_lock_stackwalk(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
void jl_unlock_stackwalk(int lockret) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;

// number of cycles since power-on
static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
@@ -1162,6 +1164,9 @@ void jl_print_bt_entry_codeloc(int sig, jl_bt_element_t *bt_data) JL_NOTSAFEPOIN
#ifdef _OS_WINDOWS_
JL_DLLEXPORT void jl_refresh_dbg_module_list(void);
#endif
int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) JL_NOTSAFEPOINT;
void jl_thread_resume(int tid) JL_NOTSAFEPOINT;

// *to is NULL or malloc'd pointer, from is allowed to be NULL
STATIC_INLINE char *jl_copy_str(char **to, const char *from) JL_NOTSAFEPOINT
{
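
These declarations, together with jl_lock_stackwalk/jl_unlock_stackwalk above, form the new cross-platform sampling protocol: take the stackwalk lock, suspend the target thread, receive its context by value, walk it, then resume. A sketch of a caller, mirroring the signal_listener changes in signals-unix.c (error handling elided; assumes rec_backtrace_ctx and the types from this header):

static size_t sample_one_thread(int tid, jl_bt_element_t *bt_data, size_t maxsize)
{
    size_t bt_size = 0;
    int lockret = jl_lock_stackwalk();   // serialize against other walkers
    bt_context_t ctx;                    // filled by value, no pointer into static storage
    if (jl_thread_suspend_and_get_state(tid, /*timeout=*/1, &ctx)) {
        // walk while the target is still suspended, then let it run again
        bt_size = rec_backtrace_ctx(bt_data, maxsize, &ctx, NULL);
        jl_thread_resume(tid);
    }
    jl_unlock_stackwalk(lockret);
    return bt_size;
}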
8 changes: 4 additions & 4 deletions src/julia_threads.h
@@ -109,7 +109,7 @@ typedef struct {

// handle to reference an OS thread
#ifdef _OS_WINDOWS_
typedef DWORD jl_thread_t;
typedef HANDLE jl_thread_t;
#else
typedef pthread_t jl_thread_t;
#endif
@@ -140,10 +140,10 @@ typedef struct {

typedef struct {
// variable for tracking weak references
arraylist_t weak_refs;
small_arraylist_t weak_refs;
// live tasks started on this thread
// that are holding onto a stack from the pool
arraylist_t live_tasks;
small_arraylist_t live_tasks;

// variables for tracking malloc'd arrays
struct _mallocarray_t *mallocarrays;
@@ -170,7 +170,7 @@ typedef struct {
jl_gc_pool_t norm_pools[JL_GC_N_POOLS];

#define JL_N_STACK_POOLS 16
arraylist_t free_stacks[JL_N_STACK_POOLS];
small_arraylist_t free_stacks[JL_N_STACK_POOLS];
} jl_thread_heap_t;

typedef struct {
81 changes: 81 additions & 0 deletions src/mtarraylist.c
@@ -0,0 +1,81 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "julia.h"
#include "julia_internal.h"
#include "julia_assert.h"

#ifdef __cplusplus
extern "C" {
#endif

// this file provides some alternate API functions for small_arraylist (push and add)
// which can be safely observed from other threads concurrently
// only a single writer thread is permitted (or writes must be guarded by a mutex)
// but there can be any number of observers

typedef struct {
_Atomic(uint32_t) len;
uint32_t max;
_Atomic(_Atomic(void*)*) items;
_Atomic(void*) _space[SMALL_AL_N_INLINE];
} small_mtarraylist_t;

// change capacity to at least newlen
static void mtarraylist_resizeto(small_mtarraylist_t *a, size_t len, size_t newlen) JL_NOTSAFEPOINT
{
size_t max = a->max;
if (newlen > max) {
size_t nm = max * 2;
if (nm == 0)
nm = 1;
while (newlen > nm)
nm *= 2;
void *olditems = (void*)jl_atomic_load_relaxed(&a->items);
void *p = calloc_s(nm * sizeof(void*));
memcpy(p, olditems, len * sizeof(void*));
jl_atomic_store_release(&a->items, (_Atomic(void*)*)p);
a->max = nm;
if (olditems != (void*)&a->_space[0]) {
jl_task_t *ct = jl_current_task;
jl_gc_add_quiescent(ct->ptls, (void**)olditems, free);
}
}
}

// single-threaded
void mtarraylist_push(small_arraylist_t *_a, void *elt)
{
small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
size_t len = jl_atomic_load_relaxed(&a->len);
mtarraylist_resizeto(a, len, len + 1);
jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[len], elt);
jl_atomic_store_release(&a->len, len + 1);
}

// single-threaded
void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx)
{
small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
size_t len = jl_atomic_load_relaxed(&a->len);
mtarraylist_resizeto(a, len, idx + 1);
jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[idx], elt);
if (jl_atomic_load_relaxed(&a->len) < idx + 1)
jl_atomic_store_release(&a->len, idx + 1);
}

// concurrent-safe
size_t mtarraylist_length(small_arraylist_t *_a)
{
small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
return jl_atomic_load_relaxed(&a->len);
}

// concurrent-safe
void *mtarraylist_get(small_arraylist_t *_a, size_t idx)
{
small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
size_t len = jl_atomic_load_acquire(&a->len);
if (idx >= len)
return NULL;
return jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&a->items)[idx]);
}
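
A usage sketch of the contract those comments describe (assumes Julia's internal headers; both helpers are hypothetical, for illustration only):

#include "julia.h"
#include "julia_internal.h"

// writer side: must be a single designated thread, or callers hold a mutex
static void record_live_task(small_arraylist_t *tasks, jl_task_t *t)
{
    mtarraylist_push(tasks, t);
}

// observer side: safe from any thread, concurrently with the writer
static size_t count_tasks_with_stacks(small_arraylist_t *tasks)
{
    size_t live = 0;
    size_t n = mtarraylist_length(tasks); // may lag behind a concurrent push
    for (size_t i = 0; i < n; i++) {
        jl_task_t *t = (jl_task_t*)mtarraylist_get(tasks, i);
        if (t != NULL && t->stkbuf != NULL) // entries are never torn, but check NULL
            live++;
    }
    return live;
}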
29 changes: 18 additions & 11 deletions src/signals-mach.c
@@ -384,12 +384,12 @@ static void attach_exception_port(thread_port_t thread, int segv_only)
HANDLE_MACH_ERROR("thread_set_exception_ports", ret);
}

static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx)
static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) JL_NOTSAFEPOINT
{
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
if (ptls2 == NULL) // this thread is not alive
return 0;
jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task);
if (ct2 == NULL) // this thread is already dead
return 0;

@@ -407,18 +407,18 @@ static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx)
return 1;
}

static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t **ctx)
int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
{
(void)timeout;
static host_thread_state_t state;
host_thread_state_t state;
if (!jl_thread_suspend_and_get_state2(tid, &state)) {
*ctx = NULL;
return;
return 0;
}
*ctx = (unw_context_t*)&state;
*ctx = *(unw_context_t*)&state;
return 1;
}

static void jl_thread_resume(int tid, int sig)
void jl_thread_resume(int tid)
{
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
mach_port_t thread = pthread_mach_thread_np(ptls2->system_id);
@@ -593,8 +593,15 @@ static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked)
jl_unlock_profile();
}

#define jl_lock_profile() int keymgr_locked = jl_lock_profile_mach(1)
#define jl_unlock_profile() jl_unlock_profile_mach(1, keymgr_locked)
int jl_lock_stackwalk(void)
{
return jl_lock_profile_mach(1);
}

void jl_unlock_stackwalk(int lockret)
{
jl_unlock_profile_mach(1, lockret);
}

void *mach_profile_listener(void *arg)
{
@@ -691,7 +698,7 @@ void *mach_profile_listener(void *arg)
bt_data_prof[bt_size_cur++].uintptr = 0;
}
// We're done! Resume the thread.
jl_thread_resume(i, 0);
jl_thread_resume(i);
}
jl_unlock_profile_mach(0, keymgr_locked);
if (running) {
54 changes: 32 additions & 22 deletions src/signals-unix.c
@@ -291,6 +291,18 @@ int exc_reg_is_write_fault(uintptr_t esr) {
#include "signals-mach.c"
#else

int jl_lock_stackwalk(void)
{
jl_lock_profile();
return 0;
}

void jl_unlock_stackwalk(int lockret)
{
(void)lockret;
jl_unlock_profile();
}


#if defined(_OS_LINUX_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_))
int is_write_fault(void *context) {
@@ -384,12 +396,12 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
}

#if !defined(JL_DISABLE_LIBUNWIND)
static unw_context_t *signal_context;
static bt_context_t *signal_context;
pthread_mutex_t in_signal_lock;
static pthread_cond_t exit_signal_cond;
static pthread_cond_t signal_caught_cond;

static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t **ctx)
int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
{
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
@@ -399,9 +411,8 @@ static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t
jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
if (ct2 == NULL) {
// this thread is not alive or already dead
*ctx = NULL;
pthread_mutex_unlock(&in_signal_lock);
return;
return 0;
}
jl_atomic_store_release(&ptls2->signal_request, 1);
pthread_kill(ptls2->system_id, SIGUSR2);
@@ -410,9 +421,8 @@ static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t
if (err == ETIMEDOUT) {
sig_atomic_t request = 1;
if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
*ctx = NULL;
pthread_mutex_unlock(&in_signal_lock);
return;
return 0;
}
// Request is either now 0 (meaning the other thread is waiting for
// exit_signal_cond already),
@@ -429,15 +439,16 @@ static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t
// checking it is 0, and add an acquire barrier for good measure)
int request = jl_atomic_load_acquire(&ptls2->signal_request);
assert(request == 0); (void) request;
*ctx = signal_context;
jl_atomic_store_release(&ptls2->signal_request, 1); // prepare to resume normally
*ctx = *signal_context;
return 1;
}

static void jl_thread_resume(int tid, int sig)
void jl_thread_resume(int tid)
{
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
jl_atomic_store_release(&ptls2->signal_request, sig == -1 ? 3 : 1);
pthread_cond_broadcast(&exit_signal_cond);
pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge
pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge (so that signal_request doesn't get mixed up)
// The other thread is waiting to leave exit_signal_cond (verify that here by
// checking it is 0, and add an acquire barrier for good measure)
int request = jl_atomic_load_acquire(&ptls2->signal_request);
@@ -472,14 +483,14 @@ CFI_NORETURN
static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
{
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
unw_context_t *signal_context;
bt_context_t signal_context;
// This also makes sure `sleep` is aborted.
jl_thread_suspend_and_get_state(0, 30, &signal_context);
if (signal_context != NULL) {
if (jl_thread_suspend_and_get_state(0, 30, &signal_context)) {
thread0_exit_signo = signo;
ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE
memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0]));
jl_thread_resume(0, -1); // resume with message 3 (call jl_exit_thread0_cb)
jl_atomic_store_release(&ptls2->signal_request, 3);
jl_thread_resume(0); // resume with message 3 (call jl_exit_thread0_cb)
}
else {
// thread 0 is gone? just do the exit ourself
@@ -840,28 +851,27 @@ static void *signal_listener(void *arg)
int nthreads = jl_atomic_load_acquire(&jl_n_threads);
bt_size = 0;
#if !defined(JL_DISABLE_LIBUNWIND)
unw_context_t *signal_context;
bt_context_t signal_context;
// sample each thread, round-robin style in reverse order
// (so that thread zero gets notified last)
if (critical || profile) {
jl_lock_profile();
int lockret = jl_lock_stackwalk();
int *randperm;
if (profile)
randperm = profile_get_randperm(nthreads);
for (int idx = nthreads; idx-- > 0; ) {
// Stop the threads in the random or reverse round-robin order.
int i = profile ? randperm[idx] : idx;
// notify thread to stop
jl_thread_suspend_and_get_state(i, 1, &signal_context);
if (signal_context == NULL)
if (!jl_thread_suspend_and_get_state(i, 1, &signal_context))
continue;

// do backtrace on thread contexts for critical signals
// this part must be signal-handler safe
if (critical) {
bt_size += rec_backtrace_ctx(bt_data + bt_size,
JL_MAX_BT_SIZE / nthreads - 1,
signal_context, NULL);
&signal_context, NULL);
bt_data[bt_size++].uintptr = 0;
}

@@ -883,7 +893,7 @@ static void *signal_listener(void *arg)
} else {
// Get backtrace data
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
bt_size_max - bt_size_cur - 1, signal_context, NULL);
bt_size_max - bt_size_cur - 1, &signal_context, NULL);
}
jl_set_safe_restore(old_buf);

@@ -908,9 +918,9 @@ static void *signal_listener(void *arg)
}

// notify thread to resume
jl_thread_resume(i, sig);
jl_thread_resume(i);
}
jl_unlock_profile();
jl_unlock_stackwalk(lockret);
}
#ifndef HAVE_MACH
if (profile && running) {
109 changes: 72 additions & 37 deletions src/signals-win.c
@@ -344,6 +344,54 @@ JL_DLLEXPORT void jl_install_sigint_handler(void)

static volatile HANDLE hBtThread = 0;

int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
{
(void)timeout;
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
if (ptls2 == NULL) // this thread is not alive
return 0;
jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task);
if (ct2 == NULL) // this thread is already dead
return 0;
HANDLE hThread = ptls2->system_id;
if ((DWORD)-1 == SuspendThread(hThread))
return 0;
assert(sizeof(*ctx) == sizeof(CONTEXT));
memset(ctx, 0, sizeof(CONTEXT));
ctx->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
if (!GetThreadContext(hThread, ctx)) {
if ((DWORD)-1 == ResumeThread(hThread))
abort();
return 0;
}
return 1;
}

void jl_thread_resume(int tid)
{
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
HANDLE hThread = ptls2->system_id;
if ((DWORD)-1 == ResumeThread(hThread)) {
fputs("failed to resume main thread! aborting.", stderr);
abort();
}
}

int jl_lock_stackwalk(void)
{
uv_mutex_lock(&jl_in_stackwalk);
jl_lock_profile();
return 0;
}

void jl_unlock_stackwalk(int lockret)
{
(void)lockret;
jl_unlock_profile();
uv_mutex_unlock(&jl_in_stackwalk);
}


static DWORD WINAPI profile_bt( LPVOID lparam )
{
// Note: illegal to use jl_* functions from this thread except for profiling-specific functions
@@ -357,58 +405,45 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
continue;
}
else {
uv_mutex_lock(&jl_in_stackwalk);
jl_lock_profile();
if ((DWORD)-1 == SuspendThread(hMainThread)) {
fputs("failed to suspend main thread. aborting profiling.", stderr);
break;
}
// TODO: bring this up to parity with the other OSes by adding a loop over tid here
int lockret = jl_lock_stackwalk();
CONTEXT ctxThread;
memset(&ctxThread, 0, sizeof(CONTEXT));
ctxThread.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
if (!GetThreadContext(hMainThread, &ctxThread)) {
fputs("failed to get context from main thread. aborting profiling.", stderr);
if (!jl_thread_suspend_and_get_state(0, 0, &ctxThread)) {
jl_unlock_stackwalk(lockret);
fputs("failed to suspend main thread. aborting profiling.", stderr);
jl_profile_stop_timer();
break;
}
else {
// Get backtrace data
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
bt_size_max - bt_size_cur - 1, &ctxThread, NULL);
// Get backtrace data
bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
bt_size_max - bt_size_cur - 1, &ctxThread, NULL);

jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread
jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread

// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
// store threadid but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;

// store task id (never null)
bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
// store task id (never null)
bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);

// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();
// store cpu cycle clock
bt_data_prof[bt_size_cur++].uintptr = cycleclock();

// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;

// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
}
jl_unlock_profile();
uv_mutex_unlock(&jl_in_stackwalk);
if ((DWORD)-1 == ResumeThread(hMainThread)) {
jl_profile_stop_timer();
fputs("failed to resume main thread! aborting.", stderr);
jl_gc_debug_critical_error();
abort();
}
// Mark the end of this block with two 0's
bt_data_prof[bt_size_cur++].uintptr = 0;
bt_data_prof[bt_size_cur++].uintptr = 0;
jl_unlock_stackwalk(lockret);
jl_thread_resume(0);
jl_check_profile_autostop();
}
}
}
jl_unlock_profile();
uv_mutex_unlock(&jl_in_stackwalk);
jl_profile_stop_timer();
hBtThread = 0;
hBtThread = NULL;
return 0;
}

470 changes: 249 additions & 221 deletions src/stackwalk.c

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions src/threading.c
@@ -314,6 +314,8 @@ static uv_mutex_t tls_lock; // controls write-access to these variables:
_Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED;
int jl_all_tls_states_size;
static uv_cond_t cond;
// concurrent reads are permitted, using the same pattern as small_mtarraylist
// it is implemented separately because direct use of jl_all_tls_states is already widespread in existing APIs

// return calling thread's ID
JL_DLLEXPORT int16_t jl_threadid(void)
@@ -347,7 +349,7 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
#ifndef _OS_WINDOWS_
pthread_setspecific(jl_task_exit_key, (void*)ptls);
#endif
ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self();
ptls->system_id = uv_thread_self();
ptls->rngseed = jl_rand();
if (tid == 0)
ptls->disable_gc = 1;
@@ -382,10 +384,10 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
uv_cond_init(&ptls->wake_signal);

uv_mutex_lock(&tls_lock);
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
if (tid == -1)
tid = jl_atomic_load_relaxed(&jl_n_threads);
ptls->tid = tid;
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
if (jl_all_tls_states_size <= tid) {
int i, newsize = jl_all_tls_states_size + tid + 2;
jl_ptls_t *newpptls = (jl_ptls_t*)calloc(newsize, sizeof(jl_ptls_t));
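
The reader side of the concurrent-access comment at the top of this file's diff is the pattern the rest of this commit uses (see jl_live_tasks): load the thread count with acquire ordering, then the array pointer, and tolerate NULL slots for threads that are still initializing. A sketch (visit_all_threads is hypothetical):

static void visit_all_threads(void (*visit)(jl_ptls_t))
{
    size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
    jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
    for (size_t i = 0; i < nthreads; i++) {
        jl_ptls_t ptls2 = allstates[i];
        if (ptls2 == NULL)
            continue; // slot not yet published under tls_lock
        visit(ptls2);
    }
}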
