From 857e51c23d1ca94046d75967b3d8dc109116409f Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Fri, 18 Aug 2023 14:58:46 -0400 Subject: [PATCH] Improve `jl_print_task_backtraces()` We use `jl_rec_backtrace()` which tries to set the task's tid to the current thread before gathering the backtrace. This will fail for tasks that are sticky to another thread as their tid is never reset. However, for `jl_print_task_backtraces()`, we aren't concerned about thread safety since we assume that all threads are stopped so we add a flag to `jl_rec_backtrace()` to ignore the task's tid. With this, `jl_print_task_backtraces()` should now only miss tasks that are currently executing on threads other than the calling thread. --- src/stackwalk.c | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/stackwalk.c b/src/stackwalk.c index 18bf4b21269380..b201ff04caea00 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -856,7 +856,7 @@ _os_ptr_munge(uintptr_t ptr) extern bt_context_t *jl_to_bt_context(void *sigctx); -void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT +void jl_rec_backtrace(jl_task_t *t, int ignore_tid) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; @@ -868,8 +868,9 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT if (t->copy_stack || !t->started || t->stkbuf == NULL) return; int16_t old = -1; - if (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) - return; + if (!ignore_tid) + if (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) + return; bt_context_t *context = NULL; #if defined(_OS_WINDOWS_) bt_context_t c; @@ -1079,10 +1080,18 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT #endif if (context) ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack); - if (old == -1) + if (!ignore_tid && old == -1) jl_atomic_store_relaxed(&t->tid, old); } +void print_bt(size_t bt_size, jl_bt_element_t *bt_data) JL_NOTSAFEPOINT +{ + size_t i; + for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { + jl_print_bt_entry_codeloc(bt_data + i); + } +} + //-------------------------------------------------- // Tools for interactive debugging in gdb @@ -1100,11 +1109,7 @@ JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT jl_excstack_t *s = ct->excstack; if (!s) return; - size_t i, bt_size = jl_excstack_bt_size(s, s->top); - jl_bt_element_t *bt_data = jl_excstack_bt_data(s, s->top); - for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { - jl_print_bt_entry_codeloc(bt_data + i); - } + print_bt(jl_excstack_bt_size(s, s->top), jl_excstack_bt_data(s, s->top)); } // Print backtrace for specified task @@ -1112,12 +1117,8 @@ JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - jl_rec_backtrace(t); - size_t i, bt_size = ptls->bt_size; - jl_bt_element_t *bt_data = ptls->bt_data; - for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { - jl_print_bt_entry_codeloc(bt_data + i); - } + jl_rec_backtrace(t, 0); + print_bt(ptls->bt_size, ptls->bt_data); } JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT @@ -1130,6 +1131,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT // all of Julia's threads are not stopped! JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { @@ -1143,7 +1146,8 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT ptls2->root_task->sticky, ptls2->root_task->started, jl_atomic_load_relaxed(&ptls2->root_task->_state), jl_atomic_load_relaxed(&ptls2->root_task->tid) + 1); - jlbacktracet(ptls2->root_task); + jl_rec_backtrace(ptls2->root_task, 1); + print_bt(ptls->bt_size, ptls->bt_data); void **lst = live_tasks->items; for (size_t j = 0; j < live_tasks->len; j++) { @@ -1156,10 +1160,16 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", t->sticky, t->started, t_state, jl_atomic_load_relaxed(&t->tid) + 1); - if (t->stkbuf != NULL) - jlbacktracet(t); - else + if (t == ptls2->current_task) { + jl_safe_printf(" running\n"); + } + else if (t->stkbuf == NULL) { jl_safe_printf(" no stack\n"); + } + else { + jl_rec_backtrace(t, 1); + print_bt(ptls->bt_size, ptls->bt_data); + } jl_safe_printf(" ---- End task %zu\n", j + 1); } jl_safe_printf("==== End thread %d\n", ptls2->tid + 1);