Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tasks-debugging: make it possible to get the backtrace of a task #32283

Merged
merged 1 commit into from
Dec 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion src/gc-stacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,12 @@ void sweep_stack_pools(void)
continue;
while (1) {
jl_task_t *t = (jl_task_t*)lst[n];
assert(jl_is_task(t));
if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
n++;
if (t->stkbuf == NULL)
ndel++; // jl_release_task_stack called
else
n++;
}
else {
ndel++;
Expand All @@ -243,3 +247,30 @@ void sweep_stack_pools(void)
live_tasks->len -= ndel;
}
}

// Build and return an array (allocated with jl_alloc_vec_any) whose first
// element is the calling thread's root task, followed by every entry of that
// thread's `heap.live_tasks` list whose `stkbuf` is non-NULL.
//
// Entries with a NULL `stkbuf` are skipped; presumably their stack has already
// been released and they are only waiting to be dropped from the list by the
// next sweep -- confirm against sweep_stack_pools.
//
// The returned array is trimmed to the number of tasks actually stored.
JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
{
    jl_ptls_t ptls = jl_get_ptls_states();
    arraylist_t *live_tasks = &ptls->heap.live_tasks;
    size_t i, j, l;
    jl_array_t *a;
    do {
        l = live_tasks->len;
        a = jl_alloc_vec_any(l + 1); // may gc, which can change live_tasks->len
        // The fill loop below writes the root task plus up to live_tasks->len
        // entries, so the array (l + 1 slots) is only big enough when the list
        // has not grown past `l`. Retry on any growth; the previous check
        // (l + 1 < len) let a growth of exactly one entry overflow the array.
    } while (l < live_tasks->len);
    l = live_tasks->len;
    void **lst = live_tasks->items;
    j = 0;
    ((void**)jl_array_data(a))[j++] = ptls->root_task;
    for (i = 0; i < l; i++) {
        if (((jl_task_t*)lst[i])->stkbuf != NULL)
            ((void**)jl_array_data(a))[j++] = lst[i];
    }
    // Drop the unused tail (skipped entries, or a list that shrank during allocation).
    l = jl_array_len(a);
    if (j < l) {
        JL_GC_PUSH1(&a); // keep `a` rooted while the array is resized
        jl_array_del_end(a, l - j);
        JL_GC_POP();
    }
    return a;
}
1 change: 1 addition & 0 deletions src/julia.expmap
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
add_library_mapping;
utf8proc_*;
jlbacktrace;
jlbacktracet;
julia_type_to_llvm;
_IO_stdin_used;
__ZN4llvm23createLowerSimdLoopPassEv;
Expand Down
12 changes: 11 additions & 1 deletion src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1762,7 +1762,17 @@ typedef struct _jl_task_t {
// current exception handler
jl_handler_t *eh;

jl_ucontext_t ctx; // saved thread state
union {
jl_ucontext_t ctx; // saved thread state
#ifdef _OS_WINDOWS_
jl_ucontext_t copy_stack_ctx;
#else
struct jl_stack_context_t copy_stack_ctx;
#endif
};
#if defined(JL_TSAN_ENABLED)
void *tsan_state;
#endif
void *stkbuf; // malloc'd memory (either copybuf or stack)
size_t bufsz; // actual sizeof stkbuf
unsigned int copy_stack:31; // sizeof stack for copybuf
Expand Down
49 changes: 29 additions & 20 deletions src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

// Options for task switching algorithm (in order of preference):
// JL_HAVE_ASM -- mostly setjmp
// JL_HAVE_ASYNCIFY -- task switching based on the binaryen asyncify transform
// JL_HAVE_UNW_CONTEXT -- hybrid of libunwind for start, setjmp for resume
// JL_HAVE_ASM && JL_HAVE_UNW_CONTEXT -- libunwind-based
// JL_HAVE_UNW_CONTEXT -- libunwind-based
// JL_HAVE_ASYNCIFY -- task switching based on the binaryen asyncify transform
// JL_HAVE_UCONTEXT -- posix standard API, requires syscall for resume
// JL_HAVE_SIGALTSTACK -- requires several syscall for start, setjmp for resume

Expand All @@ -33,24 +34,25 @@ typedef win32_ucontext_t jl_ucontext_t;
#if (defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_AARCH64_) || \
defined(_CPU_ARM_) || defined(_CPU_PPC64_))
#define JL_HAVE_ASM
#elif defined(_OS_DARWIN_)
#endif
#if defined(_OS_DARWIN_)
#define JL_HAVE_UNW_CONTEXT
#elif defined(_OS_LINUX_)
#define JL_HAVE_UCONTEXT
//#elif defined(_OS_LINUX_)
//#define JL_HAVE_UNW_CONTEXT // very slow, but more debugging
#elif defined(_OS_EMSCRIPTEN_)
#define JL_HAVE_ASYNCIFY
#else
#define JL_HAVE_UNW_CONTEXT
#elif !defined(JL_HAVE_ASM)
#define JL_HAVE_UNW_CONTEXT // optimistically?
#endif
#endif

#if defined(JL_HAVE_ASM) || defined(JL_HAVE_SIGALTSTACK)
typedef struct {

struct jl_stack_context_t {
jl_jmp_buf uc_mcontext;
#if defined(JL_TSAN_ENABLED)
void *tsan_state;
#endif
} jl_ucontext_t;
};

#if (!defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)) || defined(JL_HAVE_SIGALTSTACK)
typedef struct jl_stack_context_t jl_ucontext_t;
#endif
#if defined(JL_HAVE_ASYNCIFY)
#if defined(JL_TSAN_ENABLED)
Expand All @@ -65,15 +67,14 @@ typedef struct {
void *stacktop;
} jl_ucontext_t;
#endif
#if defined(JL_HAVE_UCONTEXT) || defined(JL_HAVE_UNW_CONTEXT)
#if defined(JL_HAVE_UNW_CONTEXT)
#define UNW_LOCAL_ONLY
#include <libunwind.h>
typedef struct {
ucontext_t ctx;
#if defined(JL_TSAN_ENABLED)
void *tsan_state;
typedef unw_context_t jl_ucontext_t;
#endif
} jl_ucontext_t;
#if defined(JL_HAVE_UCONTEXT)
#include <ucontext.h>
typedef ucontext_t jl_ucontext_t;
#endif
#endif

Expand Down Expand Up @@ -210,7 +211,15 @@ struct _jl_tls_states_t {
struct _jl_timing_block_t *timing_stack;
void *stackbase;
size_t stacksize;
jl_ucontext_t base_ctx; // base context of stack
union {
jl_ucontext_t base_ctx; // base context of stack
// This hack is needed to support always_copy_stacks:
#ifdef _OS_WINDOWS_
jl_ucontext_t copy_stack_ctx;
#else
struct jl_stack_context_t copy_stack_ctx;
#endif
};
jl_jmp_buf *safe_restore;
// Temp storage for exception thrown in signal handler. Not rooted.
struct _jl_value_t *sig_exception;
Expand Down
2 changes: 1 addition & 1 deletion src/partr.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ uint64_t io_wakeup_leave;
JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT
{
// Try to acquire the lock on this task.
int16_t was = task->tid;
int16_t was = jl_atomic_load_relaxed(&task->tid);
if (was == tid)
return 1;
if (was == -1)
Expand Down
1 change: 1 addition & 0 deletions src/signals-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

#include "julia_assert.h"

// helper function for returning the unw_context_t inside a ucontext_t
static bt_context_t *jl_to_bt_context(void *sigctx)
{
#ifdef __APPLE__
Expand Down
77 changes: 70 additions & 7 deletions src/stackwalk.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
// returning from the callee function will invalidate the context
#ifdef _OS_WINDOWS_
jl_mutex_t jl_in_stackwalk;
#define jl_unw_get(context) RtlCaptureContext(context)
#define jl_unw_get(context) (RtlCaptureContext(context), 0)
#elif !defined(JL_DISABLE_LIBUNWIND)
#define jl_unw_get(context) unw_getcontext(context)
#else
void jl_unw_get(void *context) {};
int jl_unw_get(void *context) { return -1; }
#endif

#ifdef __cplusplus
Expand Down Expand Up @@ -204,7 +204,9 @@ NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip
{
bt_context_t context;
memset(&context, 0, sizeof(context));
jl_unw_get(&context);
int r = jl_unw_get(&context);
if (r < 0)
return 0;
jl_gcframe_t *pgcstack = jl_pgcstack;
bt_cursor_t cursor;
if (!jl_unw_init(&cursor, &context))
Expand Down Expand Up @@ -239,9 +241,9 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip)
bt_context_t context;
bt_cursor_t cursor;
memset(&context, 0, sizeof(context));
jl_unw_get(&context);
int r = jl_unw_get(&context);
jl_gcframe_t *pgcstack = jl_pgcstack;
if (jl_unw_init(&cursor, &context)) {
if (r == 0 && jl_unw_init(&cursor, &context)) {
// Skip frame for jl_backtrace_from_here itself
skip += 1;
size_t offset = 0;
Expand Down Expand Up @@ -688,8 +690,59 @@ void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT
}
}

extern bt_context_t *jl_to_bt_context(void *sigctx);

// Record a backtrace for task `t` into the calling thread's backtrace buffer
// (`ptls->bt_data`, with the entry count stored in `ptls->bt_size`).
//
// `ptls->bt_size` is reset to 0 first and stays 0 whenever no backtrace can
// be produced: the task uses a copied stack, has not started, has no stack
// buffer, is owned by a different thread id, or the platform offers no way to
// turn the saved task context into an unwinder context.
void jl_rec_backtrace(jl_task_t *t)
{
jl_ptls_t ptls = jl_get_ptls_states();
ptls->bt_size = 0;
// The current task is walked directly from the live stack.
if (t == ptls->current_task) {
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
return;
}
// Only started tasks that have their own stack buffer are walked.
if (t->copy_stack || !t->started || t->stkbuf == NULL)
return;
// Try to set the task's tid from -1 to this thread's tid. If it already
// holds some other thread's tid, give up. Presumably this keeps another
// thread from running the task while its stack is walked -- confirm
// against jl_set_task_tid.
int old = jl_atomic_compare_exchange(&t->tid, -1, ptls->tid);
if (old != -1 && old != ptls->tid)
return;
bt_context_t *context = NULL;
#if defined(_OS_WINDOWS_)
// Windows: fill a zeroed bt_context_t from the registers saved in the
// task's jump buffer.
bt_context_t c;
memset(&c, 0, sizeof(c));
_JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.uc_mcontext;
#if defined(_CPU_X86_64_)
c.Rbx = mctx->Rbx;
c.Rsp = mctx->Rsp;
c.Rbp = mctx->Rbp;
c.Rsi = mctx->Rsi;
c.Rdi = mctx->Rdi;
c.R12 = mctx->R12;
c.R13 = mctx->R13;
c.R14 = mctx->R14;
c.R15 = mctx->R15;
c.Rip = mctx->Rip;
memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15
#else
// 32-bit x86: only the instruction, stack and frame pointers are copied.
c.Eip = mctx->Eip;
c.Esp = mctx->Esp;
c.Ebp = mctx->Ebp;
#endif
context = &c;
#elif defined(JL_HAVE_UNW_CONTEXT)
// The saved task context is passed to the unwinder as-is.
context = &t->ctx;
#elif defined(JL_HAVE_UCONTEXT)
// Convert the saved ucontext into the unwinder's context type.
context = jl_to_bt_context(&t->ctx);
#else
// No conversion available for this configuration: `context` stays NULL
// and no backtrace is recorded.
#endif
if (context)
ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack);
// Restore the tid to -1 only if this call changed it.
if (old == -1)
jl_atomic_store_relaxed(&t->tid, old);
}

//--------------------------------------------------
// Tools for interactive debugging in gdb

JL_DLLEXPORT void jl_gdblookup(void* ip)
{
jl_print_native_codeloc((uintptr_t)ip);
Expand All @@ -701,9 +754,19 @@ JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT
jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack;
if (!s)
return;
size_t bt_size = jl_excstack_bt_size(s, s->top);
size_t i, bt_size = jl_excstack_bt_size(s, s->top);
jl_bt_element_t *bt_data = jl_excstack_bt_data(s, s->top);
for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
jl_print_bt_entry_codeloc(bt_data + i);
}
}
// Debugging helper: record the backtrace of task `t` into the calling thread's
// backtrace buffer via jl_rec_backtrace, then print the code location of each
// recorded entry in order.
JL_DLLEXPORT void jlbacktracet(jl_task_t *t)
{
    jl_ptls_t ptls = jl_get_ptls_states();
    jl_rec_backtrace(t);

    // Read the buffer only after recording so it reflects the new backtrace.
    jl_bt_element_t *frames = ptls->bt_data;
    const size_t total = ptls->bt_size;
    size_t pos = 0;
    while (pos < total) {
        jl_bt_element_t *entry = frames + pos;
        jl_print_bt_entry_codeloc(entry);
        // Entries are variable-sized; advance by the size of the one just printed.
        pos += jl_bt_entry_size(entry);
    }
}
Expand Down
Loading