diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h
index 4b8f0eddc72..28866c3eb07 100644
--- a/clients/drcachesim/common/trace_entry.h
+++ b/clients/drcachesim/common/trace_entry.h
@@ -170,9 +170,12 @@ typedef enum {
 typedef enum {
     /**
      * The subsequent instruction is the start of a handler for a kernel-initiated
-     * event: a signal handler on UNIX, or an APC, exception, or callback dispatcher
-     * on Windows. The value holds the module offset of the interruption point PC,
-     * which is used in post-processing.
+     * event: a signal handler or restartable sequence abort handler on UNIX, or an
+     * APC, exception, or callback dispatcher on Windows.
+     * The value holds the module offset of the interruption point PC,
+     * which is used in post-processing. The value is 0 for some types, namely
+     * Windows callbacks and Linux rseq aborts, but these can be assumed to target
+     * the start of a block and so there is no loss of accuracy when post-processing.
      */
     TRACE_MARKER_TYPE_KERNEL_EVENT,
     /**
diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h
index 65a11756ad4..7faf01bc3d1 100644
--- a/clients/drcachesim/tracer/raw2trace.h
+++ b/clients/drcachesim/tracer/raw2trace.h
@@ -1066,12 +1066,26 @@ template <typename T> class trace_converter_t {
                             int_modoffs, cur_modoffs);
                 // Because we increment the instr fetch first, the signal modoffs may be
                 // less than the current for a memref fault.
-                if (int_modoffs == cur_modoffs ||
+                // It might also be 0, which means it should be on the first instruction.
+                if (int_modoffs == 0 || int_modoffs == cur_modoffs ||
                     int_modoffs + instr_length == cur_modoffs) {
                     impl()->log(4, "Signal/exception interrupted the bb @ +" PIFX "\n",
                                 int_modoffs);
                     append = true;
                     *interrupted = true;
+                    if (int_modoffs == 0) {
+                        // This happens on rseq aborts, where the trace instru includes
+                        // the rseq committing store before the native rseq execution
+                        // hits the abort. Pretend the abort happened *before* the
+                        // committing store by walking the store backward.
+                        trace_type_t skipped_type;
+                        do {
+                            --*buf_in;
+                            skipped_type = static_cast<trace_type_t>((*buf_in)->type);
+                            DR_ASSERT(*buf_in >= impl()->get_write_buffer(tls));
+                        } while (!type_is_instr(skipped_type) &&
+                                 skipped_type != TRACE_TYPE_INSTR_NO_FETCH);
+                    }
                 } else {
                     // Put it back. We do not have a problem with other markers
                     // following this, because we will have to hit the correct point
diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp
index f77d383681b..adef3a0cb48 100644
--- a/clients/drcachesim/tracer/tracer.cpp
+++ b/clients/drcachesim/tracer/tracer.cpp
@@ -1236,18 +1236,20 @@ event_kernel_xfer(void *drcontext, const dr_kernel_xfer_info_t *info)
     case DR_XFER_SIGNAL_DELIVERY:
     case DR_XFER_EXCEPTION_DISPATCHER:
     case DR_XFER_RAISE_DISPATCHER:
-    case DR_XFER_CALLBACK_DISPATCHER: marker_type = TRACE_MARKER_TYPE_KERNEL_EVENT; break;
+    case DR_XFER_CALLBACK_DISPATCHER:
+    case DR_XFER_RSEQ_ABORT: marker_type = TRACE_MARKER_TYPE_KERNEL_EVENT; break;
     case DR_XFER_SIGNAL_RETURN:
     case DR_XFER_CALLBACK_RETURN:
     case DR_XFER_CONTINUE:
     case DR_XFER_SET_CONTEXT_THREAD: marker_type = TRACE_MARKER_TYPE_KERNEL_XFER; break;
-    default: return;
+    case DR_XFER_CLIENT_REDIRECT: return;
+    default: DR_ASSERT(false && "unknown kernel xfer type"); return;
     }
     NOTIFY(2, "%s: type %d, sig %d\n", __FUNCTION__, info->type, info->sig);
     /* TODO i#3937: We need something similar to this for online too, to place signals
      * inside instr bundles.
      */
-    if (op_offline.get_value()) {
+    if (op_offline.get_value() && info->source_mcontext != nullptr) {
         /* Enable post-processing to figure out the ordering of this xfer vs
          * non-memref instrs in the bb.
          */
diff --git a/core/arch/mangle_shared.c b/core/arch/mangle_shared.c
index 1b547052c3d..b9b0bf00a57 100644
--- a/core/arch/mangle_shared.c
+++ b/core/arch/mangle_shared.c
@@ -924,6 +924,36 @@ mangle_rseq_insert_call_sequence(dcontext_t *dcontext, instrlist_t *ilist, instr
 #    endif
 }
 
+static void
+mangle_rseq_write_exit_reason(dcontext_t *dcontext, instrlist_t *ilist,
+                              instr_t *insert_at, reg_id_t scratch_reg)
+{
+    /* We use slot 1 to avoid conflict with segment mangling. */
+    if (SCRATCH_ALWAYS_TLS()) {
+        PRE(ilist, insert_at,
+            instr_create_save_to_tls(dcontext, scratch_reg, TLS_REG1_SLOT));
+        insert_get_mcontext_base(dcontext, ilist, insert_at, scratch_reg);
+    } else {
+        PRE(ilist, insert_at,
+            instr_create_save_to_dcontext(dcontext, scratch_reg, REG1_OFFSET));
+        insert_mov_immed_ptrsz(dcontext, (ptr_int_t)dcontext,
+                               opnd_create_reg(scratch_reg), ilist, insert_at, NULL,
+                               NULL);
+    }
+    PRE(ilist, insert_at,
+        XINST_CREATE_store(dcontext,
+                           opnd_create_dcontext_field_via_reg_sz(
+                               dcontext, scratch_reg, EXIT_REASON_OFFSET, OPSZ_2),
+                           OPND_CREATE_INT16(EXIT_REASON_RSEQ_ABORT)));
+    if (SCRATCH_ALWAYS_TLS()) {
+        PRE(ilist, insert_at,
+            instr_create_restore_from_tls(dcontext, scratch_reg, TLS_REG1_SLOT));
+    } else {
+        PRE(ilist, insert_at,
+            instr_create_restore_from_dcontext(dcontext, scratch_reg, REG1_OFFSET));
+    }
+}
+
 /* May modify next_instr. */
 static void
 mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
@@ -1028,8 +1058,14 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
 #    endif
     PRE(ilist, insert_at, abort_sig);
     PRE(ilist, insert_at, label_abort);
-    instrlist_preinsert(ilist, insert_at,
-                        XINST_CREATE_jump(dcontext, opnd_create_pc(handler)));
+    /* To raise a kernel xfer event we need to go back to DR. Thus this exit will
+     * never be linked. This should be quite rare, however, and should not impose
+     * a performance burden.
+     */
+    mangle_rseq_write_exit_reason(dcontext, ilist, insert_at, scratch_reg);
+    instr_t *abort_exit = XINST_CREATE_jump(dcontext, opnd_create_pc(handler));
+    instr_branch_set_special_exit(abort_exit, true);
+    instrlist_preinsert(ilist, insert_at, abort_exit);
     PRE(ilist, insert_at, skip_abort);
 
     /* Point this thread's struct rseq ptr at an rseq_cs which points at the bounds
@@ -1128,6 +1164,18 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
             instr_exit_branch_set_type(exit, exit_type);
             instrlist_preinsert(ilist, insert_at, exit);
         }
+#    ifdef DEBUG
+        /* Support for the api.rseq test with (officially unsupported) syscall in
+         * its rseq code executing before the app executes a syscall.
+         */
+        if (instr_is_syscall(copy) &&
+            get_syscall_method() == SYSCALL_METHOD_UNINITIALIZED) {
+            ASSERT(instr_get_opcode(copy) == OP_syscall &&
+                   check_filter("api.rseq", get_short_name(get_application_name())));
+            set_syscall_method(SYSCALL_METHOD_SYSCALL);
+            update_syscalls(dcontext);
+        }
+#    endif
     }
     PRE(ilist, insert_at, label_end);
     /* Update all intra-region targets to use instr_t* operands. We can't simply
diff --git a/core/dispatch.c b/core/dispatch.c
index a500fa1a203..e324afc732b 100644
--- a/core/dispatch.c
+++ b/core/dispatch.c
@@ -940,6 +940,15 @@ dispatch_enter_dynamorio(dcontext_t *dcontext)
             /* Forge single step exception with right address. */
             os_forge_exception(dcontext->next_tag, SINGLE_STEP_EXCEPTION);
             ASSERT_NOT_REACHED();
+        } else if (dcontext->upcontext.upcontext.exit_reason ==
+                   EXIT_REASON_RSEQ_ABORT) {
+#ifdef LINUX
+            rseq_process_native_abort(dcontext);
+#else
+            ASSERT_NOT_REACHED();
+#endif
+            /* Unset the reason. */
+            dcontext->upcontext.upcontext.exit_reason = EXIT_REASON_SELFMOD;
         } else {
             /* When adding any new reason, be sure to clear exit_reason,
              * as selfmod exits do not bother to set the reason field to
diff --git a/core/globals.h b/core/globals.h
index 162c3ffd615..9d7082352b5 100644
--- a/core/globals.h
+++ b/core/globals.h
@@ -714,6 +714,8 @@ enum {
     EXIT_REASON_NI_SYSCALL_INT_0x82,
     /* Single step exception needs to be forged. */
    EXIT_REASON_SINGLE_STEP,
+    /* We need to raise a kernel xfer event on an rseq-native abort. */
+    EXIT_REASON_RSEQ_ABORT,
 };
 
 /* Number of nested calls into native modules that we support. This number
diff --git a/core/lib/instrument_api.h b/core/lib/instrument_api.h
index da7ed986b23..26fbef46e81 100644
--- a/core/lib/instrument_api.h
+++ b/core/lib/instrument_api.h
@@ -937,6 +937,7 @@ typedef enum {
     DR_XFER_CONTINUE,           /**< NtContinue system call. */
     DR_XFER_SET_CONTEXT_THREAD, /**< NtSetContextThread system call. */
     DR_XFER_CLIENT_REDIRECT, /**< dr_redirect_execution() or #DR_SIGNAL_REDIRECT. */
+    DR_XFER_RSEQ_ABORT,      /**< A Linux restartable sequence was aborted. */
 } dr_kernel_xfer_type_t;
 
 /** Data structure passed for dr_register_kernel_xfer_event(). */
@@ -945,7 +946,9 @@ typedef struct _dr_kernel_xfer_info_t {
     dr_kernel_xfer_type_t type;
     /**
      * The source machine context which is about to be changed. This may be NULL
-     * if it is unknown, which is the case for #DR_XFER_CALLBACK_DISPATCHER.
+     * if it is unknown, which is the case for #DR_XFER_CALLBACK_DISPATCHER and
+     * #DR_XFER_RSEQ_ABORT (where the PC is not known but the rest of the state
+     * matches the current state).
      */
     const dr_mcontext_t *source_mcontext;
     /**
diff --git a/core/unix/os_exports.h b/core/unix/os_exports.h
index 652993c4ab3..7b9f770b535 100644
--- a/core/unix/os_exports.h
+++ b/core/unix/os_exports.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2019 Google, Inc. All rights reserved.
+ * Copyright (c) 2011-2020 Google, Inc. All rights reserved.
  * Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
  * **********************************************************/
 
@@ -570,6 +570,9 @@ rseq_remove_fragment(dcontext_t *dcontext, fragment_t *f);
 void
 rseq_shared_fragment_flushtime_update(dcontext_t *dcontext);
 
+void
+rseq_process_native_abort(dcontext_t *dcontext);
+
 #endif
 
 #endif /* _OS_EXPORTS_H_ */
diff --git a/core/unix/rseq_linux.c b/core/unix/rseq_linux.c
index abda1fe15e2..0602763ead9 100644
--- a/core/unix/rseq_linux.c
+++ b/core/unix/rseq_linux.c
@@ -1,5 +1,5 @@
 /* *******************************************************************************
- * Copyright (c) 2019 Google, Inc. All rights reserved.
+ * Copyright (c) 2019-2020 Google, Inc. All rights reserved.
  * *******************************************************************************/
 
 /*
@@ -46,6 +46,9 @@
 #include "rseq_linux.h"
 #include "../fragment.h"
 #include "decode.h"
+#ifdef CLIENT_INTERFACE
+#    include "instrument.h"
+#endif
 #include
 #ifdef HAVE_RSEQ
 #    include <linux/rseq.h>
@@ -316,7 +319,11 @@ rseq_analyze_instructions(rseq_region_t *info)
                           "Rseq sequence contains invalid instructions");
             ASSERT_NOT_REACHED();
         }
-        if (instr_is_syscall(&instr)) {
+        if (instr_is_syscall(&instr)
+            /* Allow a syscall for our test in debug build. */
+            IF_DEBUG(
+                &&!check_filter("api.rseq;linux.rseq;linux.rseq_table;linux.rseq_noarray",
+                                get_short_name(get_application_name())))) {
             REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3,
                                         get_application_name(), get_application_pid(),
                                         "Rseq sequence contains a system call");
@@ -719,3 +726,22 @@ rseq_module_init(module_area_t *ma, bool at_map)
         rseq_process_module(ma, at_map);
     }
 }
+
+void
+rseq_process_native_abort(dcontext_t *dcontext)
+{
+#ifdef CLIENT_INTERFACE
+    /* Raise a transfer event. */
+    LOG(THREAD, LOG_INTERP | LOG_VMAREAS, 2, "Abort triggered in rseq native code\n");
+    get_mcontext(dcontext)->pc = dcontext->next_tag;
+    if (instrument_kernel_xfer(dcontext, DR_XFER_RSEQ_ABORT, osc_empty,
+                               /* We do not know the source PC so we do not
+                                * supply a source state.
+                                */
+                               NULL, NULL, dcontext->next_tag,
+                               get_mcontext(dcontext)->xsp, osc_empty,
+                               get_mcontext(dcontext), 0)) {
+        dcontext->next_tag = canonicalize_pc_target(dcontext, get_mcontext(dcontext)->pc);
+    }
+#endif
+}
diff --git a/core/unix/rseq_linux.h b/core/unix/rseq_linux.h
index a4863e4cc8a..56a25aa4dd9 100644
--- a/core/unix/rseq_linux.h
+++ b/core/unix/rseq_linux.h
@@ -1,5 +1,5 @@
 /* *******************************************************************************
- * Copyright (c) 2019 Google, Inc. All rights reserved.
+ * Copyright (c) 2019-2020 Google, Inc. All rights reserved.
  * *******************************************************************************/
 
 /*
diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt
index 0abc440f4db..38ddc29d7bf 100644
--- a/suite/tests/CMakeLists.txt
+++ b/suite/tests/CMakeLists.txt
@@ -3628,9 +3628,11 @@ if (UNIX)
     append_property_string(TARGET linux.rseq_noarray COMPILE_FLAGS
                            "-DRSEQ_TEST_USE_NO_ARRAY")
     # Test attaching, which has a separate lazy rseq check.
-    tobuild_api(api.rseq linux/rseq.c "" "" OFF OFF)
+    # We build with static DR to also test client interactions.
+    tobuild_api(api.rseq linux/rseq.c "" "" OFF ON)
     link_with_pthread(api.rseq)
     append_property_string(TARGET api.rseq COMPILE_FLAGS "-DRSEQ_TEST_ATTACH")
+    set(api.rseq_expectbase rseq_client)
     # Test non-compliant code with our workaround flag.
     tobuild_ops(linux.rseq_disable linux/rseq_disable.c "-disable_rseq" "")
   endif ()
diff --git a/suite/tests/linux/rseq.c b/suite/tests/linux/rseq.c
index a57a03cd720..eed18863dbc 100644
--- a/suite/tests/linux/rseq.c
+++ b/suite/tests/linux/rseq.c
@@ -52,6 +52,10 @@
 #ifndef HAVE_RSEQ
 #    error The linux/rseq header is required.
 #endif
+#ifndef _GNU_SOURCE
+#    define _GNU_SOURCE
+#endif
+#include <sched.h>
 #include
 #include
 #include
@@ -321,8 +325,6 @@
         "jmp 2b\n\t"
 
         /* Clear the ptr. */
-        "13:\n\t"
-        "12:\n\t"
         "5:\n\t"
         "movq $0, %[rseq_tls]\n\t"
         /* clang-format on */
@@ -334,6 +336,88 @@
     assert(restarts > 0);
 }
 
+/* Tests that DR handles an rseq abort from migration or context switch (a signal
+ * is tested in test_rseq_native_fault()) in the native rseq execution.
+ * We again cheat and take advantage of DR not restoring XMM state to have different
+ * behavior in the two DR executions of the rseq code.
+ * The only reliable way we can force a context switch or migration is to use
+ * a system call, which is officially disallowed. We have special exceptions in
+ * the code which look for the test name "linux.rseq" and are limited to DEBUG.
+ */
+static void
+test_rseq_native_abort(void)
+{
+#ifdef DEBUG /* See above: special code in core/ is DEBUG-only. */
+    int restarts = 0;
+    __asm__ __volatile__(
+        /* clang-format off */ /* (avoid indenting next few lines) */
+        RSEQ_ADD_TABLE_ENTRY(abort, 2f, 3f, 4f)
+        /* clang-format on */
+
+        "6:\n\t"
+        /* Store the entry into the ptr. */
+        "leaq rseq_cs_abort(%%rip), %%rax\n\t"
+        "movq %%rax, %[rseq_tls]\n\t"
+        "pxor %%xmm0, %%xmm0\n\t"
+        "mov $1,%%rcx\n\t"
+        "movq %%rcx, %%xmm1\n\t"
+
+        /* Restartable sequence. */
+        "2:\n\t"
+        /* Increase xmm0 every time. DR (currently) won't restore xmm inputs
+         * to rseq sequences, nor does it detect that it needs to.
+         */
+        "paddq %%xmm1,%%xmm0\n\t"
+        "movq %%xmm0, %%rax\n\t"
+        /* Only raise the signal on the 2nd run == native run. */
+        "cmp $2, %%rax\n\t"
+        "jne 11f\n\t"
+        /* Force a migration by setting the affinity mask to two different singleton
+         * CPU's.
+         */
+        "mov $0, %%rdi\n\t"
+        "mov %[cpu_mask_size], %%rsi\n\t"
+        "leaq sched_mask_1(%%rip), %%rdx\n\t"
+        "mov %[sysnum_setaffinity], %%eax\n\t"
+        "syscall\n\t"
+        "mov $0, %%rdi\n\t"
+        "mov %[cpu_mask_size], %%rsi\n\t"
+        "leaq sched_mask_2(%%rip), %%rdx\n\t"
+        "mov %[sysnum_setaffinity], %%eax\n\t"
+        "syscall\n\t"
+        "11:\n\t"
+        "nop\n\t"
+
+        /* Post-commit. */
+        "3:\n\t"
+        "jmp 5f\n\t"
+
+        /* Abort handler. */
+        /* clang-format off */ /* (avoid indenting next few lines) */
+        ".long " STRINGIFY(RSEQ_SIG) "\n\t"
+        "4:\n\t"
+        "addl $1, %[restarts]\n\t"
+        "jmp 2b\n\t"
+
+        "sched_mask_1:\n\t"
+        ".long 0x1, 0, 0, 0\n\t" /* cpu #1 */
+        "sched_mask_2:\n\t"
+        ".long 0x2, 0, 0, 0\n\t" /* cpu #2 */
+
+        /* Clear the ptr. */
+        "5:\n\t"
+        "movq $0, %[rseq_tls]\n\t"
+        /* clang-format on */
+
+        : [rseq_tls] "=m"(rseq_tls.rseq_cs), [restarts] "=m"(restarts)
+        : [cpu_mask_size] "i"(sizeof(cpu_set_t)),
+          [sysnum_setaffinity] "i"(SYS_sched_setaffinity)
+        : "rax", "rcx", "rdx", "xmm0", "xmm1", "memory");
+    /* This is expected to fail on a native run where restarts will be 0. */
+    assert(restarts > 0);
+#endif /* DEBUG */
+}
+
 #ifdef RSEQ_TEST_ATTACH
 void *
 rseq_thread_loop(void *arg)
@@ -398,6 +482,33 @@ rseq_thread_loop(void *arg)
         : "rax", "memory");
     return NULL;
 }
+
+static void
+kernel_xfer_event(void *drcontext, const dr_kernel_xfer_info_t *info)
+{
+    static bool skip_print;
+    if (!skip_print)
+        dr_fprintf(STDERR, "%s: type %d\n", __FUNCTION__, info->type);
+    /* Avoid tons of prints for the trace loop in main(). */
+    if (info->type == DR_XFER_RSEQ_ABORT)
+        skip_print = true;
+    dr_mcontext_t mc = { sizeof(mc) };
+    mc.flags = DR_MC_CONTROL;
+    bool ok = dr_get_mcontext(drcontext, &mc);
+    assert(ok);
+    assert(mc.pc == info->target_pc);
+    assert(mc.xsp == info->target_xsp);
+    mc.flags = DR_MC_ALL;
+    ok = dr_get_mcontext(drcontext, &mc);
+    assert(ok);
+}
+
+DR_EXPORT void
+dr_client_main(client_id_t id, int argc, const char *argv[])
+{
+    /* Ensure DR_XFER_RSEQ_ABORT is raised. */
+    dr_register_kernel_xfer_event(kernel_xfer_event);
+}
 #endif /* RSEQ_TEST_ATTACH */
 
 int
@@ -421,6 +532,8 @@ main()
     test_rseq_branches();
     /* Test a fault in the native run. */
     test_rseq_native_fault();
+    /* Test a non-fault abort in the native run. */
+    test_rseq_native_abort();
     /* Test a trace. */
     int i;
     for (i = 0; i < 200; i++)
diff --git a/suite/tests/linux/rseq_client.expect b/suite/tests/linux/rseq_client.expect
new file mode 100644
index 00000000000..d3fbd05d099
--- /dev/null
+++ b/suite/tests/linux/rseq_client.expect
@@ -0,0 +1,8 @@
+kernel_xfer_event: type 0
+kernel_xfer_event: type 1
+kernel_xfer_event: type 0
+kernel_xfer_event: type 1
+kernel_xfer_event: type 0
+kernel_xfer_event: type 1
+kernel_xfer_event: type 10
+All done