From 00ae95b651dd8376a44730913a83b0f7d92160d5 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Thu, 22 Aug 2019 13:28:28 -0400 Subject: [PATCH] i#2350 rseq: Target sequence start instead of abort handler (#3791) Changes the native execution of the rseq region to target the start of the sequence rather than the abort handler. This is done for two reasons: to support abort handlers which simply abort and do not restart, and to make it much easier in the future to run a copy of the sequence rather than rely on call-return semantics in the app code. Adds preservation of any possible input registers that might normally be re-set-up by the abort handler we're skipping, by preserving registers that are written inside the sequence. We limit this to general-purpose registers only, for space and time and code simplicity and because we do not expect others to be used, and document this restriction. Removes the register clobber used for the indirect call, going through TLS instead. Issue: #2350 --- api/docs/bt.dox | 14 +++- core/arch/arch.c | 1 + core/arch/arch.h | 6 +- core/arch/arch_exports.h | 22 ++++- core/arch/mangle_shared.c | 168 ++++++++++++++++++++++++++++++++------ core/globals.h | 6 +- core/translate.c | 2 +- core/unix/os.c | 4 +- core/unix/os_exports.h | 3 +- core/unix/rseq_linux.c | 96 +++++++++++++++++++++- core/unix/rseq_linux.h | 3 + core/vmareas.c | 14 ++-- suite/tests/linux/rseq.c | 10 ++- 13 files changed, 301 insertions(+), 48 deletions(-) diff --git a/api/docs/bt.dox b/api/docs/bt.dox index a469fbe430f..8c35961b283 100644 --- a/api/docs/bt.dox +++ b/api/docs/bt.dox @@ -1311,8 +1311,8 @@ This run-twice approach is subject to the following limitations: section of pointers into the __rseq_cs section, per established conventions. These sections must be located in loaded segments. - Each rseq region's code must never be also executed as a non-restartable sequence. -- Each rseq region must make forward progress if its abort handler is always - called the first time it is executed. +- Each rseq region must handle being directly restarted without its + abort handler being called (with the machine state restored). - Each memory store instruction inside an rseq region must have no other side effects: it must only write to memory and not to any registers. For example, a push instruction which both writes to memory and the @@ -1322,13 +1322,21 @@ This run-twice approach is subject to the following limitations: semantics. - Each rseq region's code must end with a fall-through (non-control-flow) instruction. -- Any helper function called from within an rseq region must have no side effects. +- Each rseq region must be entered only from the top, with no branches from outside + the region targeting a point inside the region. +- No system calls are allowed inside rseq regions. +- No call instructions are allowed inside rseq regions. +- The only register inputs to an rseq region must be general-purpose registers read + prior to any write to that register when the sequence is considered linearly without + regard to control flow. - The instrumented execution of the rseq region may not perfectly reflect the native behavior of the application. The instrumentation will never see the abort handler called, and memory addresses may be wrong if they are based on the underlying cpuid and a migration occurred mid-region. These are minor and acceptable for most tools (especially given that there is no better alternative). +Some of these limitations are explicitly checked, and DR will exit with an error +message if they are not met. However, not all are efficiently verifiable. If an application does not satisfy these limitations, the \ref op_disable_rseq "disable_rseq" runtime option may be used to return ENOSYS, which can provide a workaround for applications which have fallback code diff --git a/core/arch/arch.c b/core/arch/arch.c index 424505b5399..b05a4e07369 100644 --- a/core/arch/arch.c +++ b/core/arch/arch.c @@ -681,6 +681,7 @@ d_r_arch_init(void) * masks. Also priv_mcontext_t.opmask slots are AVX512BW wide. */ IF_X86(ASSERT(sizeof(dr_opmask_t) == OPMASK_AVX512BW_REG_SIZE)); + ASSERT(NUM_GP_REGS == DR_NUM_GPR_REGS); /* Verify that the structures used for a register spill area and to hold IBT * table addresses & masks for IBL code are laid out as expected. We expect diff --git a/core/arch/arch.h b/core/arch/arch.h index a48a471022f..88f57928ae5 100644 --- a/core/arch/arch.h +++ b/core/arch/arch.h @@ -85,7 +85,9 @@ mixed_mode_enabled(void) #ifdef X86 # define XAX_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xax))) +# define REG0_OFFSET XAX_OFFSET # define XBX_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xbx))) +# define REG1_OFFSET XBX_OFFSET # define XCX_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xcx))) # define XDX_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xdx))) # define XSI_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xsi))) @@ -117,7 +119,9 @@ mixed_mode_enabled(void) # define SCRATCH_REG5_OFFS XDI_OFFSET #elif defined(AARCHXX) # define R0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r0))) +# define REG0_OFFSET R0_OFFSET # define R1_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r1))) +# define REG1_OFFSET R1_OFFSET # define R2_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r2))) # define R3_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r3))) # define R4_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r4))) @@ -380,8 +384,6 @@ typedef enum { # define SHARED_GENCODE_MATCH_THREAD(dc) get_shared_gencode(dc) #endif -#define NUM_GP_REGS DR_NUM_GPR_REGS - /* Information about each individual clean call invocation site. * The whole struct is set to 0 at init time. */ diff --git a/core/arch/arch_exports.h b/core/arch/arch_exports.h index 2f21bf2d06e..16b47472f32 100644 --- a/core/arch/arch_exports.h +++ b/core/arch/arch_exports.h @@ -566,7 +566,7 @@ atomic_add_exchange_int64(volatile int64 *var, int64 value) : "=qm"(flag) \ : \ : "cc", "memory") -/* clang-flag on */ +/* clang-format on */ # define SET_IF_NOT_ZERO(flag) SET_FLAG(nz, flag) # define SET_IF_NOT_LESS(flag) SET_FLAG(nl, flag) @@ -2523,4 +2523,24 @@ void encode_reset_it_block(dcontext_t *dcontext); #endif +/* DR_NUM_GPR_REGS is not exported. We use our own defines here. + * These are checked vs DR_NUM_GPR_REGS in d_r_arch_init(). + */ +#ifdef X86_32 +# define NUM_GP_REGS 8 +#elif defined(X86_64) +# define NUM_GP_REGS 16 +#elif defined(ARM) +# define NUM_GP_REGS 16 +#elif defined(AARCH64) +# define NUM_GP_REGS 32 +#endif + +#ifdef LINUX +/* Register state preserved on input to restartable sequences ("rseq"). */ +typedef struct _rseq_entry_state_t { + reg_t gpr[NUM_GP_REGS]; +} rseq_entry_state_t; +#endif + #endif /* _ARCH_EXPORTS_H_ */ diff --git a/core/arch/mangle_shared.c b/core/arch/mangle_shared.c index 68abd6596bd..893aa6671da 100644 --- a/core/arch/mangle_shared.c +++ b/core/arch/mangle_shared.c @@ -777,12 +777,76 @@ mangle_syscall_code(dcontext_t *dcontext, fragment_t *f, byte *pc, bool skip) static bool mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *next_instr) { + int i; app_pc pc = get_app_instr_xl8(instr); app_pc start, end, handler; - if (!rseq_get_region_info(pc, &start, &end, &handler)) { + bool *reg_written; + int reg_written_size; + reg_id_t scratch_reg = DR_REG_START_GPR; + if (!rseq_get_region_info(pc, &start, &end, &handler, ®_written, + ®_written_size)) { ASSERT_NOT_REACHED(); /* Caller was supposed to check for overlap */ return false; } + /* We need to know the type of register so we can't completely abstract this. */ + ASSERT(reg_written_size == DR_NUM_GPR_REGS); + int reg_written_count = 0; + for (i = 0; i < DR_NUM_GPR_REGS; i++) { + if (reg_written[i]) { + /* For simplicity we avoid our scratch being a register we're preserving. */ + if (DR_REG_START_GPR + (reg_id_t)i == scratch_reg) + scratch_reg++; + reg_written_count++; + } + } + if (scratch_reg == DR_NUM_GPR_REGS) { + /* We could handle this by an xchg or sthg but it seems so rare, and given + * that we already have so many rseq limitations, I'm bailing on it. + */ + REPORT_FATAL_ERROR_AND_EXIT( + RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(), get_application_pid(), + "Rseq sequence writes every general-purpose register"); + ASSERT_NOT_REACHED(); + } + if (pc == start && reg_written_count > 0) { + /* Preserve any input register state that will be re-set-up by the abort + * handler on a restart. We directly invoke start on a restart to make it + * easier to use a copy of the code, and to support non-restarting handlers. + */ + /* XXX i#3798: Be sure to insert these register saves prior to any client + * instrumentation, which may move app register values elsewhere. We've + * arranged the rseq start to always be a block start, and with current drreg + * implementation, all values are native (i.e., in registers) at block start, so + * we're ok for now, but we may want some kind of barrier API in the future. + */ + instr_t *first = instrlist_first(ilist); + if (SCRATCH_ALWAYS_TLS()) { + PRE(ilist, first, + instr_create_save_to_tls(dcontext, scratch_reg, TLS_REG0_SLOT)); + insert_get_mcontext_base(dcontext, ilist, first, scratch_reg); + } else { + PRE(ilist, first, + instr_create_save_to_dcontext(dcontext, scratch_reg, REG0_OFFSET)); + insert_mov_immed_ptrsz(dcontext, (ptr_int_t)dcontext, + opnd_create_reg(scratch_reg), ilist, first, NULL, + NULL); + } + for (i = 0; i < DR_NUM_GPR_REGS; i++) { + if (reg_written[i]) { + size_t offs = offsetof(dcontext_t, rseq_entry_state) + sizeof(reg_t) * i; + PRE(ilist, first, + XINST_CREATE_store(dcontext, OPND_CREATE_MEMPTR(scratch_reg, offs), + opnd_create_reg(DR_REG_START_GPR + (reg_id_t)i))); + } + } + if (SCRATCH_ALWAYS_TLS()) { + PRE(ilist, first, + instr_create_restore_from_tls(dcontext, scratch_reg, TLS_REG0_SLOT)); + } else { + PRE(ilist, first, + instr_create_restore_from_dcontext(dcontext, scratch_reg, REG0_OFFSET)); + } + } int len = instr_length(dcontext, instr); if (pc + len >= end) { if (pc + len != end) { @@ -801,27 +865,62 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *n } # ifdef X86 /* We just ran the instrumented version of the rseq code, with the stores - * removed. Now we need to invoke it again natively for real. We have to - * invoke the abort handler, not the rseq start, as it may perform some setup. + * removed. Now we need to invoke it again natively for real. We would prefer + * to invoke the abort handler, as it may perform some setup, but in too many + * cases it is truly an "abort" handler that just exits rather than a "restart + * handler". Furthermore, to support executing a copy of the code natively in + * order to provide guarantees on regaining control and not rely on call-return + * semantics, it is simpler to execute only the limited-scope rseq region. + * Thus, we target the start point. * - * XXX i#2350: We assume the code can handle "aborting" once every time and - * still make forward progress. We document this in our Limitations section. - * We also ignore the flags for when to restart. It's possible the app - * disabled restarts on preempts and migrations and can't handle our restart - * here. + * In case the abort handler does perform setup, we checkpoint and restore GPR + * register values. Memory should remain as it was, due to nop-ing of stores. + * + * XXX i#2350: We ignore the app's rseq flags for when to restart. It's + * possible the app disabled restarts on preempts and migrations and can't + * handle our restart here, but that seems pathological: we expect the rseq + * feature to be used for restarts rather than just a detection mechanism of + * preemption. */ LOG(THREAD, LOG_INTERP, 4, "mangle: inserting call to native rseq " PFX "\n", - handler); + start); RSTATS_INC(num_rseq_native_calls_inserted); + + /* Create a scratch register. */ + if (SCRATCH_ALWAYS_TLS()) { + PRE(ilist, next_instr, + instr_create_save_to_tls(dcontext, scratch_reg, TLS_REG0_SLOT)); + insert_get_mcontext_base(dcontext, ilist, next_instr, scratch_reg); + } else { + PRE(ilist, next_instr, + instr_create_save_to_dcontext(dcontext, scratch_reg, REG0_OFFSET)); + insert_mov_immed_ptrsz(dcontext, (ptr_int_t)dcontext, + opnd_create_reg(scratch_reg), ilist, next_instr, NULL, + NULL); + } + if (reg_written_count > 0) { + /* Restore the entry state we preserved earlier. */ + for (i = 0; i < DR_NUM_GPR_REGS; i++) { + if (reg_written[i]) { + size_t offs = + offsetof(dcontext_t, rseq_entry_state) + sizeof(reg_t) * i; + PRE(ilist, next_instr, + XINST_CREATE_load(dcontext, + opnd_create_reg(DR_REG_START_GPR + (reg_id_t)i), + OPND_CREATE_MEMPTR(scratch_reg, offs))); + } + } + } + /* For simplicity in this first version of the code, we assume call-return - * semantics for the abort handler + rseq region. We create an extra frame + * semantics for the rseq region. We create an extra frame * and assume that causes no problems. We assume the native invocation will * come back to us. - * TODO i#2350: Make a copy of the rseq code and append a "return" as a jmp* - * through TLS. Make a copy of the abort handler too. Decode it and ensure - * it transitions directly to the rseq code (if not, bail). "Call" the - * copied abort handler+rseq by setting the retaddr in TLS and jumping. Then - * we have a guaranteed return and no extra frame, with verified assumptions. + * TODO i#2350: Make a local copy of the rseq code so we can arrange for a + * guaranteed return on (any) exit from the region, and use relative jumps to + * avoid needing a scratch register (though on x86 we could call through TLS). + * We would transform all mid-point exits into capture points. This gets rid + * of the call-return assumptions and the extra frame. */ instr_t check; instr_init(dcontext, &check); @@ -838,9 +937,26 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *n * of future drreg optimizations, etc. Do we need new interface features, or * do we live with a fake app jump or sthg? */ - /* A direct call may not reach so we clobber a scratch register. */ - insert_mov_immed_ptrsz(dcontext, (ptr_int_t)handler, opnd_create_reg(DR_REG_RAX), + /* A direct call may not reach, so we need an indirect call. We use a TLS slot + * to avoid needing a dead register. + */ + insert_mov_immed_ptrsz(dcontext, (ptr_int_t)start, opnd_create_reg(scratch_reg), ilist, next_instr, NULL, NULL); + if (SCRATCH_ALWAYS_TLS()) { + PRE(ilist, next_instr, + instr_create_save_to_tls(dcontext, scratch_reg, TLS_REG1_SLOT)); + } else { + PRE(ilist, next_instr, + instr_create_save_to_dcontext(dcontext, scratch_reg, REG1_OFFSET)); + } + /* Restore the scratch register. */ + if (SCRATCH_ALWAYS_TLS()) { + PRE(ilist, next_instr, + instr_create_restore_from_tls(dcontext, scratch_reg, TLS_REG0_SLOT)); + } else { + PRE(ilist, next_instr, + instr_create_restore_from_dcontext(dcontext, scratch_reg, REG0_OFFSET)); + } /* Set up the frame and stack alignment. We assume the rseq code was a leaf * function and that rsp is 16-aligned now. * TODO i#2350: If we stick with an extra call frame, it would be better to @@ -852,12 +968,21 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *n OPND_CREATE_INT32(8))); instrlist_meta_preinsert( ilist, next_instr, - INSTR_CREATE_call_ind(dcontext, opnd_create_reg(DR_REG_RAX))); + INSTR_CREATE_call_ind( + dcontext, + SCRATCH_ALWAYS_TLS() + ? opnd_create_tls_slot(os_tls_offset(TLS_REG1_SLOT)) + : opnd_create_dcontext_field(dcontext, REG1_OFFSET))); instrlist_meta_preinsert(ilist, next_instr, XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_RSP), OPND_CREATE_INT32(8))); # else - /* TODO i#2350: Add non-x86 support. */ + /* TODO i#2350: Add non-x86 support. We need to pay particular attention + * to the stolen register. If we do a local copy (with no callouts) we could + * mangle it. We also cannot do an indirect call through anything but a + * register and thus need a dead register for the call-return approach, but + * that disappears once DR uses a local copy. + */ REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(), get_application_pid(), "Rseq is not yet supported for non-x86"); @@ -873,11 +998,6 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *n */ if (!instr_writes_memory(instr)) return false; - /* We allow a call to a helper function, since this happens in some apps we target. - * The helper is assumed to have no side effects. - */ - if (instr_is_call(instr)) - return false; /* XXX i#2350: We want to turn just the store portion of the instr into a nop * and keep any register side effects. That is complex, however. For now we * only support simple stores. diff --git a/core/globals.h b/core/globals.h index b98f310e588..4360dfbe371 100644 --- a/core/globals.h +++ b/core/globals.h @@ -751,7 +751,7 @@ typedef struct { /* WARNING: if you change the offsets of any of these fields, * you must also change the offsets in / */ - priv_mcontext_t mcontext; /* real machine context (in arch_exports.h) */ + priv_mcontext_t mcontext; /* real machine context (in globals_shared.h + mcxtx.h) */ #ifdef UNIX int dr_errno; /* errno used for DR (no longer used for app) */ #endif @@ -1103,6 +1103,10 @@ struct _dcontext_t { bool currently_stopped; /* This is a flag requesting that this thread go native. */ bool go_native; +#ifdef LINUX + /* State for handling restartable sequences ("rseq"). */ + rseq_entry_state_t rseq_entry_state; +#endif }; /* sentinel value for dcontext_t* used to indicate diff --git a/core/translate.c b/core/translate.c index 9b7309141df..345d52fc289 100644 --- a/core/translate.c +++ b/core/translate.c @@ -574,7 +574,7 @@ translate_restore_special_cases(app_pc pc) { #ifdef LINUX app_pc handler; - if (rseq_get_region_info(pc, NULL, NULL, &handler)) { + if (rseq_get_region_info(pc, NULL, NULL, &handler, NULL, NULL)) { LOG(THREAD_GET, LOG_INTERP, 2, "recreate_app: moving " PFX " inside rseq region to handler " PFX "\n", pc, handler); diff --git a/core/unix/os.c b/core/unix/os.c index 9f15cd3b94d..11d8c675adb 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -10128,8 +10128,10 @@ os_thread_take_over(priv_mcontext_t *mc, kernel_sigset_t *sigset) /* See whether we should initiate lazy rseq handling, and avoid treating * regions as rseq when the rseq syscall is never set up. */ - if (rseq_is_registered_for_current_thread()) + if (rseq_is_registered_for_current_thread()) { rseq_locate_rseq_regions(); + rseq_thread_attach(dcontext); + } #endif /* Start interpreting from the signal context. */ diff --git a/core/unix/os_exports.h b/core/unix/os_exports.h index 29c56d0af3b..890b0cf3911 100644 --- a/core/unix/os_exports.h +++ b/core/unix/os_exports.h @@ -539,7 +539,8 @@ at_dl_runtime_resolve_ret(dcontext_t *dcontext, app_pc source_fragment, int *ret extern vm_area_vector_t *d_r_rseq_areas; bool -rseq_get_region_info(app_pc pc, app_pc *start OUT, app_pc *end OUT, app_pc *handler OUT); +rseq_get_region_info(app_pc pc, app_pc *start OUT, app_pc *end OUT, app_pc *handler OUT, + bool **reg_written OUT, int *reg_written_size OUT); #endif #endif /* _OS_EXPORTS_H_ */ diff --git a/core/unix/rseq_linux.c b/core/unix/rseq_linux.c index 80c589e13bb..864430c0281 100644 --- a/core/unix/rseq_linux.c +++ b/core/unix/rseq_linux.c @@ -45,6 +45,7 @@ #include "os_private.h" #include "rseq_linux.h" #include "../fragment.h" +#include "decode.h" #ifdef HAVE_RSEQ # include #else @@ -75,9 +76,12 @@ typedef struct _rseq_region_t { app_pc start; app_pc end; app_pc handler; - /* XXX i#2350: Additional information will be added here for eliminating - * some existing limitations. + /* We need to preserve input registers for targeting "start" instead of "handler" + * for our 2nd invocation, if they're written in the rseq region. We only support + * GPR inputs. We document that we do not support any other inputs (no flags, no + * SIMD registers). */ + bool reg_written[DR_NUM_GPR_REGS]; } rseq_region_t; /* vmvector callbacks */ @@ -115,14 +119,34 @@ d_r_rseq_exit(void) DELETE_LOCK(rseq_trigger_lock); } +void +rseq_thread_attach(dcontext_t *dcontext) +{ + rseq_region_t *info; + if (!vmvector_lookup_data(d_r_rseq_areas, dcontext->next_tag, NULL, NULL, + (void **)&info)) + return; + /* The thread missed the save of its state on rseq entry. We could try to save here + * so the restore on rseq exit won't read incorrect values, but it's simpler and + * less error-prone to send it to the abort handler, like we do on detach or other + * translation points. + */ + dcontext->next_tag = info->handler; +} + bool -rseq_get_region_info(app_pc pc, app_pc *start OUT, app_pc *end OUT, app_pc *handler OUT) +rseq_get_region_info(app_pc pc, app_pc *start OUT, app_pc *end OUT, app_pc *handler OUT, + bool **reg_written OUT, int *reg_written_size OUT) { rseq_region_t *info; if (!vmvector_lookup_data(d_r_rseq_areas, pc, start, end, (void **)&info)) return false; if (handler != NULL) *handler = info->handler; + if (reg_written != NULL) + *reg_written = info->reg_written; + if (reg_written_size != NULL) + *reg_written_size = sizeof(info->reg_written) / sizeof(info->reg_written[0]); return true; } @@ -148,6 +172,71 @@ rseq_is_registered_for_current_thread(void) return false; } +static void +rseq_analyze_instructions(rseq_region_t *info) +{ + /* We analyze the instructions inside [start,end) looking for register state that we + * need to preserve for our restart. We do not want to blindly spill and restore + * 16+ registers for every sequence (too much overhead). + */ + instr_t instr; + instr_init(GLOBAL_DCONTEXT, &instr); + app_pc pc = info->start; + int i; + bool reached_cti = false; + memset(info->reg_written, 0, sizeof(info->reg_written)); + while (pc < info->end) { + instr_reset(GLOBAL_DCONTEXT, &instr); + app_pc next_pc = decode(GLOBAL_DCONTEXT, pc, &instr); + if (next_pc == NULL) { + REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, + get_application_name(), get_application_pid(), + "Rseq sequence contains invalid instructions"); + ASSERT_NOT_REACHED(); + } + if (instr_is_syscall(&instr)) { + REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, + get_application_name(), get_application_pid(), + "Rseq sequence contains a system call"); + ASSERT_NOT_REACHED(); + } + if (instr_is_call(&instr)) { + REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, + get_application_name(), get_application_pid(), + "Rseq sequence contains a call"); + ASSERT_NOT_REACHED(); + } + if (instr_is_cti(&instr)) + reached_cti = true; + /* We potentially need to preserve any register written anywhere inside + * the sequence. We can't limit ourselves to registers clearly live on + * input, since code *after* the sequence could read them. We do disallow + * callouts to helper functions to simplify our lives. + * + * We only preserve GPR's, for simplicity, and because they are far more likely + * as inputs than flags or SIMD registers. We'd like to verify that only GPR's + * are used, but A) we can't easily check values read *after* the sequence (the + * handler could set up state read afterward and sometimes clobbered inside), B) + * we do want to support SIMD and flags writes in the sequence, and C) even + * checking for values read in the sequence would want new interfaces like + * DR_REG_START_SIMD or register iterators for reasonable code. + */ + for (i = 0; i < DR_NUM_GPR_REGS; i++) { + if (info->reg_written[i]) + continue; + reg_id_t reg = DR_REG_START_GPR + (reg_id_t)i; + if (instr_writes_to_reg(&instr, reg, DR_QUERY_DEFAULT)) { + LOG(GLOBAL, LOG_LOADER, 3, + "Rseq region @" PFX " writes register %s at " PFX "\n", info->start, + reg_names[reg], pc); + info->reg_written[i] = true; + } + } + pc = next_pc; + } + instr_free(GLOBAL_DCONTEXT, &instr); +} + static void rseq_process_entry(struct rseq_cs *entry, ssize_t load_offs) { @@ -162,6 +251,7 @@ rseq_process_entry(struct rseq_cs *entry, ssize_t load_offs) info->start = (app_pc)(ptr_uint_t)entry->start_ip + load_offs; info->end = info->start + entry->post_commit_offset; info->handler = (app_pc)(ptr_uint_t)entry->abort_ip + load_offs; + rseq_analyze_instructions(info); vmvector_add(d_r_rseq_areas, info->start, info->end, (void *)info); RSTATS_INC(num_rseq_regions); /* Check the start pc. We don't take the effort to check for non-tags or diff --git a/core/unix/rseq_linux.h b/core/unix/rseq_linux.h index 9fa9bdceb4b..ba24cd0533f 100644 --- a/core/unix/rseq_linux.h +++ b/core/unix/rseq_linux.h @@ -51,6 +51,9 @@ d_r_rseq_init(void); void d_r_rseq_exit(void); +void +rseq_thread_attach(dcontext_t *dcontext); + bool rseq_is_registered_for_current_thread(void); diff --git a/core/vmareas.c b/core/vmareas.c index df874979313..671cb780b2d 100644 --- a/core/vmareas.c +++ b/core/vmareas.c @@ -8251,16 +8251,16 @@ check_thread_vm_area(dcontext_t *dcontext, app_pc pc, app_pc tag, void **vmlist, ASSERT(*stop != NULL); #ifdef LINUX if (!vmvector_empty(d_r_rseq_areas)) { - /* While for core operation we do not need to end a block at an rseq - * endpoint, we need clients to treat the endpoint as a barrier and + /* XXX i#3798: While for core operation we do not need to end a block at + * an rseq endpoint, we need clients to treat the endpoint as a barrier and * restore app state. drreg today treats a block end as a barrier. If * drreg adds optimizations that cross blocks (such as in traces), we may * need to add some other feature here: a fake app cti? That affects - * clients measuring app behavior, though with rseq fidelity is already - * not 100%. - * Similarly, we don't really need to not have a block span the start of - * an rseq region. But, that makes it easier to turn on full_decode for - * simpler mangling. + * clients measuring app behavior, though with rseq fidelity is already not + * 100%. Similarly, we don't really need to not have a block span the start + * of an rseq region. But, we need to save app values at the start, which + * is best done prior to drreg storing them elsewhere; plus, it makes it + * easier to turn on full_decode for simpler mangling. */ bool entered_rseq = false; app_pc rseq_start, next_boundary = NULL; diff --git a/suite/tests/linux/rseq.c b/suite/tests/linux/rseq.c index 49adf5895b2..6f8ab8f4870 100644 --- a/suite/tests/linux/rseq.c +++ b/suite/tests/linux/rseq.c @@ -108,6 +108,8 @@ test_rseq(void) /* Store the entry into the ptr. */ "leaq 1b(%%rip), %%rax\n\t" "movq %%rax, %0\n\t" + /* Test a register input to the sequence. */ + "mov %3, %%rax\n\t" /* Test "falling into" the rseq region. */ /* Restartable sequence. @@ -115,8 +117,10 @@ test_rseq(void) * handler invoked: a simple way to test a restart natively. */ "2:\n\t" - "mov %3, %%rax\n\t" "mov %%rax, %1\n\t" + /* Test clobbering an input register. */ + "mov %4, %%rax\n\t" + "addl $1, %2\n\t" /* Post-commit. */ "3:\n\t" @@ -126,17 +130,15 @@ test_rseq(void) /* clang-format off */ /* (avoid indenting next few lines) */ ".long " STRINGIFY(RSEQ_SIG) "\n\t" "4:\n\t" - "addl $1, %2\n\t" "jmp 6b\n\t" /* Clear the ptr. */ "5:\n\t" - "leaq 1b(%%rip), %%rax\n\t" "movq $0, %0\n\t" /* clang-format on */ : "=m"(rseq_tls.rseq_cs), "=m"(id), "=m"(restarts) - : "m"(rseq_tls.cpu_id) + : "m"(rseq_tls.cpu_id), "i"(RSEQ_CPU_ID_UNINITIALIZED) : "rax", "memory"); assert(id != RSEQ_CPU_ID_UNINITIALIZED); return restarts;