i#2350 rseq: Translate from rseq region to handler (#3757)
On any translation, and in particular on detach, we translate from
inside an rseq region to the abort handler.  This is necessary to
avoid problems with a CPU migration earlier in the region while
running the instrumented version.

Augments the api.rseq test with a thread that sits in a loop in an
rseq region to test translation on detach: without the translation, it
loops forever.

Issue: #2350
derekbruening authored Jul 26, 2019
1 parent c12595f commit ec729f0
Showing 6 changed files with 149 additions and 7 deletions.
7 changes: 7 additions & 0 deletions api/docs/release.dox
@@ -1647,7 +1647,14 @@ not supported.
- Each rseq region must end with a return instruction, and each abort handler
plus rseq code must combine into a callee following normal call-return
semantics.
- Each rseq region's code must end with a fall-through (non-control-flow)
instruction.
- Any helper function called from within an rseq region must have no side effects.
- The instrumented execution of the rseq region may not perfectly reflect
the native behavior of the application. The instrumentation will never see
the abort handler called, and memory addresses may be wrong if they are based on
the underlying cpuid and a migration occurred mid-region. These are minor and
acceptable for most tools (especially given that there is no better alternative).

\subsection sec_limit_perf Performance Limitations

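For readers new to these constraints, here is a minimal sketch (not part of this commit) of an x86-64 rseq region laid out the way the documentation above requires: a __rseq_cs descriptor, a call so the abort handler plus region behave as a callee, a fall-through instruction at the region end, and a return at the post-commit point. It is distilled from the test code later in this commit; `example_rseq_region` and `committed` are illustrative names, and `rseq_tls` is assumed to have been registered with SYS_rseq beforehand.

```c
#include <linux/rseq.h> /* struct rseq (the test below declares its own copy) */

static __thread volatile struct rseq rseq_tls; /* registered via SYS_rseq elsewhere */
static int committed;

static void
example_rseq_region(void)
{
    __asm__ __volatile__(
        /* Descriptor: version, flags, start_ip, post_commit_offset, abort_ip. */
        ".pushsection __rseq_cs, \"aw\"\n\t"
        ".balign 32\n\t"
        "1:\n\t"
        ".long 0, 0\n\t"
        ".quad 2f, 3f-2f, 4f\n\t"
        ".popsection\n\t"
        /* The abort handler plus region must combine into a callee. */
        "call 6f\n\t"
        "jmp 5f\n\t"
        "6:\n\t"
        "leaq 1b(%%rip), %%rax\n\t"
        "movq %%rax, %0\n\t"   /* Point rseq_cs at the descriptor. */
        "2:\n\t"               /* Region start. */
        "movl $1, %1\n\t"      /* The committing store. */
        "nop\n\t"              /* Region ends with a fall-through instruction. */
        "3:\n\t"               /* Post-commit: the required return. */
        "ret\n\t"
        ".long 0x90909090\n\t" /* RSEQ_SIG, checked just before the abort label. */
        "4:\n\t"               /* Abort handler: simply retry. */
        "jmp 6b\n\t"
        "5:\n\t"
        "movq $0, %0\n\t"      /* Clear rseq_cs on the way out. */
        : "=m"(rseq_tls.rseq_cs), "=m"(committed)
        :
        : "rax", "memory");
}
```
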
19 changes: 17 additions & 2 deletions core/arch/mangle_shared.c
@@ -791,6 +791,13 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *n
"Malformed rseq endpoint: not on instruction boundary");
ASSERT_NOT_REACHED();
}
if (instr_is_cti(instr)) {
REPORT_FATAL_ERROR_AND_EXIT(
RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(),
get_application_pid(),
"Rseq sequences must fall through their endpoints");
ASSERT_NOT_REACHED();
}
# ifdef X86
/* We just ran the instrumented version of the rseq code, with the stores
* removed. Now we need to invoke it again natively for real. We have to
@@ -835,6 +842,9 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *n
ilist, next_instr, NULL, NULL);
/* Set up the frame and stack alignment. We assume the rseq code was a leaf
* function and that rsp is 16-aligned now.
* TODO i#2350: If we stick with an extra call frame, it would be better to
* spill rsp and hard-align it using a bitmask to ensure alignment; however,
* see above where we hope to eliminate the call-return assumption altogether.
*/
instrlist_meta_preinsert(ilist, next_instr,
XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_RSP),
@@ -851,7 +861,6 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *n
get_application_pid(),
"Rseq is not yet supported for non-x86");
ASSERT_NOT_REACHED();

# endif
}

@@ -872,6 +881,10 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, instr_t *n
* and keep any register side effects. That is complex, however. For now we
* only support simple stores.
*/
/* We perform this mangling of earlier instructions in the region out of logical
* order (*after* the mangling above of the end of the region) to avoid issues
* with accessing "instr" after we delete it.
*/
if (instr_num_dsts(instr) > 1) {
REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(),
get_application_pid(),
@@ -981,7 +994,9 @@ d_r_mangle(dcontext_t *dcontext, instrlist_t *ilist, uint *flags INOUT, bool man
* solve. We expect the vmvector_empty check to be fast enough for the common
* case.
*/
if (instr_is_app(instr) && !vmvector_empty(d_r_rseq_areas)) {
if (instr_is_app(instr) &&
!instr_is_our_mangling(instr) /* avoid synthetic exit jump*/ &&
!vmvector_empty(d_r_rseq_areas)) {
app_pc pc = get_app_instr_xl8(instr);
if (vmvector_overlap(d_r_rseq_areas, pc, pc + 1)) {
if (mangle_rseq(dcontext, ilist, instr, next_instr))
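Regarding the TODO about stack alignment in the first hunk above: hard-aligning with a bitmask means spilling the application's rsp somewhere it can be restored from, then clearing the low bits so the stack is 16-aligned no matter what, instead of assuming the rseq code was a leaf function. The following is only an illustration of the arithmetic, not the code DR emits; the emitted instruction would be the equivalent of `and $-16, %rsp` after the spill.

```c
#include <stdint.h>

/* Clearing the low four bits rounds the stack pointer down to a 16-byte
 * boundary regardless of its incoming value.
 */
static inline uintptr_t
hard_align_16(uintptr_t sp)
{
    return sp & ~(uintptr_t)0xf; /* same effect as "and $-16, %rsp" */
}
```
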
18 changes: 18 additions & 0 deletions core/translate.c
@@ -569,6 +569,21 @@ translate_restore_clean_call(dcontext_t *tdcontext, translate_walk_t *walk)
*/
}

static app_pc
translate_restore_special_cases(app_pc pc)
{
#ifdef LINUX
app_pc handler;
if (vmvector_lookup_data(d_r_rseq_areas, pc, NULL, NULL, (void **)&handler)) {
LOG(THREAD_GET, LOG_INTERP, 2,
"recreate_app: moving " PFX " inside rseq region to handler " PFX "\n", pc,
handler);
return handler;
}
#endif
return pc;
}

/* Returns a success code, but makes a best effort regardless.
* If just_pc is true, only recreates pc.
* Modifies mc with the recreated state.
@@ -699,6 +714,7 @@ recreate_app_state_from_info(dcontext_t *tdcontext, const translation_info_t *in

if (!just_pc)
translate_walk_restore(tdcontext, &walk, &instr, answer);
answer = translate_restore_special_cases(answer);
LOG(THREAD_GET, LOG_INTERP, 2, "recreate_app -- found ok pc " PFX "\n", answer);
mc->pc = answer;
return res;
@@ -891,6 +907,7 @@ recreate_app_state_from_ilist(dcontext_t *tdcontext, instrlist_t *ilist, byte *s
}
if (!just_pc)
translate_walk_restore(tdcontext, &walk, inst, answer);
answer = translate_restore_special_cases(answer);
LOG(THREAD_GET, LOG_INTERP, 2, "recreate_app -- found ok pc " PFX "\n",
answer);
mc->pc = answer;
@@ -931,6 +948,7 @@ recreate_app_state_from_ilist(dcontext_t *tdcontext, instrlist_t *ilist, byte *s
ASSERT_NOT_REACHED();
if (just_pc) {
/* just guess */
answer = translate_restore_special_cases(answer);
mc->pc = answer;
}
return RECREATE_FAILURE;
2 changes: 1 addition & 1 deletion core/unix/module_elf.c
@@ -1725,7 +1725,7 @@ module_init_rseq(module_area_t *ma, bool at_map)
* over it. We're reading the loaded data, not the file, so it will
* always be aligned.
*/
# define RSEQ_CS_ALIGNMENT 4 * sizeof(__u64)
# define RSEQ_CS_ALIGNMENT (4 * sizeof(__u64))
struct rseq_cs *array = (struct rseq_cs *)ALIGN_FORWARD(
sec_hdr->sh_addr + load_offs, RSEQ_CS_ALIGNMENT);
int j;
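The one-line module_elf.c change simply parenthesizes the macro body. Whether the unparenthesized form actually misbehaved here depends on how ALIGN_FORWARD expands its arguments, but the parentheses make the macro safe in any expression. A standalone illustration of the precedence hazard they guard against (the values and the modulo expression are made up, not taken from module_elf.c):

```c
#include <linux/types.h> /* __u64 */
#include <stdio.h>

#define RSEQ_CS_ALIGNMENT_NO_PARENS 4 * sizeof(__u64)
#define RSEQ_CS_ALIGNMENT (4 * sizeof(__u64))

int
main(void)
{
    size_t offs = 40;
    /* % and * share precedence and associate left to right, so the
     * unparenthesized macro yields (40 % 4) * 8 == 0 rather than the
     * intended 40 % 32 == 8.
     */
    printf("%zu vs %zu\n", offs % RSEQ_CS_ALIGNMENT_NO_PARENS,
           offs % RSEQ_CS_ALIGNMENT);
    return 0;
}
```
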
3 changes: 2 additions & 1 deletion suite/tests/CMakeLists.txt
@@ -3454,7 +3454,7 @@ if (UNIX)
set(linux.persist-use_FLAKY_depends linux.persist_FLAKY)

if (LINUX AND X86 AND X64 AND HAVE_RSEQ)
# The rseq feature is Linux-only.
# The rseq kernel feature is Linux-only.
# TODO i#2350: Port the assembly in the test to 32-bit, ARM, AArch64.
tobuild(linux.rseq linux/rseq.c)
# Test the other sections. Unfortunately we need a separate binary for each.
@@ -3466,6 +3466,7 @@ if (UNIX)
COMPILE_FLAGS "-DRSEQ_TEST_USE_NO_ARRAY")
# Test attaching, which has a separate lazy rseq check.
tobuild_api(api.rseq linux/rseq.c "" "" OFF OFF)
link_with_pthread(api.rseq)
append_property_string(TARGET api.rseq COMPILE_FLAGS "-DRSEQ_TEST_ATTACH")
endif ()
else (UNIX)
107 changes: 104 additions & 3 deletions suite/tests/linux/rseq.c
@@ -35,6 +35,10 @@
# include "dr_api.h"
#endif
#include "tools.h"
#ifdef RSEQ_TEST_ATTACH
# include "thread.h"
# include "condvar.h"
#endif
#ifndef LINUX
# error Only Linux is supported.
#endif
@@ -58,9 +62,14 @@
#define RSEQ_SIG 0x90909090 /* nops to disasm nicely */

/* This cannot be a stack-local variable, as the kernel will force SIGSEGV on a syscall
* if it can't read this struct.
* if it can't read this struct. And for multiple threads it should be in TLS.
*/
static struct rseq rseq_tls;
static __thread volatile struct rseq rseq_tls;

#ifdef RSEQ_TEST_ATTACH
static volatile int exit_requested;
static void *thread_ready;
#endif

int
test_rseq(void)
@@ -82,7 +91,7 @@ test_rseq(void)
".quad 2f, 3f-2f, 4f\n\t" /* start_ip, post_commit_offset, abort_ip */
".popsection\n\t"
#if !defined(RSEQ_TEST_USE_OLD_SECTION_NAME) && !defined(RSEQ_TEST_USE_NO_ARRAY)
/* Add an array section. */
/* Add an array entry. */
".pushsection __rseq_cs_ptr_array, \"aw\"\n\t"
".quad 1b\n\t"
".popsection\n\t"
@@ -133,6 +142,88 @@ test_rseq(void)
return restarts;
}

#ifdef RSEQ_TEST_ATTACH
void *
rseq_thread_loop(void *arg)
{
/* We don't try to signal inside the rseq code. Just having the thread scheduled
* in this function is close enough: the test already has non-determinism.
*/
signal_cond_var(thread_ready);
rseq_tls.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
int res = syscall(SYS_rseq, &rseq_tls, sizeof(rseq_tls), 0, RSEQ_SIG);
if (res != 0)
return NULL;
static int zero;
__asm__ __volatile__(
/* Add a table entry. */
".pushsection __rseq_cs, \"aw\"\n\t"
".balign 32\n\t"
"1:\n\t"
".long 0, 0\n\t" /* version, flags */
".quad 2f, 3f-2f, 4f\n\t" /* start_ip, post_commit_offset, abort_ip */
".popsection\n\t"
/* Add an array entry. */
".pushsection __rseq_cs_ptr_array, \"aw\"\n\t"
".quad 1b\n\t"
".popsection\n\t"

/* Although our abort handler has to handle being called (that's all DR
* supports), we structure the code to allow directly calling past it, to
* count restart_count.
*/
"call 6f\n\t"
"jmp 5f\n\t"

"6:\n\t"
/* Store the entry into the ptr. */
"leaq 1b(%%rip), %%rax\n\t"
"movq %%rax, %0\n\t"
/* Test "falling into" the rseq region. */

/* Restartable sequence. We loop to ensure we're in the region on
* detach. If DR fails to translate this thread to the abort handler
* on detach, it will loop forever and the test will time out and fail.
* Note that this breaks DR's assumptions: the instrumented run
* never exits the loop, and thus never reaches the "commit point" of the
* nop, and thus never invokes the handler natively. However, we don't
* care: we just want to test detach.
*/
"2:\n\t"
/* I was going to assert that zero==0 at the end, but that requires more
* synch to not reach here natively before DR attaches. Decided against it.
*/
"movl $1, %1\n\t"
"jmp 2b\n\t"
/* We can't end the sequence in a branch (DR can't handle it). */
"nop\n\t"

/* Post-commit. */
"3:\n\t"
"ret\n\t"

/* Abort handler: if we're done, exit; else, re-enter. */
/* clang-format off */ /* (avoid indenting next few lines) */
".long " STRINGIFY(RSEQ_SIG) "\n\t"
"4:\n\t"
"mov %2, %%rax\n\t"
"cmp $0, %%rax\n\t"
"jne 3b\n\t"
"jmp 6b\n\t"

/* Clear the ptr. */
"5:\n\t"
"leaq 1b(%%rip), %%rax\n\t"
"movq $0, %0\n\t"
/* clang-format on */

: "=m"(rseq_tls.rseq_cs), "=m"(zero)
: "m"(exit_requested)
: "rax", "memory");
return NULL;
}
#endif /* RSEQ_TEST_ATTACH */

int
main()
{
@@ -141,11 +232,21 @@ main()
int res = syscall(SYS_rseq, &rseq_tls, sizeof(rseq_tls), 0, RSEQ_SIG);
if (res == 0) {
#ifdef RSEQ_TEST_ATTACH
/* Create a thread that sits in the rseq region, to test attaching and detaching
* from inside the region.
*/
thread_ready = create_cond_var();
thread_t mythread = create_thread(rseq_thread_loop, NULL);
wait_cond_var(thread_ready);
dr_app_setup_and_start();
#endif
restart_count = test_rseq();
#ifdef RSEQ_TEST_ATTACH
/* Detach while the thread is in its rseq region loop. */
exit_requested = 1; /* atomic on x86; ARM will need more. */
dr_app_stop_and_cleanup();
join_thread(mythread);
destroy_cond_var(thread_ready);
#endif
} else {
/* Linux kernel 4.18+ is required. */
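On the "atomic on x86; ARM will need more" comment above: one possible portable shape for the detach flag, should the TODO to port the test to ARM/AArch64 be tackled, is to use C11 atomics so the store and load carry the required ordering. This is only a sketch with illustrative names, not part of this commit, and the test's assembly reads the flag directly, so it would need a matching change.

```c
#include <stdatomic.h>

static atomic_int exit_requested_portable;

/* Called from main() just before dr_app_stop_and_cleanup(). */
static void
request_exit(void)
{
    /* Release store pairs with the acquire load in the spinning thread. */
    atomic_store_explicit(&exit_requested_portable, 1, memory_order_release);
}

/* Polled by the looping thread to decide when to leave its rseq loop. */
static int
exit_was_requested(void)
{
    return atomic_load_explicit(&exit_requested_portable, memory_order_acquire);
}
```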
