Skip to content

Commit

Permalink
i#4316 a64 rseq: Nop writeback stores (#4858)
Browse files Browse the repository at this point in the history
Adds nop-ing of writeback stores inside rseq regions by replacing them
with adds to preserve the GPR changes but throw away the memory changes.

Updates the documentation.
Adds a test case.

Fixes #4316
  • Loading branch information
derekbruening authored Apr 16, 2021
1 parent 54fb46d commit f424cfd
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 20 deletions.
2 changes: 2 additions & 0 deletions api/docs/bt.dox
Original file line number Diff line number Diff line change
Expand Up @@ -1327,6 +1327,8 @@ This run-twice approach is subject to the following limitations:
effects: it must only write to memory and not to any registers.
For example, a push instruction which both writes to memory and the
stack pointer register is not supported.
An exception is a pre-indexed or post-indexed writeback store, which is
supported.
- Each rseq region's code must end with a fall-through (non-control-flow)
instruction.
- Indirect branches that do not exit the rseq region are not allowed.
Expand Down
76 changes: 60 additions & 16 deletions core/arch/mangle_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -1321,6 +1321,65 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
});
}

/* The caller should only call this for instr_writes_memory(instr).
* Returns whether it destroyed "instr".
*/
static bool
mangle_rseq_nop_store(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr)
{
ASSERT(instr_writes_memory(instr));
RSTATS_INC(num_rseq_stores_elided);
/* XXX i#2350: We want to turn just the store portion of the instr into a nop
* and keep any register side effects. That is complex, however. For now we
* only support simple stores and aarchxx writebacks.
*/
# ifdef AARCHXX
/* Handle writeback via pre-index or post-index addressing. */
opnd_t memop = instr_get_dst(instr, 0);
if (opnd_is_base_disp(memop) && instr_num_dsts(instr) == 2 &&
instr_num_srcs(instr) == 3 && opnd_is_reg(instr_get_src(instr, 0)) &&
opnd_is_reg(instr_get_src(instr, 1)) &&
opnd_is_immed_int(instr_get_src(instr, 2)) &&
opnd_is_reg(instr_get_dst(instr, 1)) && opnd_is_base_disp(memop) &&
opnd_get_index(memop) == DR_REG_NULL && opnd_get_scale(memop) == DR_REG_NULL) {
/* We need to mangle this instruction in case it uses the stolen register.
* We can't adjust next_instr backward as that will re-trigger rseq mangling:
* we want to hit the stolen reg mangling checked after rseq.
* Thus we re-use "instr".
*/
int increment = (int)opnd_get_immed_int(instr_get_src(instr, 2));
instr_t *add = INSTR_XL8(XINST_CREATE_add(dcontext, instr_get_dst(instr, 1),
OPND_CREATE_INT(increment)),
get_app_instr_xl8(instr));
LOG(THREAD, LOG_INTERP, 3,
"mangle: turning writeback store inside rseq region to add @" PFX "\n",
get_app_instr_xl8(instr));
/* XXX: This is kind of hacky. Should we provide a variant of instr_clone()?
* Or should we directly call mangle_special_registers() here to avoid all
* this?
*/
instr_free(dcontext, instr);
add->next = instr->next;
add->prev = instr->prev;
memcpy(instr, add, sizeof(*instr));
instr_init(dcontext, add);
instr_destroy(dcontext, add);
return false;
}
# endif
if (instr_num_dsts(instr) > 1) {
REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(),
get_application_pid(),
"Store inside rseq region has multiple destinations");
ASSERT_NOT_REACHED();
}
LOG(THREAD, LOG_INTERP, 3, "mangle: removing store inside rseq region @" PFX "\n",
get_app_instr_xl8(instr));
instrlist_remove(ilist, instr);
instr_destroy(dcontext, instr);
return true; /* destroyed instr */
}

/* Returns whether it destroyed "instr". May modify next_instr. */
static bool
mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr,
Expand Down Expand Up @@ -1440,26 +1499,11 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr,
*/
if (!instr_writes_memory(instr))
return false;
/* XXX i#2350: We want to turn just the store portion of the instr into a nop
* and keep any register side effects. That is complex, however. For now we
* only support simple stores.
*/
/* We perform this mangling of earlier instructions in the region out of logical
* order (*after* the mangling above of the end of the region) to avoid issues
* with accessing "instr" after we delete it.
*/
if (instr_num_dsts(instr) > 1) {
REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(),
get_application_pid(),
"Store inside rseq region has multiple destinations");
ASSERT_NOT_REACHED();
}
LOG(THREAD, LOG_INTERP, 3, "mangle: removing store inside rseq region @" PFX "\n",
pc);
RSTATS_INC(num_rseq_stores_elided);
instrlist_remove(ilist, instr);
instr_destroy(dcontext, instr);
return true; /* destroyed instr */
return mangle_rseq_nop_store(dcontext, ilist, instr);
}

static void
Expand Down
1 change: 1 addition & 0 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4716,6 +4716,7 @@ if (NOT ANDROID AND AARCHXX)
code_api|sample.opcodes
PROPERTIES LABELS RUNS_ON_QEMU)
if (LINUX AND X64 AND HAVE_RSEQ)
# QEMU just returns ENOSYS, but still this is testing that.
set_tests_properties(
code_api|linux.rseq
code_api|linux.rseq_table
Expand Down
69 changes: 65 additions & 4 deletions suite/tests/linux/rseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,7 @@ test_rseq_branches_once(bool force_restart, int *completions_out, int *restarts_
: [rseq_tls] "=m"(rseq_tls.rseq_cs), [id] "=m"(id),
[completions] "=m"(completions), [restarts] "=m"(restarts),
[force_restart_write] "=m"(force_restart)
: [cpu_id] "m"(rseq_tls.cpu_id), [cpu_id_uninit] "i"(RSEQ_CPU_ID_UNINITIALIZED),
[force_restart] "m"(force_restart)
: [cpu_id] "m"(rseq_tls.cpu_id), [force_restart] "m"(force_restart)
: "rax", "rcx", "memory");
#elif defined(AARCH64)
__asm__ __volatile__(
Expand Down Expand Up @@ -409,8 +408,7 @@ test_rseq_branches_once(bool force_restart, int *completions_out, int *restarts_
: [rseq_tls] "=m"(rseq_tls.rseq_cs), [id] "=m"(id),
[completions] "=m"(completions), [restarts] "=m"(restarts),
[force_restart_write] "=m"(force_restart)
: [cpu_id] "m"(rseq_tls.cpu_id), [cpu_id_uninit] "i"(RSEQ_CPU_ID_UNINITIALIZED),
[force_restart] "m"(force_restart)
: [cpu_id] "m"(rseq_tls.cpu_id), [force_restart] "m"(force_restart)
: "x0", "x1", "memory");
#else
# error Unsupported arch
Expand Down Expand Up @@ -711,6 +709,65 @@ test_rseq_native_abort(void)
#endif /* DEBUG */
}

#ifdef AARCH64
static void
test_rseq_writeback_store(void)
{
__u32 id = RSEQ_CPU_ID_UNINITIALIZED;
int array[2] = { 0 };
int *index = array;
__asm__ __volatile__(
/* clang-format off */ /* (avoid indenting next few lines) */
RSEQ_ADD_TABLE_ENTRY(writeback, 2f, 3f, 4f)
/* clang-format on */

"6:\n\t"
/* Store the entry into the ptr. */
"adrp x0, rseq_cs_writeback\n\t"
"add x0, x0, :lo12:rseq_cs_writeback\n\t"
"str x0, %[rseq_tls]\n\t"
/* Test a register input to the sequence. */
"ldr x0, %[cpu_id]\n\t"
/* Test "falling into" the rseq region. */

"2:\n\t"
"str x0, %[id]\n\t"
/* Test writeback stores on both executions by having a loop
* that won't terminate otherwise.
* Further stress corner cases by using the default stolen register.
*/
"mov x28, %[index]\n\t"
"add x2, x28, #8\n\t"
"mov w1, #42\n\t"
"loop:\n\t"
"str w1, [x28, 4]!\n\t"
"str w1, [x28], 4\n\t"
"cmp x28, x2\n\t"
"b.ne loop\n\t"
"nop\n\t"

/* Post-commit. */
"3:\n\t"
"b 5f\n\t"

/* Abort handler. */
/* clang-format off */ /* (avoid indenting next few lines) */
".long " STRINGIFY(RSEQ_SIG) "\n\t"
"4:\n\t"
"b 6b\n\t"

/* Clear the ptr. */
"5:\n\t"
"str xzr, %[rseq_tls]\n\t"
/* clang-format on */

: [rseq_tls] "=m"(rseq_tls.rseq_cs), [id] "=m"(id), [index] "=g"(index)
: [cpu_id] "m"(rseq_tls.cpu_id)
: "x0", "x1", "x2", "x28", "memory");
assert(id != RSEQ_CPU_ID_UNINITIALIZED);
}
#endif

#ifdef RSEQ_TEST_ATTACH
void *
rseq_thread_loop(void *arg)
Expand Down Expand Up @@ -886,6 +943,10 @@ main()
int i;
for (i = 0; i < 200; i++)
test_rseq_branches();
#ifdef AARCH64
/* Test nop-ing stores with side effects. */
test_rseq_writeback_store();
#endif
#ifdef RSEQ_TEST_ATTACH
/* Detach while the thread is in its rseq region loop. */
atomic_store(&exit_requested, 1);
Expand Down

0 comments on commit f424cfd

Please sign in to comment.