From f424cfd32d1c664893264c2c7093cba5c719b8c0 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Fri, 16 Apr 2021 18:00:21 -0400 Subject: [PATCH] i#4316 a64 rseq: Nop writeback stores (#4858) Adds nop-ing of writeback stores inside rseq regions by replacing them with adds to preserve the GPR changes but throw away the memory changes. Updates the documentation. Adds a test case. Fixes #4316 --- api/docs/bt.dox | 2 + core/arch/mangle_shared.c | 76 ++++++++++++++++++++++++++++++-------- suite/tests/CMakeLists.txt | 1 + suite/tests/linux/rseq.c | 69 ++++++++++++++++++++++++++++++++-- 4 files changed, 128 insertions(+), 20 deletions(-) diff --git a/api/docs/bt.dox b/api/docs/bt.dox index dc227642b81..63d3e492dd6 100644 --- a/api/docs/bt.dox +++ b/api/docs/bt.dox @@ -1327,6 +1327,8 @@ This run-twice approach is subject to the following limitations: effects: it must only write to memory and not to any registers. For example, a push instruction which both writes to memory and the stack pointer register is not supported. + An exception is a pre-indexed or post-indexed writeback store, which is + supported. - Each rseq region's code must end with a fall-through (non-control-flow) instruction. - Indirect branches that do not exit the rseq region are not allowed. diff --git a/core/arch/mangle_shared.c b/core/arch/mangle_shared.c index 939b73f12cd..1b01e7cb9eb 100644 --- a/core/arch/mangle_shared.c +++ b/core/arch/mangle_shared.c @@ -1321,6 +1321,65 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist, }); } +/* The caller should only call this for instr_writes_memory(instr). + * Returns whether it destroyed "instr". + */ +static bool +mangle_rseq_nop_store(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) +{ + ASSERT(instr_writes_memory(instr)); + RSTATS_INC(num_rseq_stores_elided); + /* XXX i#2350: We want to turn just the store portion of the instr into a nop + * and keep any register side effects. That is complex, however. For now we + * only support simple stores and aarchxx writebacks. + */ +# ifdef AARCHXX + /* Handle writeback via pre-index or post-index addressing. */ + opnd_t memop = instr_get_dst(instr, 0); + if (opnd_is_base_disp(memop) && instr_num_dsts(instr) == 2 && + instr_num_srcs(instr) == 3 && opnd_is_reg(instr_get_src(instr, 0)) && + opnd_is_reg(instr_get_src(instr, 1)) && + opnd_is_immed_int(instr_get_src(instr, 2)) && + opnd_is_reg(instr_get_dst(instr, 1)) && opnd_is_base_disp(memop) && + opnd_get_index(memop) == DR_REG_NULL && opnd_get_scale(memop) == DR_REG_NULL) { + /* We need to mangle this instruction in case it uses the stolen register. + * We can't adjust next_instr backward as that will re-trigger rseq mangling: + * we want to hit the stolen reg mangling checked after rseq. + * Thus we re-use "instr". + */ + int increment = (int)opnd_get_immed_int(instr_get_src(instr, 2)); + instr_t *add = INSTR_XL8(XINST_CREATE_add(dcontext, instr_get_dst(instr, 1), + OPND_CREATE_INT(increment)), + get_app_instr_xl8(instr)); + LOG(THREAD, LOG_INTERP, 3, + "mangle: turning writeback store inside rseq region to add @" PFX "\n", + get_app_instr_xl8(instr)); + /* XXX: This is kind of hacky. Should we provide a variant of instr_clone()? + * Or should we directly call mangle_special_registers() here to avoid all + * this? + */ + instr_free(dcontext, instr); + add->next = instr->next; + add->prev = instr->prev; + memcpy(instr, add, sizeof(*instr)); + instr_init(dcontext, add); + instr_destroy(dcontext, add); + return false; + } +# endif + if (instr_num_dsts(instr) > 1) { + REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(), + get_application_pid(), + "Store inside rseq region has multiple destinations"); + ASSERT_NOT_REACHED(); + } + LOG(THREAD, LOG_INTERP, 3, "mangle: removing store inside rseq region @" PFX "\n", + get_app_instr_xl8(instr)); + instrlist_remove(ilist, instr); + instr_destroy(dcontext, instr); + return true; /* destroyed instr */ +} + /* Returns whether it destroyed "instr". May modify next_instr. */ static bool mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, @@ -1440,26 +1499,11 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, */ if (!instr_writes_memory(instr)) return false; - /* XXX i#2350: We want to turn just the store portion of the instr into a nop - * and keep any register side effects. That is complex, however. For now we - * only support simple stores. - */ /* We perform this mangling of earlier instructions in the region out of logical * order (*after* the mangling above of the end of the region) to avoid issues * with accessing "instr" after we delete it. */ - if (instr_num_dsts(instr) > 1) { - REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(), - get_application_pid(), - "Store inside rseq region has multiple destinations"); - ASSERT_NOT_REACHED(); - } - LOG(THREAD, LOG_INTERP, 3, "mangle: removing store inside rseq region @" PFX "\n", - pc); - RSTATS_INC(num_rseq_stores_elided); - instrlist_remove(ilist, instr); - instr_destroy(dcontext, instr); - return true; /* destroyed instr */ + return mangle_rseq_nop_store(dcontext, ilist, instr); } static void diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 5b9688c9f72..fe441e3c872 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4716,6 +4716,7 @@ if (NOT ANDROID AND AARCHXX) code_api|sample.opcodes PROPERTIES LABELS RUNS_ON_QEMU) if (LINUX AND X64 AND HAVE_RSEQ) + # QEMU just returns ENOSYS, but still this is testing that. set_tests_properties( code_api|linux.rseq code_api|linux.rseq_table diff --git a/suite/tests/linux/rseq.c b/suite/tests/linux/rseq.c index 7d148a31868..6aa5ccfdb4d 100644 --- a/suite/tests/linux/rseq.c +++ b/suite/tests/linux/rseq.c @@ -338,8 +338,7 @@ test_rseq_branches_once(bool force_restart, int *completions_out, int *restarts_ : [rseq_tls] "=m"(rseq_tls.rseq_cs), [id] "=m"(id), [completions] "=m"(completions), [restarts] "=m"(restarts), [force_restart_write] "=m"(force_restart) - : [cpu_id] "m"(rseq_tls.cpu_id), [cpu_id_uninit] "i"(RSEQ_CPU_ID_UNINITIALIZED), - [force_restart] "m"(force_restart) + : [cpu_id] "m"(rseq_tls.cpu_id), [force_restart] "m"(force_restart) : "rax", "rcx", "memory"); #elif defined(AARCH64) __asm__ __volatile__( @@ -409,8 +408,7 @@ test_rseq_branches_once(bool force_restart, int *completions_out, int *restarts_ : [rseq_tls] "=m"(rseq_tls.rseq_cs), [id] "=m"(id), [completions] "=m"(completions), [restarts] "=m"(restarts), [force_restart_write] "=m"(force_restart) - : [cpu_id] "m"(rseq_tls.cpu_id), [cpu_id_uninit] "i"(RSEQ_CPU_ID_UNINITIALIZED), - [force_restart] "m"(force_restart) + : [cpu_id] "m"(rseq_tls.cpu_id), [force_restart] "m"(force_restart) : "x0", "x1", "memory"); #else # error Unsupported arch @@ -711,6 +709,65 @@ test_rseq_native_abort(void) #endif /* DEBUG */ } +#ifdef AARCH64 +static void +test_rseq_writeback_store(void) +{ + __u32 id = RSEQ_CPU_ID_UNINITIALIZED; + int array[2] = { 0 }; + int *index = array; + __asm__ __volatile__( + /* clang-format off */ /* (avoid indenting next few lines) */ + RSEQ_ADD_TABLE_ENTRY(writeback, 2f, 3f, 4f) + /* clang-format on */ + + "6:\n\t" + /* Store the entry into the ptr. */ + "adrp x0, rseq_cs_writeback\n\t" + "add x0, x0, :lo12:rseq_cs_writeback\n\t" + "str x0, %[rseq_tls]\n\t" + /* Test a register input to the sequence. */ + "ldr x0, %[cpu_id]\n\t" + /* Test "falling into" the rseq region. */ + + "2:\n\t" + "str x0, %[id]\n\t" + /* Test writeback stores on both executions by having a loop + * that won't terminate otherwise. + * Further stress corner cases by using the default stolen register. + */ + "mov x28, %[index]\n\t" + "add x2, x28, #8\n\t" + "mov w1, #42\n\t" + "loop:\n\t" + "str w1, [x28, 4]!\n\t" + "str w1, [x28], 4\n\t" + "cmp x28, x2\n\t" + "b.ne loop\n\t" + "nop\n\t" + + /* Post-commit. */ + "3:\n\t" + "b 5f\n\t" + + /* Abort handler. */ + /* clang-format off */ /* (avoid indenting next few lines) */ + ".long " STRINGIFY(RSEQ_SIG) "\n\t" + "4:\n\t" + "b 6b\n\t" + + /* Clear the ptr. */ + "5:\n\t" + "str xzr, %[rseq_tls]\n\t" + /* clang-format on */ + + : [rseq_tls] "=m"(rseq_tls.rseq_cs), [id] "=m"(id), [index] "=g"(index) + : [cpu_id] "m"(rseq_tls.cpu_id) + : "x0", "x1", "x2", "x28", "memory"); + assert(id != RSEQ_CPU_ID_UNINITIALIZED); +} +#endif + #ifdef RSEQ_TEST_ATTACH void * rseq_thread_loop(void *arg) @@ -886,6 +943,10 @@ main() int i; for (i = 0; i < 200; i++) test_rseq_branches(); +#ifdef AARCH64 + /* Test nop-ing stores with side effects. */ + test_rseq_writeback_store(); +#endif #ifdef RSEQ_TEST_ATTACH /* Detach while the thread is in its rseq region loop. */ atomic_store(&exit_requested, 1);