i#4316 a64 rseq: Port rseq mangling to AArch64
Ports rseq mangling to AArch64.  This requires implementing
patch_mov_immed_arch() (done by leveraging existing raw encoding code
from exit stub support) and spilling extra scratch registers in
multiple places.

Expands translation support to cover the new mangling.  This includes
adding proper recognition of an mcontext base load, which was
incorrectly identified as an indirect branch target load on x86.

Ports the heuristic for finding the app's rseq TLS offset on attach
(when we did not see an rseq syscall): on AArch64 it looks forward
rather than backward, since the backward search on x86 relies on
negative segment offsets.

Enables the rseq tests for AArch64.
Updates the rseq docs to state that AArch64 is supported.

One final step remains: supporting stores with writeback, which are
seen in real rseq sequences.  That will be done separately, and it
will complete the port of the x86 support to AArch64.

Issue: #4316
derekbruening committed Apr 15, 2021
1 parent 4d9f6f0 commit af7719a
Showing 10 changed files with 184 additions and 53 deletions.
api/docs/bt.dox (2 changes: 1 addition & 1 deletion)
@@ -1312,7 +1312,7 @@ commit.

This run-twice approach is subject to the following limitations:

- Only x86 is supported for now (no arm or aarch64 support yet).
- Only x86 and aarch64 are supported for now, and 32-bit x86 is not as well-tested.
- The application must store an rseq_cs struct for each rseq region in a
section of its binary named "__rseq_cs", optionally with an "__rseq_cs_ptr_array"
section of pointers into the __rseq_cs section, per established conventions.
core/arch/aarch64/emit_utils.c (13 changes: 6 additions & 7 deletions)
@@ -43,7 +43,6 @@
#define PRE instrlist_meta_preinsert
#define OPREG opnd_create_reg

#define NOP_INST 0xd503201f
#define BR_X1_INST (0xd61f0000 | 1 << 5) /* br x1 */

/***************************************************************************/
@@ -149,12 +148,12 @@ get_fcache_return_tls_offs(dcontext_t *dcontext, uint flags)
/* Generate move (immediate) of a 64-bit value using at most 4 instructions.
* pc must be a writable (vmcode) pc.
*/
static uint *
uint *
insert_mov_imm(uint *pc, reg_id_t dst, ptr_int_t val)
{
uint rt = dst - DR_REG_X0;
ASSERT(rt < 31);
*pc++ = 0xd2800000 | rt | (val & 0xffff) << 5; /* mov x(rt), #x */
*pc++ = 0xd2800000 | rt | (val & 0xffff) << 5; /* movz x(rt), #x */

if ((val >> 16 & 0xffff) != 0)
*pc++ = 0xf2a00000 | rt | (val >> 16 & 0xffff) << 5; /* movk x(rt), #x, lsl #16 */
@@ -211,7 +210,7 @@ insert_exit_stub_other_flags(dcontext_t *dcontext, fragment_t *f, linkstub_t *l,
* lots of places expect the stub size to be fixed.
*/
for (uint j = 0; j < num_nops_needed; j++)
*pc++ = NOP_INST;
*pc++ = RAW_NOP_INST;
/* The final slot is a data slot, which will hold the address of either
* the fcache-return routine or the linked fragment. We reserve 12 bytes
* and use the 8-byte aligned region of 8 bytes within it.
@@ -248,7 +247,7 @@ insert_exit_stub_other_flags(dcontext_t *dcontext, fragment_t *f, linkstub_t *l,
* lots of places expect the stub size to be fixed.
*/
for (uint j = 0; j < num_nops_needed; j++)
*pc++ = NOP_INST;
*pc++ = RAW_NOP_INST;
}

return (int)((byte *)pc - (byte *)write_stub_pc);
@@ -404,7 +403,7 @@ static uint *
get_stub_branch(uint *pc)
{
/* Skip NOP instructions backwards. */
while (*pc == NOP_INST)
while (*pc == RAW_NOP_INST)
pc--;
/* The first non-NOP instruction must be the branch. */
ASSERT(*pc == BR_X1_INST);
@@ -1047,6 +1046,6 @@ fill_with_nops(dr_isa_mode_t isa_mode, byte *addr, size_t size)
return false;
}
for (pc = addr; pc < addr + size; pc += 4)
*(uint *)pc = NOP_INST; /* nop */
*(uint *)pc = RAW_NOP_INST; /* nop */
return true;
}
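
For context on the encoding that the rseq patching below reuses, insert_mov_imm() materializes a 64-bit value as one OP_movz plus an OP_movk per remaining non-zero 16-bit chunk, for at most 4 instructions. A minimal standalone sketch of that scheme, under the standard A64 "Move wide (immediate)" layout (encode_movz, encode_movk, and emit_mov_imm64 are illustrative names, not DR's API):

#include <stdint.h>

/* A64 "Move wide (immediate)": sf=1, opc=10 selects MOVZ, opc=11 selects
 * MOVK, hw (bits 21-22) selects the 16-bit shift, imm16 sits in bits 5-20,
 * and Rd in bits 0-4.
 */
static uint32_t
encode_movz(uint32_t rt, uint32_t imm16, uint32_t hw)
{
    return 0xd2800000u | rt | imm16 << 5 | hw << 21;
}

static uint32_t
encode_movk(uint32_t rt, uint32_t imm16, uint32_t hw)
{
    return 0xf2800000u | rt | imm16 << 5 | hw << 21;
}

/* Write movz x<rt>,#chunk0 plus a movk per remaining non-zero 16-bit
 * chunk; returns how many instructions were emitted (1 to 4).
 */
int
emit_mov_imm64(uint32_t *pc, uint32_t rt, uint64_t val)
{
    int n = 0;
    pc[n++] = encode_movz(rt, (uint32_t)(val & 0xffff), 0);
    for (uint32_t hw = 1; hw < 4; hw++) {
        uint32_t chunk = (uint32_t)(val >> (16 * hw)) & 0xffff;
        if (chunk != 0)
            pc[n++] = encode_movk(rt, chunk, hw);
    }
    return n;
}

For 0x1234567890ab, say, this yields a movz plus two movk instructions: it is exactly this variable-length output whose leftover slots patch_mov_immed_arch() below must NOP-fill.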
core/arch/aarchxx/mangle.c (25 changes: 24 additions & 1 deletion)
@@ -1201,11 +1201,34 @@ mangle_reinstate_it_blocks(dcontext_t *dcontext, instrlist_t *ilist, instr_t *st

#endif /* !AARCH64 */

/* This is *not* a hot-patchable patch: i.e., it is subject to races. */
void
patch_mov_immed_arch(dcontext_t *dcontext, ptr_int_t val, byte *pc, instr_t *first,
instr_t *last)
{
ASSERT_NOT_IMPLEMENTED(false); /* FIXME i#1551, i#1569 */
#ifdef AARCH64
uint *write_pc = (uint *)vmcode_get_writable_addr(pc);
ASSERT(first != NULL && last != NULL);
/* We expect OP_movz followed by up to 3 OP_movk. */
ASSERT(instr_get_opcode(first) == OP_movz && opnd_is_reg(instr_get_dst(first, 0)));
reg_id_t dst_reg = opnd_get_reg(instr_get_dst(first, 0));
int instr_count = 1;
for (instr_t *inst = instr_get_next(first); inst != NULL;
inst = instr_get_next(inst)) {
++instr_count;
ASSERT(instr_get_opcode(inst) == OP_movk && opnd_is_reg(instr_get_dst(inst, 0)));
if (inst == last)
break;
}
uint *end_pc = insert_mov_imm(write_pc, dst_reg, val);
ASSERT(end_pc - write_pc <= instr_count);
while (end_pc - write_pc < instr_count) {
*end_pc = RAW_NOP_INST;
++end_pc;
}
#else
ASSERT_NOT_IMPLEMENTED(false); /* TODO i#1551: NYI */
#endif
}

/* Used for fault translation */
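
The pad-with-NOPs patching above can be sketched in isolation, building on the emit_mov_imm64() sketch from the emit_utils.c section (again with illustrative names, not the actual DR code path):

#include <assert.h>
#include <stdint.h>

int emit_mov_imm64(uint32_t *pc, uint32_t rt, uint64_t val); /* sketch above */

/* Overwrite an existing movz/movk slot sequence with a new 64-bit value,
 * NOP-filling any now-unused slots.  Per the new comment in the diff,
 * this is not hot-patchable and is subject to races.
 */
static void
patch_mov_imm64(uint32_t *write_pc, uint32_t rt, uint64_t new_val,
                int slot_count)
{
    int used = emit_mov_imm64(write_pc, rt, new_val);
    /* Mirrors ASSERT(end_pc - write_pc <= instr_count) above. */
    assert(used <= slot_count);
    while (used < slot_count)
        write_pc[used++] = 0xd503201f; /* RAW_NOP_INST */
    /* The caller must still synchronize the icache for the patched range. */
}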
core/arch/arch.h (8 changes: 8 additions & 0 deletions)
@@ -1201,6 +1201,14 @@ emit_do_syscall(dcontext_t *dcontext, generated_code_t *code, byte *pc,
byte *fcache_return_pc, bool thread_shared, int interrupt,
uint *syscall_offs /*OUT*/);

#ifdef AARCH64
/* Generate move (immediate) of a 64-bit value using at most 4 instructions.
* pc must be a writable (vmcode) pc.
*/
uint *
insert_mov_imm(uint *pc, reg_id_t dst, ptr_int_t val);
#endif

#ifdef AARCHXX
byte *
emit_fcache_enter_gonative(dcontext_t *dcontext, generated_code_t *code, byte *pc);
core/arch/mangle_shared.c (97 changes: 77 additions & 20 deletions)
@@ -947,19 +947,21 @@ mangle_rseq_insert_call_sequence(dcontext_t *dcontext, instrlist_t *ilist, instr
ilist, next_instr,
XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_RSP), OPND_CREATE_INT32(8)));
# else
/* TODO i#2350: Add non-x86 support. We need to pay particular attention
/* TODO i#2350: Given that we plan to deprecate -rseq_assume_call, it may not be
* worth implementing non-x86 support. We'd need to pay particular attention
* to the stolen register. If we do a local copy (with no callouts) we could
* mangle it. We also cannot do an indirect call through anything but a
* register and thus need a dead register for the call-return approach, but
* that disappears once DR uses a local copy.
*/
REPORT_FATAL_ERROR_AND_EXIT(RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(),
get_application_pid(),
"Rseq is not yet supported for non-x86");
"-rseq_assume_call is not supported for non-x86");
ASSERT_NOT_REACHED();
# endif
}

/* scratch_reg is *not* spilled on entry. */
static void
mangle_rseq_write_exit_reason(dcontext_t *dcontext, instrlist_t *ilist,
instr_t *insert_at, reg_id_t scratch_reg)
@@ -976,11 +978,25 @@ mangle_rseq_write_exit_reason(dcontext_t *dcontext, instrlist_t *ilist,
opnd_create_reg(scratch_reg), ilist, insert_at, NULL,
NULL);
}
# ifdef AARCHXX
/* We need a 2nd scratch for our immediate. */
ASSERT(SCRATCH_ALWAYS_TLS());
reg_id_t scratch2 =
(scratch_reg == DR_REG_START_GPR) ? DR_REG_START_GPR + 1 : DR_REG_START_GPR;
PRE(ilist, insert_at, instr_create_save_to_tls(dcontext, scratch2, TLS_REG2_SLOT));
insert_mov_immed_ptrsz(dcontext, EXIT_REASON_RSEQ_ABORT, opnd_create_reg(scratch2),
ilist, insert_at, NULL, NULL);
# endif
PRE(ilist, insert_at,
XINST_CREATE_store(dcontext,
opnd_create_dcontext_field_via_reg_sz(
dcontext, scratch_reg, EXIT_REASON_OFFSET, OPSZ_2),
OPND_CREATE_INT16(EXIT_REASON_RSEQ_ABORT)));
XINST_CREATE_store_2bytes(dcontext,
opnd_create_dcontext_field_via_reg_sz(
dcontext, scratch_reg, EXIT_REASON_OFFSET, OPSZ_2),
IF_X86_ELSE(OPND_CREATE_INT16(EXIT_REASON_RSEQ_ABORT),
opnd_create_reg(scratch2))));
# ifdef AARCHXX
PRE(ilist, insert_at,
instr_create_restore_from_tls(dcontext, scratch2, TLS_REG2_SLOT));
# endif
if (SCRATCH_ALWAYS_TLS()) {
PRE(ilist, insert_at,
instr_create_restore_from_tls(dcontext, scratch_reg, TLS_REG1_SLOT));
Expand Down Expand Up @@ -1120,27 +1136,45 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
* decode_fragment() and even disassembly.
*/
instr_t *immed_first, *immed_last;
insert_mov_immed_ptrsz(dcontext, (ptr_int_t)INT_MAX IF_X64(+1),
opnd_create_reg(scratch_reg), ilist, insert_at, &immed_first,
&immed_last);
insert_mov_immed_ptrsz(dcontext, (ptr_int_t)-1, opnd_create_reg(scratch_reg), ilist,
insert_at, &immed_first, &immed_last);
ASSERT(immed_first != NULL);
IF_X86(ASSERT(immed_last == NULL));
int immed_count = 1;
for (instr_t *immed_inst = immed_first;
immed_last != NULL && immed_inst != immed_last;
immed_inst = instr_get_next(immed_inst)) {
++immed_count;
}
instr_t *label_rseq_cs =
mangle_rseq_create_label(dcontext, DR_RSEQ_LABEL_CS, immed_last == NULL ? 1 : 2);
mangle_rseq_create_label(dcontext, DR_RSEQ_LABEL_CS, immed_count);
PRE(ilist, immed_first /*prior to immeds*/, label_rseq_cs);
/* We need to mangle this segment ref, and all of the subsequent local copy. */
# ifdef X86
/* We need to mangle this segment ref, and all of the subsequent local copy. */
instr_t *start_mangling = XINST_CREATE_store(
dcontext,
opnd_create_far_base_disp(LIB_SEG_TLS, DR_REG_NULL, DR_REG_NULL, 0,
rseq_get_tls_ptr_offset(), OPSZ_PTR),
opnd_create_reg(scratch_reg));
instrlist_preinsert(ilist, insert_at, start_mangling);
# else
/* TODO i#2350: Construct an app TLS access instruction for aarchxx. */
ASSERT_NOT_IMPLEMENTED(false);
instr_t *start_mangling = INSTR_CREATE_label(dcontext); /* So it compiles. */
# endif
/* We need another scratch reg to write to TLS. */
ASSERT(SCRATCH_ALWAYS_TLS());
reg_id_t scratch2 =
(scratch_reg == DR_REG_START_GPR) ? DR_REG_START_GPR + 1 : DR_REG_START_GPR;
PRE(ilist, insert_at, instr_create_save_to_tls(dcontext, scratch2, TLS_REG2_SLOT));
/* We need to mangle this segment ref, and the local copy below. */
instr_t *start_mangling = INSTR_CREATE_mrs(dcontext, opnd_create_reg(scratch2),
opnd_create_reg(LIB_SEG_TLS));
instrlist_preinsert(ilist, insert_at, start_mangling);
PRE(ilist, insert_at,
XINST_CREATE_store(dcontext,
opnd_create_base_disp(scratch2, DR_REG_NULL, 0,
rseq_get_tls_ptr_offset(), OPSZ_PTR),
opnd_create_reg(scratch_reg)));
PRE(ilist, insert_at,
instr_create_restore_from_tls(dcontext, scratch2, TLS_REG2_SLOT));
# endif

/* Restore scratch_reg. */
if (SCRATCH_ALWAYS_TLS()) {
@@ -1240,6 +1274,7 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
}
generic_hash_destroy(dcontext, pc2instr);
/* Now mangle from this point. */
ASSERT(start_mangling != NULL);
*next_instr = start_mangling;

/* Clear the rseq ptr on exit to avoid problems if we free the rseq_cs and
@@ -1256,8 +1291,18 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
rseq_get_tls_ptr_offset(), OPSZ_PTR),
OPND_CREATE_INT32(0)));
# else
/* TODO i#2350: Construct an app TLS access instruction for aarchxx. */
ASSERT_NOT_IMPLEMENTED(false);
PRE(ilist, insert_at, instr_create_save_to_tls(dcontext, scratch2, TLS_REG2_SLOT));
PRE(ilist, insert_at,
INSTR_CREATE_mrs(dcontext, opnd_create_reg(scratch2),
opnd_create_reg(LIB_SEG_TLS)));
instrlist_preinsert(
ilist, insert_at,
XINST_CREATE_store(dcontext,
opnd_create_base_disp(scratch2, DR_REG_NULL, 0,
rseq_get_tls_ptr_offset(), OPSZ_PTR),
opnd_create_reg(DR_REG_XZR)));
PRE(ilist, insert_at,
instr_create_restore_from_tls(dcontext, scratch2, TLS_REG2_SLOT));
# endif

DOLOG(4, LOG_INTERP, {
Expand All @@ -1277,6 +1322,9 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr,
bool *reg_written;
int reg_written_size;
reg_id_t scratch_reg = DR_REG_START_GPR;
# ifdef ARM
ASSERT_NOT_TESTED();
# endif
if (!rseq_get_region_info(pc, &start, &end, &handler, &reg_written,
&reg_written_size)) {
ASSERT_NOT_REACHED(); /* Caller was supposed to check for overlap */
Expand Down Expand Up @@ -1424,16 +1472,23 @@ mangle_rseq_finalize(dcontext_t *dcontext, instrlist_t *ilist, fragment_t *f)
case DR_RSEQ_LABEL_CS:
immed_start_pc = pc;
immed_first = instr_get_next(instr);
if (label_data->data[1] > 1)
ptr_int_t immed_count = label_data->data[1];
/* For A64 we should have 4 immeds to handle any address. */
IF_AARCH64(ASSERT(immed_count == 4));
if (immed_count > 1) {
immed_last = instr_get_next(immed_first);
--immed_count;
while (immed_count > 1) {
immed_last = instr_get_next(immed_last);
--immed_count;
}
}
break;
default: ASSERT_NOT_REACHED();
}
}
pc += instr_length(dcontext, instr);
}
LOG(THREAD, LOG_INTERP, 4, "%s: start=" PFX ", end=" PFX ", abort=" PFX "\n",
__FUNCTION__, rseq_start, rseq_end, rseq_abort);
ASSERT(rseq_start != NULL && rseq_end != NULL && rseq_abort != NULL);

byte *rseq_cs_alloc, *rseq_cs;
Expand All @@ -1445,6 +1500,8 @@ mangle_rseq_finalize(dcontext_t *dcontext, instrlist_t *ilist, fragment_t *f)
rseq_cs_alloc = rseq_get_rseq_cs_alloc(&rseq_cs);
rseq_record_rseq_cs(rseq_cs_alloc, f, rseq_start, rseq_end, rseq_abort);
ASSERT(immed_start_pc != NULL && immed_first != NULL);
LOG(THREAD, LOG_INTERP, 4, "%s: start=%p, end=%p, abort=%p stored @%p\n",
__FUNCTION__, rseq_start, rseq_end, rseq_abort, rseq_cs);
patch_mov_immed_ptrsz(dcontext, (ptr_int_t)rseq_cs, immed_start_pc, immed_first,
immed_last);
}
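
Because A64 cannot store an immediate directly to memory and has no x86-style segment addressing for reaching the app's TLS, the mangling above spills a second scratch register to a DR TLS slot, reads the app TLS base with mrs, stores through it, and uses the zero register xzr to clear the rseq_cs pointer on exit. A userspace, AArch64-only sketch of the mrs pattern (the offset name in the comments is an illustrative stand-in for rseq_get_tls_ptr_offset()):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* The mangled code reads the app TLS base the same way (LIB_SEG_TLS
     * on AArch64 Linux is TPIDR_EL0), into the spilled second scratch.
     */
    uint64_t tls_base;
    __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tls_base));
    printf("app TLS base: 0x%llx\n", (unsigned long long)tls_base);
    /* The emitted sequence is, in effect (x1 = scratch_reg holding the
     * rseq_cs pointer, x2 = scratch2, offs = rseq_get_tls_ptr_offset()):
     *   str x2, [<TLS_REG2_SLOT>]   ; spill second scratch to DR TLS
     *   mrs x2, tpidr_el0           ; app TLS base
     *   str x1, [x2, #offs]         ; publish the rseq_cs pointer
     *   ldr x2, [<TLS_REG2_SLOT>]   ; restore second scratch
     * and on exit the same shape with "str xzr, [x2, #offs]" to clear it.
     */
    return 0;
}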
core/ir/aarchxx/ir_utils.c (3 changes: 2 additions & 1 deletion)
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2014-2020 Google, Inc. All rights reserved.
* Copyright (c) 2014-2021 Google, Inc. All rights reserved.
* Copyright (c) 2016 ARM Limited. All rights reserved.
* **********************************************************/

@@ -162,6 +162,7 @@ convert_to_near_rel_arch(dcontext_t *dcontext, instrlist_t *ilist, instr_t *inst
#endif
}

/* Keep this in sync with patch_mov_immed_arch(). */
void
insert_mov_immed_arch(dcontext_t *dcontext, instr_t *src_inst, byte *encode_estimate,
ptr_int_t val, opnd_t dst, instrlist_t *ilist, instr_t *instr,
core/ir/instr.h (4 changes: 4 additions & 0 deletions)
@@ -865,6 +865,10 @@
CBZ_BYTE_A = 0xb1, /* this assumes the top bit of the disp is 0 */
CBNZ_BYTE_A = 0xb9, /* this assumes the top bit of the disp is 0 */
};
#elif defined(AARCH64)
enum {
RAW_NOP_INST = 0xd503201f,
};
#endif

#include "instr_inline_api.h"
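
For reference, 0xd503201f encodes the A64 NOP, i.e. HINT #0, with the hint number carried in bits 5-11; that is why one raw constant can pad both exit stubs and patched immediate sequences. A small sanity check of that field layout (illustrative, not DR code):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
    const uint32_t raw_nop = 0xd503201f; /* RAW_NOP_INST */
    /* HINT #imm is 0xd503201f | (imm << 5); NOP is HINT #0, so the
     * 7-bit hint field must be all clear.
     */
    assert(((raw_nop >> 5) & 0x7f) == 0);
    /* YIELD, for comparison, is HINT #1 == 0xd503203f. */
    assert((raw_nop | 1u << 5) == 0xd503203f);
    return 0;
}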
