Skip to content

Commit

Permalink
i#1569, i#3544 Port sample 'cbr' to AARCH64 and RISCV64.
Browse files Browse the repository at this point in the history
1. For RISCV64, conditional branch instruction of 'C' extension may not reach after adding clean call. So like X86, we add support to detect and convert compressed cbr to longer version.
2. For AARCH64 and RISCV64, a cbr may use the stolen reg and can not be mangled later as it is meta. So we check whether a cbr uses the stolen reg and replace it with a scratch reg.
Now cbr works for AARCH64 and RISCV64. Still not work for ARM32 because of some bugs unrelated to cbr sample.

Issue: DynamoRIO#1569, DynamoRIO#3544
  • Loading branch information
chenhy0106 committed Jun 3, 2024
1 parent 8419c47 commit 82c8bef
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 11 deletions.
4 changes: 3 additions & 1 deletion api/samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,10 @@ add_sample_client(memtrace_simple "memtrace_simple.c;utils.c" "drmgr;drreg;druti
add_sample_client(memval_simple "memval_simple.c;utils.c" "drmgr;drreg;drutil;drx")
add_sample_client(instrace_simple "instrace_simple.c;utils.c" "drmgr;drreg;drx")
add_sample_client(opcode_count "opcode_count.cpp" "drmgr;drreg;drx;droption")
if (NOT ARM)
add_sample_client(cbr "cbr.c" "drmgr")
endif (NOT ARM)
if (X86) # FIXME i#1551, i#1569, i#3544: port to ARM/AArch64/RISCV64
add_sample_client(cbr "cbr.c" "drmgr")
add_sample_client(countcalls "countcalls.c" "drmgr;drreg")
add_sample_client(inc2add "inc2add.c" "drmgr;drreg")
add_sample_client(memtrace_x86_binary "memtrace_x86.c;utils.c"
Expand Down
77 changes: 74 additions & 3 deletions api/samples/cbr.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@
/* Possible cbr states */
typedef enum { CBR_NEITHER = 0x00, CBR_TAKEN = 0x01, CBR_NOT_TAKEN = 0x10 } cbr_state_t;

#if defined(AARCH64) || defined(RISCV64)
/* Scratch regs for mangling stolen reg used in cbr. */
reg_id_t stolen_reg;
reg_id_t scratch_for_stolen = IF_AARCHXX_ELSE(DR_REG_R0, DR_REG_X10);
reg_id_t scratch_for_stolen_alternate = IF_AARCHXX_ELSE(DR_REG_R1, DR_REG_X11);
#if defined(RISCV64)
reg_id_t scratch_for_tp = IF_AARCHXX_ELSE(DR_REG_R2, DR_REG_X12);
reg_id_t scratch_for_tp_alternate = IF_AARCHXX_ELSE(DR_REG_R3, DR_REG_X13);
#endif
#endif

/* Each bucket in the hash table is a list of the following elements.
* For each cbr, we store its address and its state.
*/
Expand Down Expand Up @@ -334,14 +345,49 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
app_pc fall = (app_pc)decode_next_pc(drcontext, (byte *)src);
app_pc targ = instr_get_branch_target_pc(instr);

#if defined(AARCH64) || defined(RISCV64)
/* For AARCH64 and RISCV, if instr uses the stolen regs, we have to replace it with a
* scratch reg here, because instr will be set meta and can not be mangled later.
*
* We use DynamoRIO's interfaces instead of drreg extension in cbr sample,
* because drreg is designed for linear control flow.
*
* For AARCH64, We can not use drreg_get_app_value() as DR will refuse to load into the same
* reg returned by dr_get_stolen_reg(). See comment in drreg_get_app_value().
*/
bool use_stolen_reg = instr_uses_reg(instr, stolen_reg);
reg_id_t scratch1 = scratch_for_stolen;
if (use_stolen_reg) {
if (instr_uses_reg(instr, scratch1))
scratch1 = scratch_for_stolen_alternate;

dr_save_reg(drcontext, bb, instr, scratch1, SPILL_SLOT_1);
dr_insert_get_stolen_reg_value(drcontext, bb, instr, scratch1);
instr_replace_reg_resize(instr, stolen_reg, scratch1);
}
#if defined(RISCV64)
/* For RISCV64, thread pointer register also should be restored. */
bool use_tp_reg = instr_uses_reg(instr, DR_REG_TP);
reg_id_t scratch2 = scratch_for_tp;
if (use_tp_reg) {
if (instr_uses_reg(instr, scratch2))
scratch2 = scratch_for_tp_alternate;

dr_save_reg(drcontext, bb, instr, scratch2, SPILL_SLOT_2);
dr_insert_get_tp_reg_value(drcontext, bb, instr, scratch2);
instr_replace_reg_resize(instr, DR_REG_TP, scratch2);
}
#endif
#endif

/* Redirect the existing cbr to jump to a callout for
* the 'taken' case. We'll insert a 'not-taken'
* callout at the fallthrough address.
*/
instr_t *label = INSTR_CREATE_label(drcontext);
/* should be meta, and meta-instrs shouldn't have translations */
instr_set_meta_no_translation(instr);
/* it may not reach (in particular for x64) w/ our added clean call */
/* it may not reach w/ our added clean call */
if (instr_is_cti_short(instr)) {
/* if jecxz/loop we want to set the target of the long-taken
* so set instr to the return value
Expand All @@ -350,6 +396,15 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
}
instr_set_target(instr, opnd_create_instr(label));

#if defined(AARCH64) || defined(RISCV64)
if (use_stolen_reg)
dr_restore_reg(drcontext, bb, NULL, scratch1, SPILL_SLOT_1);
#if defined(RISCV64)
if (use_tp_reg)
dr_restore_reg(drcontext, bb, NULL, scratch2, SPILL_SLOT_2);
#endif
#endif

if (insert_not_taken) {
/* Callout for the not-taken case. Insert after
* the cbr (i.e., 3rd argument is NULL).
Expand All @@ -374,11 +429,21 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
* well as Linux.
*/
instrlist_preinsert(
bb, NULL, INSTR_XL8(INSTR_CREATE_jmp(drcontext, opnd_create_pc(fall)), fall));
bb, NULL,
INSTR_XL8(XINST_CREATE_jump(drcontext, opnd_create_pc(fall)), fall));

/* label goes before the 'taken' callout */
MINSERT(bb, NULL, label);

#if defined(AARCH64) || defined(RISCV64)
if (use_stolen_reg)
dr_restore_reg(drcontext, bb, NULL, scratch1, SPILL_SLOT_1);
#if defined(RISCV64)
if (use_tp_reg)
dr_restore_reg(drcontext, bb, NULL, scratch2, SPILL_SLOT_2);
#endif
#endif

if (insert_taken) {
/* Callout for the taken case */
dr_insert_clean_call_ex(drcontext, bb, NULL, (void *)at_taken,
Expand All @@ -392,7 +457,8 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
* block (this should not be a meta-instruction).
*/
instrlist_preinsert(
bb, NULL, INSTR_XL8(INSTR_CREATE_jmp(drcontext, opnd_create_pc(targ)), targ));
bb, NULL,
INSTR_XL8(XINST_CREATE_jump(drcontext, opnd_create_pc(targ)), targ));
}
/* since our added instrumentation is not constant, we ask to store
* translations now
Expand Down Expand Up @@ -440,6 +506,11 @@ dr_client_main(client_id_t id, int argc, const char *argv[])
DR_ASSERT_MSG(false, "drmgr_init failed!");

global_table = new_table();

#if defined(AARCH64) || defined(RISCV64)
stolen_reg = dr_get_stolen_reg();
#endif

if (!drmgr_register_bb_instrumentation_event(NULL, event_app_instruction, NULL))
DR_ASSERT_MSG(false, "fail to register event_app_instruction!");
dr_register_exit_event(dr_exit);
Expand Down
6 changes: 2 additions & 4 deletions core/ir/riscv64/instr.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,8 @@ instr_is_near_ubr(instr_t *instr)
bool
instr_is_cti_short(instr_t *instr)
{
/* The branch with smallest reach is direct branch, with range +/- 4 KiB.
* We have restricted MAX_FRAGMENT_SIZE on RISCV64 accordingly.
*/
return false;
int opc = instr_get_opcode(instr);
return (opc == OP_c_beqz || opc == OP_c_bnez);
}

bool
Expand Down
10 changes: 7 additions & 3 deletions core/ir/riscv64/ir_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ remangle_short_rewrite(dcontext_t *dcontext, instr_t *instr, byte *pc, app_pc ta
instr_t *
convert_to_near_rel_arch(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr)
{
/* FIXME i#3544: Not implemented */
ASSERT_NOT_IMPLEMENTED(false);
return NULL;
int opc = instr_get_opcode(instr);
if (opc == OP_c_beqz)
instr_set_opcode(instr, OP_beq);
else if (opc == OP_c_bnez)
instr_set_opcode(instr, OP_bne);

return instr;
}

static int
Expand Down
13 changes: 13 additions & 0 deletions core/lib/dr_ir_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,19 @@ DR_API
int
dr_insert_it_instrs(void *drcontext, instrlist_t *ilist);

DR_API
/**
* Insert code to get the application value of the thread pointer register
* into register \p reg.
*
* \return whether successful.
*
* \note RISCV-only
*/
bool
dr_insert_get_tp_reg_value(void *drcontext, instrlist_t *ilist, instr_t *instr,
reg_id_t reg);

DR_API
/**
* Inserts into \p ilist prior to \p where meta-instruction(s) to save the
Expand Down
20 changes: 20 additions & 0 deletions core/lib/instrument.c
Original file line number Diff line number Diff line change
Expand Up @@ -7451,6 +7451,26 @@ dr_insert_it_instrs(void *drcontext, instrlist_t *ilist)
#endif
}

DR_API
bool
dr_insert_get_tp_reg_value(void *drcontext, instrlist_t *ilist, instr_t *instr,
reg_id_t reg)
{
#if !defined(RISCV64)
return false;
#else
CLIENT_ASSERT(reg_is_pointer_sized(reg),
"dr_insert_get_tp_reg_value: reg has wrong size\n");
CLIENT_ASSERT(reg != DR_REG_TP,
"dr_insert_get_tp_reg_value: reg should not be tp itself\n");
instrlist_meta_preinsert(
ilist, instr, instr_create_restore_from_tls(drcontext, reg,
os_get_app_tls_base_offset(TLS_REG_LIB)));

return true;
#endif
}

DR_API
bool
dr_prepopulate_cache(app_pc *tags, size_t tags_count)
Expand Down

0 comments on commit 82c8bef

Please sign in to comment.