From 82c8bef87e539a73db443280a9c79fbc01b92968 Mon Sep 17 00:00:00 2001 From: chenhy0106 <1002138131@qq.com> Date: Mon, 3 Jun 2024 21:25:05 +0800 Subject: [PATCH] i#1569, i#3544 Port sample 'cbr' to AARCH64 and RISCV64. 1. For RISCV64, conditional branch instruction of 'C' extension may not reach after adding clean call. So like X86, we add support to detect and convert compressed cbr to longer version. 2. For AARCH64 and RISCV64, a cbr may use the stolen reg and can not be mangled later as it is meta. So we check whether a cbr uses the stolen reg and replace it with a scratch reg. Now cbr works for AARCH64 and RISCV64. Still not work for ARM32 because of some bugs unrelated to cbr sample. Issue: #1569, #3544 --- api/samples/CMakeLists.txt | 4 +- api/samples/cbr.c | 77 ++++++++++++++++++++++++++++++++++++-- core/ir/riscv64/instr.c | 6 +-- core/ir/riscv64/ir_utils.c | 10 +++-- core/lib/dr_ir_utils.h | 13 +++++++ core/lib/instrument.c | 20 ++++++++++ 6 files changed, 119 insertions(+), 11 deletions(-) diff --git a/api/samples/CMakeLists.txt b/api/samples/CMakeLists.txt index a70bd58e858..49b39050c3f 100644 --- a/api/samples/CMakeLists.txt +++ b/api/samples/CMakeLists.txt @@ -241,8 +241,10 @@ add_sample_client(memtrace_simple "memtrace_simple.c;utils.c" "drmgr;drreg;druti add_sample_client(memval_simple "memval_simple.c;utils.c" "drmgr;drreg;drutil;drx") add_sample_client(instrace_simple "instrace_simple.c;utils.c" "drmgr;drreg;drx") add_sample_client(opcode_count "opcode_count.cpp" "drmgr;drreg;drx;droption") +if (NOT ARM) +add_sample_client(cbr "cbr.c" "drmgr") +endif (NOT ARM) if (X86) # FIXME i#1551, i#1569, i#3544: port to ARM/AArch64/RISCV64 - add_sample_client(cbr "cbr.c" "drmgr") add_sample_client(countcalls "countcalls.c" "drmgr;drreg") add_sample_client(inc2add "inc2add.c" "drmgr;drreg") add_sample_client(memtrace_x86_binary "memtrace_x86.c;utils.c" diff --git a/api/samples/cbr.c b/api/samples/cbr.c index 6159ec51102..1fc62b9bd6e 100644 --- a/api/samples/cbr.c +++ b/api/samples/cbr.c @@ -81,6 +81,17 @@ /* Possible cbr states */ typedef enum { CBR_NEITHER = 0x00, CBR_TAKEN = 0x01, CBR_NOT_TAKEN = 0x10 } cbr_state_t; +#if defined(AARCH64) || defined(RISCV64) +/* Scratch regs for mangling stolen reg used in cbr. */ +reg_id_t stolen_reg; +reg_id_t scratch_for_stolen = IF_AARCHXX_ELSE(DR_REG_R0, DR_REG_X10); +reg_id_t scratch_for_stolen_alternate = IF_AARCHXX_ELSE(DR_REG_R1, DR_REG_X11); +#if defined(RISCV64) +reg_id_t scratch_for_tp = IF_AARCHXX_ELSE(DR_REG_R2, DR_REG_X12); +reg_id_t scratch_for_tp_alternate = IF_AARCHXX_ELSE(DR_REG_R3, DR_REG_X13); +#endif +#endif + /* Each bucket in the hash table is a list of the following elements. * For each cbr, we store its address and its state. */ @@ -334,6 +345,41 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst app_pc fall = (app_pc)decode_next_pc(drcontext, (byte *)src); app_pc targ = instr_get_branch_target_pc(instr); +#if defined(AARCH64) || defined(RISCV64) + /* For AARCH64 and RISCV, if instr uses the stolen regs, we have to replace it with a + * scratch reg here, because instr will be set meta and can not be mangled later. + * + * We use DynamoRIO's interfaces instead of drreg extension in cbr sample, + * because drreg is designed for linear control flow. + * + * For AARCH64, We can not use drreg_get_app_value() as DR will refuse to load into the same + * reg returned by dr_get_stolen_reg(). See comment in drreg_get_app_value(). + */ + bool use_stolen_reg = instr_uses_reg(instr, stolen_reg); + reg_id_t scratch1 = scratch_for_stolen; + if (use_stolen_reg) { + if (instr_uses_reg(instr, scratch1)) + scratch1 = scratch_for_stolen_alternate; + + dr_save_reg(drcontext, bb, instr, scratch1, SPILL_SLOT_1); + dr_insert_get_stolen_reg_value(drcontext, bb, instr, scratch1); + instr_replace_reg_resize(instr, stolen_reg, scratch1); + } +#if defined(RISCV64) + /* For RISCV64, thread pointer register also should be restored. */ + bool use_tp_reg = instr_uses_reg(instr, DR_REG_TP); + reg_id_t scratch2 = scratch_for_tp; + if (use_tp_reg) { + if (instr_uses_reg(instr, scratch2)) + scratch2 = scratch_for_tp_alternate; + + dr_save_reg(drcontext, bb, instr, scratch2, SPILL_SLOT_2); + dr_insert_get_tp_reg_value(drcontext, bb, instr, scratch2); + instr_replace_reg_resize(instr, DR_REG_TP, scratch2); + } +#endif +#endif + /* Redirect the existing cbr to jump to a callout for * the 'taken' case. We'll insert a 'not-taken' * callout at the fallthrough address. @@ -341,7 +387,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst instr_t *label = INSTR_CREATE_label(drcontext); /* should be meta, and meta-instrs shouldn't have translations */ instr_set_meta_no_translation(instr); - /* it may not reach (in particular for x64) w/ our added clean call */ + /* it may not reach w/ our added clean call */ if (instr_is_cti_short(instr)) { /* if jecxz/loop we want to set the target of the long-taken * so set instr to the return value @@ -350,6 +396,15 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst } instr_set_target(instr, opnd_create_instr(label)); +#if defined(AARCH64) || defined(RISCV64) + if (use_stolen_reg) + dr_restore_reg(drcontext, bb, NULL, scratch1, SPILL_SLOT_1); +#if defined(RISCV64) + if (use_tp_reg) + dr_restore_reg(drcontext, bb, NULL, scratch2, SPILL_SLOT_2); +#endif +#endif + if (insert_not_taken) { /* Callout for the not-taken case. Insert after * the cbr (i.e., 3rd argument is NULL). @@ -374,11 +429,21 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst * well as Linux. */ instrlist_preinsert( - bb, NULL, INSTR_XL8(INSTR_CREATE_jmp(drcontext, opnd_create_pc(fall)), fall)); + bb, NULL, + INSTR_XL8(XINST_CREATE_jump(drcontext, opnd_create_pc(fall)), fall)); /* label goes before the 'taken' callout */ MINSERT(bb, NULL, label); +#if defined(AARCH64) || defined(RISCV64) + if (use_stolen_reg) + dr_restore_reg(drcontext, bb, NULL, scratch1, SPILL_SLOT_1); +#if defined(RISCV64) + if (use_tp_reg) + dr_restore_reg(drcontext, bb, NULL, scratch2, SPILL_SLOT_2); +#endif +#endif + if (insert_taken) { /* Callout for the taken case */ dr_insert_clean_call_ex(drcontext, bb, NULL, (void *)at_taken, @@ -392,7 +457,8 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst * block (this should not be a meta-instruction). */ instrlist_preinsert( - bb, NULL, INSTR_XL8(INSTR_CREATE_jmp(drcontext, opnd_create_pc(targ)), targ)); + bb, NULL, + INSTR_XL8(XINST_CREATE_jump(drcontext, opnd_create_pc(targ)), targ)); } /* since our added instrumentation is not constant, we ask to store * translations now @@ -440,6 +506,11 @@ dr_client_main(client_id_t id, int argc, const char *argv[]) DR_ASSERT_MSG(false, "drmgr_init failed!"); global_table = new_table(); + +#if defined(AARCH64) || defined(RISCV64) + stolen_reg = dr_get_stolen_reg(); +#endif + if (!drmgr_register_bb_instrumentation_event(NULL, event_app_instruction, NULL)) DR_ASSERT_MSG(false, "fail to register event_app_instruction!"); dr_register_exit_event(dr_exit); diff --git a/core/ir/riscv64/instr.c b/core/ir/riscv64/instr.c index c8e597a1e29..baece8a4adf 100644 --- a/core/ir/riscv64/instr.c +++ b/core/ir/riscv64/instr.c @@ -244,10 +244,8 @@ instr_is_near_ubr(instr_t *instr) bool instr_is_cti_short(instr_t *instr) { - /* The branch with smallest reach is direct branch, with range +/- 4 KiB. - * We have restricted MAX_FRAGMENT_SIZE on RISCV64 accordingly. - */ - return false; + int opc = instr_get_opcode(instr); + return (opc == OP_c_beqz || opc == OP_c_bnez); } bool diff --git a/core/ir/riscv64/ir_utils.c b/core/ir/riscv64/ir_utils.c index 05b60f24aee..c65f51df391 100644 --- a/core/ir/riscv64/ir_utils.c +++ b/core/ir/riscv64/ir_utils.c @@ -50,9 +50,13 @@ remangle_short_rewrite(dcontext_t *dcontext, instr_t *instr, byte *pc, app_pc ta instr_t * convert_to_near_rel_arch(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) { - /* FIXME i#3544: Not implemented */ - ASSERT_NOT_IMPLEMENTED(false); - return NULL; + int opc = instr_get_opcode(instr); + if (opc == OP_c_beqz) + instr_set_opcode(instr, OP_beq); + else if (opc == OP_c_bnez) + instr_set_opcode(instr, OP_bne); + + return instr; } static int diff --git a/core/lib/dr_ir_utils.h b/core/lib/dr_ir_utils.h index c5f19de9692..7d4bf686b7a 100644 --- a/core/lib/dr_ir_utils.h +++ b/core/lib/dr_ir_utils.h @@ -1137,6 +1137,19 @@ DR_API int dr_insert_it_instrs(void *drcontext, instrlist_t *ilist); +DR_API +/** + * Insert code to get the application value of the thread pointer register + * into register \p reg. + * + * \return whether successful. + * + * \note RISCV-only + */ +bool +dr_insert_get_tp_reg_value(void *drcontext, instrlist_t *ilist, instr_t *instr, + reg_id_t reg); + DR_API /** * Inserts into \p ilist prior to \p where meta-instruction(s) to save the diff --git a/core/lib/instrument.c b/core/lib/instrument.c index 3b15055ff18..d02bbe05f3f 100644 --- a/core/lib/instrument.c +++ b/core/lib/instrument.c @@ -7451,6 +7451,26 @@ dr_insert_it_instrs(void *drcontext, instrlist_t *ilist) #endif } +DR_API +bool +dr_insert_get_tp_reg_value(void *drcontext, instrlist_t *ilist, instr_t *instr, + reg_id_t reg) +{ +#if !defined(RISCV64) + return false; +#else + CLIENT_ASSERT(reg_is_pointer_sized(reg), + "dr_insert_get_tp_reg_value: reg has wrong size\n"); + CLIENT_ASSERT(reg != DR_REG_TP, + "dr_insert_get_tp_reg_value: reg should not be tp itself\n"); + instrlist_meta_preinsert( + ilist, instr, instr_create_restore_from_tls(drcontext, reg, + os_get_app_tls_base_offset(TLS_REG_LIB))); + + return true; +#endif +} + DR_API bool dr_prepopulate_cache(app_pc *tags, size_t tags_count)