From e43cb86e832ba072eac241b1ec71e77aef31635e Mon Sep 17 00:00:00 2001 From: chenhy0106 <42792255+chenhy0106@users.noreply.github.com> Date: Thu, 20 Jun 2024 19:39:21 +0800 Subject: [PATCH] i#3544 RV64: Port sample 'cbr' to RISCV64 (#6837) 1. Conditional branch instruction of RISC-V 'C' extension may not reach after adding clean call. So like X86, we add support to detect and convert compressed cbr to longer version. 2. For RISCV64, when a cbr instruction uses the stolen reg, we need to replace it with a scratch reg. Now cbr works for RISCV64. Still does not work for ARM32/ARM64 because of some bugs unrelated to cbr sample. Issue: #3544 --- api/samples/CMakeLists.txt | 4 ++- api/samples/cbr.c | 65 ++++++++++++++++++++++++++++++++++++-- core/ir/riscv64/instr.c | 6 ++-- core/ir/riscv64/ir_utils.c | 10 ++++-- core/lib/dr_ir_utils.h | 13 ++++++++ core/lib/instrument.c | 22 +++++++++++++ suite/tests/CMakeLists.txt | 1 + 7 files changed, 110 insertions(+), 11 deletions(-) diff --git a/api/samples/CMakeLists.txt b/api/samples/CMakeLists.txt index a70bd58e858..39a95f45537 100644 --- a/api/samples/CMakeLists.txt +++ b/api/samples/CMakeLists.txt @@ -241,8 +241,10 @@ add_sample_client(memtrace_simple "memtrace_simple.c;utils.c" "drmgr;drreg;druti add_sample_client(memval_simple "memval_simple.c;utils.c" "drmgr;drreg;drutil;drx") add_sample_client(instrace_simple "instrace_simple.c;utils.c" "drmgr;drreg;drx") add_sample_client(opcode_count "opcode_count.cpp" "drmgr;drreg;drx;droption") +if (X86 OR RISCV64) # TODO i#1551, #1569: Port to AArchXX. +add_sample_client(cbr "cbr.c" "drmgr") +endif () if (X86) # FIXME i#1551, i#1569, i#3544: port to ARM/AArch64/RISCV64 - add_sample_client(cbr "cbr.c" "drmgr") add_sample_client(countcalls "countcalls.c" "drmgr;drreg") add_sample_client(inc2add "inc2add.c" "drmgr;drreg") add_sample_client(memtrace_x86_binary "memtrace_x86.c;utils.c" diff --git a/api/samples/cbr.c b/api/samples/cbr.c index 6159ec51102..08e94c25361 100644 --- a/api/samples/cbr.c +++ b/api/samples/cbr.c @@ -81,6 +81,10 @@ /* Possible cbr states */ typedef enum { CBR_NEITHER = 0x00, CBR_TAKEN = 0x01, CBR_NOT_TAKEN = 0x10 } cbr_state_t; +#if defined(RISCV64) +reg_id_t stolen_reg; +#endif + /* Each bucket in the hash table is a list of the following elements. * For each cbr, we store its address and its state. */ @@ -334,6 +338,38 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst app_pc fall = (app_pc)decode_next_pc(drcontext, (byte *)src); app_pc targ = instr_get_branch_target_pc(instr); +#if defined(RISCV64) + /* For RISCV64, if instr uses the stolen reg, we have to replace it + * with a scratch reg here, because instr will be set meta and can not be mangled + * later. + * + * We use DynamoRIO's interfaces instead of drreg extension in cbr sample, + * because drreg is designed for linear control flow. + */ + bool uses_stolen_reg = instr_uses_reg(instr, stolen_reg); + reg_id_t scratch1 = DR_REG_A0; + if (uses_stolen_reg) { + if (instr_uses_reg(instr, scratch1)) + scratch1 = DR_REG_A1; + + dr_save_reg(drcontext, bb, instr, scratch1, SPILL_SLOT_1); + dr_insert_get_stolen_reg_value(drcontext, bb, instr, scratch1); + instr_replace_reg_resize(instr, stolen_reg, scratch1); + } + + /* For RISCV64, thread pointer register also should be restored. */ + bool uses_tp_reg = instr_uses_reg(instr, DR_REG_TP); + reg_id_t scratch2 = DR_REG_A2; + if (uses_tp_reg) { + if (instr_uses_reg(instr, scratch2)) + scratch2 = DR_REG_A3; + + dr_save_reg(drcontext, bb, instr, scratch2, SPILL_SLOT_2); + dr_insert_get_app_tls(drcontext, bb, instr, DR_REG_TP, scratch2); + instr_replace_reg_resize(instr, DR_REG_TP, scratch2); + } +#endif + /* Redirect the existing cbr to jump to a callout for * the 'taken' case. We'll insert a 'not-taken' * callout at the fallthrough address. @@ -341,7 +377,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst instr_t *label = INSTR_CREATE_label(drcontext); /* should be meta, and meta-instrs shouldn't have translations */ instr_set_meta_no_translation(instr); - /* it may not reach (in particular for x64) w/ our added clean call */ + /* it may not reach w/ our added clean call */ if (instr_is_cti_short(instr)) { /* if jecxz/loop we want to set the target of the long-taken * so set instr to the return value @@ -350,6 +386,14 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst } instr_set_target(instr, opnd_create_instr(label)); +#if defined(RISCV64) + if (uses_stolen_reg) + dr_restore_reg(drcontext, bb, NULL, scratch1, SPILL_SLOT_1); + + if (uses_tp_reg) + dr_restore_reg(drcontext, bb, NULL, scratch2, SPILL_SLOT_2); +#endif + if (insert_not_taken) { /* Callout for the not-taken case. Insert after * the cbr (i.e., 3rd argument is NULL). @@ -374,11 +418,20 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst * well as Linux. */ instrlist_preinsert( - bb, NULL, INSTR_XL8(INSTR_CREATE_jmp(drcontext, opnd_create_pc(fall)), fall)); + bb, NULL, + INSTR_XL8(XINST_CREATE_jump(drcontext, opnd_create_pc(fall)), fall)); /* label goes before the 'taken' callout */ MINSERT(bb, NULL, label); +#if defined(RISCV64) + if (uses_stolen_reg) + dr_restore_reg(drcontext, bb, NULL, scratch1, SPILL_SLOT_1); + + if (uses_tp_reg) + dr_restore_reg(drcontext, bb, NULL, scratch2, SPILL_SLOT_2); +#endif + if (insert_taken) { /* Callout for the taken case */ dr_insert_clean_call_ex(drcontext, bb, NULL, (void *)at_taken, @@ -392,7 +445,8 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst * block (this should not be a meta-instruction). */ instrlist_preinsert( - bb, NULL, INSTR_XL8(INSTR_CREATE_jmp(drcontext, opnd_create_pc(targ)), targ)); + bb, NULL, + INSTR_XL8(XINST_CREATE_jump(drcontext, opnd_create_pc(targ)), targ)); } /* since our added instrumentation is not constant, we ask to store * translations now @@ -440,6 +494,11 @@ dr_client_main(client_id_t id, int argc, const char *argv[]) DR_ASSERT_MSG(false, "drmgr_init failed!"); global_table = new_table(); + +#if defined(RISCV64) + stolen_reg = dr_get_stolen_reg(); +#endif + if (!drmgr_register_bb_instrumentation_event(NULL, event_app_instruction, NULL)) DR_ASSERT_MSG(false, "fail to register event_app_instruction!"); dr_register_exit_event(dr_exit); diff --git a/core/ir/riscv64/instr.c b/core/ir/riscv64/instr.c index c8e597a1e29..baece8a4adf 100644 --- a/core/ir/riscv64/instr.c +++ b/core/ir/riscv64/instr.c @@ -244,10 +244,8 @@ instr_is_near_ubr(instr_t *instr) bool instr_is_cti_short(instr_t *instr) { - /* The branch with smallest reach is direct branch, with range +/- 4 KiB. - * We have restricted MAX_FRAGMENT_SIZE on RISCV64 accordingly. - */ - return false; + int opc = instr_get_opcode(instr); + return (opc == OP_c_beqz || opc == OP_c_bnez); } bool diff --git a/core/ir/riscv64/ir_utils.c b/core/ir/riscv64/ir_utils.c index 05b60f24aee..c65f51df391 100644 --- a/core/ir/riscv64/ir_utils.c +++ b/core/ir/riscv64/ir_utils.c @@ -50,9 +50,13 @@ remangle_short_rewrite(dcontext_t *dcontext, instr_t *instr, byte *pc, app_pc ta instr_t * convert_to_near_rel_arch(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) { - /* FIXME i#3544: Not implemented */ - ASSERT_NOT_IMPLEMENTED(false); - return NULL; + int opc = instr_get_opcode(instr); + if (opc == OP_c_beqz) + instr_set_opcode(instr, OP_beq); + else if (opc == OP_c_bnez) + instr_set_opcode(instr, OP_bne); + + return instr; } static int diff --git a/core/lib/dr_ir_utils.h b/core/lib/dr_ir_utils.h index c5f19de9692..8dd606ec609 100644 --- a/core/lib/dr_ir_utils.h +++ b/core/lib/dr_ir_utils.h @@ -1137,6 +1137,19 @@ DR_API int dr_insert_it_instrs(void *drcontext, instrlist_t *ilist); +DR_API +/** + * Insert code to get the application value of the thread pointer register + * into register \p reg. + * + * \return whether successful. + * + * \note RISCV-only + */ +bool +dr_insert_get_app_tls(void *drcontext, instrlist_t *ilist, instr_t *instr, + reg_id_t tls_reg, reg_id_t reg); + DR_API /** * Inserts into \p ilist prior to \p where meta-instruction(s) to save the diff --git a/core/lib/instrument.c b/core/lib/instrument.c index 3b15055ff18..e7c2f9b1446 100644 --- a/core/lib/instrument.c +++ b/core/lib/instrument.c @@ -7451,6 +7451,28 @@ dr_insert_it_instrs(void *drcontext, instrlist_t *ilist) #endif } +DR_API +bool +dr_insert_get_app_tls(void *drcontext, instrlist_t *ilist, instr_t *instr, + reg_id_t tls_reg, reg_id_t reg) +{ +#if defined(X86) + return dr_insert_get_seg_base(drcontext, ilist, instr, tls_reg, reg); +#elif defined(RISCV64) + CLIENT_ASSERT(reg_is_pointer_sized(reg), + "dr_insert_get_app_tls: reg has wrong size\n"); + CLIENT_ASSERT(reg != tls_reg, + "dr_insert_get_app_tls: reg should not be tls_reg itself\n"); + instrlist_meta_preinsert(ilist, instr, + instr_create_restore_from_tls( + drcontext, reg, os_get_app_tls_base_offset(tls_reg))); + + return true; +#else + return false; +#endif +} + DR_API bool dr_prepopulate_cache(app_pc *tags, size_t tags_count) diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 080b7b796a8..b5f2e730d33 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -6223,6 +6223,7 @@ if (RISCV64) code_api|sample.bbbuf code_api|sample.bbcount code_api|sample.bbsize + code_api|sample.cbr code_api|sample.div code_api|sample.empty code_api|sample.inline