From 0ed9f41b22cac1b589cdb252bc3e210b7993e27e Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Tue, 4 Oct 2022 14:59:09 -0400 Subject: [PATCH] i#5520 memtrace encodings: Add AArch32 support (#5672) Adds support for Thumb versus Arm modes in AArch32 with respect to drmemtrace stored encodings by setting the LSB when storing in the tracer and raw2trace, and by properly setting the base mode to Arm when decoding. Augments decode_from_copy() to switch locally to Thumb if either the read or original PC has LSB=1 as that better fits most use cases (including reading from a drmemtrace encoding buffer). Adds AArch32 support to the burst_gencode test. Fixes 3 outstanding AArch32 issues: + Removes a quote in a comment in third_party/libgcc/arm/lib1funcs.S code not handled by old toolchains. + Removes an ASSERT_NOT_TESTED path now hit. + Fixes an AArch32 tracer bug where a 2nd temp register is needed for a 2nd conditional skip. Tested by running the drcacheoff.gencode test on an AArch32 machine which now passes. Issue: #5520 --- clients/drcachesim/tests/burst_gencode.cpp | 18 +++++++++++++++++- clients/drcachesim/tracer/instru_offline.cpp | 4 +++- clients/drcachesim/tracer/raw2trace.cpp | 10 ++++------ clients/drcachesim/tracer/raw2trace.h | 19 ++++++++++++++++--- clients/drcachesim/tracer/tracer.cpp | 6 +++--- core/arch/emit_utils_shared.c | 4 ---- core/ir/arm/decode.c | 10 ++++------ third_party/libgcc/README.dynamorio | 7 +++++++ third_party/libgcc/arm/lib1funcs.S | 2 +- 9 files changed, 55 insertions(+), 25 deletions(-) diff --git a/clients/drcachesim/tests/burst_gencode.cpp b/clients/drcachesim/tests/burst_gencode.cpp index 9c4eff7b9e4..cd1911dbbdf 100644 --- a/clients/drcachesim/tests/burst_gencode.cpp +++ b/clients/drcachesim/tests/burst_gencode.cpp @@ -102,6 +102,10 @@ class code_generator_t { allocate_mem(map_size_, ALLOW_EXEC | ALLOW_READ | ALLOW_WRITE)); assert(map_ != nullptr); +#ifdef ARM + bool res = dr_set_isa_mode(dc, DR_ISA_ARM_A32, nullptr); + assert(res); +#endif instrlist_t *ilist = instrlist_create(dc); reg_id_t base = IF_X86_ELSE(IF_X64_ELSE(DR_REG_RAX, DR_REG_EAX), DR_REG_R0); reg_id_t base4imm = IF_X86_64_ELSE(reg_64_to_32(base), base); @@ -123,7 +127,13 @@ class code_generator_t { instrlist_append(ilist, XINST_CREATE_store(dc, OPND_CREATE_MEMPTR(base, -ptrsz), opnd_create_reg(base))); +#ifdef ARM + // XXX: Maybe XINST_CREATE_return should create "bx lr" like we need here + // instead of the pop into pc which assumes the entry pushed lr? + instrlist_append(ilist, INSTR_CREATE_bx(dc, opnd_create_reg(DR_REG_LR))); +#else instrlist_append(ilist, XINST_CREATE_return(dc)); +#endif byte *last_pc = instrlist_encode(dc, ilist, map_, true); assert(last_pc <= map_ + map_size_); @@ -263,6 +273,10 @@ look_for_gencode(std::string trace_dir) } bool found_magic1 = false, found_magic2 = false; bool have_instr_encodings = false; +#ifdef ARM + // DR will auto-switch locally to Thumb for LSB=1 but not to ARM so we start as ARM. + dr_set_isa_mode(dr_context, DR_ISA_ARM_A32, nullptr); +#endif for (reader_t &iter = analyzer.begin(); iter != analyzer.end(); ++iter) { memref_t memref = *iter; if (memref.marker.type == TRACE_TYPE_MARKER && @@ -281,7 +295,9 @@ look_for_gencode(std::string trace_dir) } instr_t instr; instr_init(dr_context, &instr); - app_pc next_pc = decode(dr_context, memref.instr.encoding, &instr); + app_pc next_pc = + decode_from_copy(dr_context, memref.instr.encoding, + reinterpret_cast(memref.instr.addr), &instr); assert(next_pc != nullptr && instr_valid(&instr)); ptr_int_t immed; if (!found_magic1 && instr_is_mov_constant(&instr, &immed) && diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp index 0e8365a2ecb..8dae0994f1f 100644 --- a/clients/drcachesim/tracer/instru_offline.cpp +++ b/clients/drcachesim/tracer/instru_offline.cpp @@ -504,7 +504,9 @@ offline_instru_t::record_instr_encodings(void *drcontext, app_pc tag_pc, encoding_entry_t *enc = reinterpret_cast(buf_start); enc->length = buf - buf_start; enc->id = per_block->id; - enc->start_pc = reinterpret_cast(tag_pc); + // We put the ARM vs Thumb mode into the modoffs to ensure proper decoding. + enc->start_pc = reinterpret_cast( + dr_app_pc_as_jump_target(instr_get_isa_mode(instrlist_first(ilist)), tag_pc)); log_(2, "%s: Recorded %zu bytes for id " UINT64_FORMAT_STRING " @ %p\n", __FUNCTION__, enc->length, enc->id, tag_pc); diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index 4e0889bbbfb..46f8390a08c 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1281,7 +1281,7 @@ raw2trace_t::log_instruction(uint level, app_pc decode_pc, app_pc orig_pc) { if (verbosity_ >= level) { disassemble_from_copy(dcontext_, decode_pc, orig_pc, STDOUT, true /*pc*/, - false /*bytes*/); + true /*bytes*/); } } @@ -1346,13 +1346,11 @@ raw2trace_t::raw2trace_t(const char *module_map, VPRINT(0, "Invalid input and output file lists\n"); return; } - if (dcontext == NULL) { #ifdef ARM - // We keep the mode at ARM and rely on LSB=1 offsets in the modoffs fields - // to trigger Thumb decoding. - dr_set_isa_mode(dcontext, DR_ISA_ARM_A32, NULL); + // We keep the mode at ARM and rely on LSB=1 offsets in the modoffs fields + // and encoding_file to trigger Thumb decoding. + dr_set_isa_mode(dcontext == NULL ? GLOBAL_DCONTEXT : dcontext, DR_ISA_ARM_A32, NULL); #endif - } thread_data_.resize(thread_files.size()); for (size_t i = 0; i < thread_data_.size(); ++i) { thread_data_[i].index = static_cast(i); diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index 4a4eda54b84..f4198867efe 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -442,7 +442,13 @@ class module_mapper_t { reinterpret_cast(entry->start_pc); } else { size_t idx = static_cast(modidx); // Avoid win32 warnings. - return map_pc - modvec_[idx].map_seg_base + modvec_[idx].orig_seg_base; + app_pc res = map_pc - modvec_[idx].map_seg_base + modvec_[idx].orig_seg_base; +#ifdef ARM + // Match Thumb vs Arm mode by setting LSB. + if (TESTANY(1, modoffs)) + res = reinterpret_cast(reinterpret_cast(res) | 1); +#endif + return res; } } @@ -1139,7 +1145,9 @@ template class trace_converter_t { app_pc pc, decode_pc = start_pc; if (in_entry->pc.modidx == PC_MODIDX_INVALID) { impl()->log(3, "Appending %u instrs in bb " PFX " in generated code\n", - instr_count, (ptr_uint_t)start_pc); + instr_count, + reinterpret_cast(modmap_().get_orig_pc( + in_entry->pc.modidx, in_entry->pc.modoffs))); } else if ((in_entry->pc.modidx == 0 && in_entry->pc.modoffs == 0) || modvec_()[in_entry->pc.modidx].map_seg_base == NULL) { if (impl()->get_version(tls) >= OFFLINE_FILE_VERSION_ENCODINGS) { @@ -1362,6 +1370,10 @@ template class trace_converter_t { { size_t size_left = instr->length(); size_t offs = 0; +#ifdef ARM + // Remove any Thumb LSB. + pc = dr_app_pc_as_load_target(DR_ISA_ARM_THUMB, pc); +#endif do { buf->type = TRACE_TYPE_ENCODING; buf->size = @@ -1371,12 +1383,13 @@ template class trace_converter_t { // We don't have to set the rest to 0 but it is nice. memset(buf->encoding + buf->size, 0, sizeof(buf->encoding) - buf->size); } + impl()->log(4, "Appended encoding entry for %p sz=%zu 0x%08x...\n", pc, + buf->size, *(int *)buf->encoding); offs += buf->size; size_left -= buf->size; ++buf; DR_CHECK(static_cast(buf - buf_start) < WRITE_BUFFER_SIZE, "Too many entries for write buffer"); - impl()->log(4, "Appended encoding entry for %p\n", pc); } while (size_left > 0); return ""; } diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 6ba64481bdf..7ae55f62b0b 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -667,12 +667,12 @@ instrument_clean_call(void *drcontext, instrlist_t *ilist, instr_t *where, FATAL("Fatal error: failed to unreserve scratch reg.\n"); } - reg_id_t reg_tmp = DR_REG_NULL; + reg_id_t reg_tmp = DR_REG_NULL, reg_tmp2 = DR_REG_NULL; instr_t *skip_thread = INSTR_CREATE_label(drcontext); reg_id_set_t app_regs_at_skip_thread; if ((op_L0I_filter.get_value() || op_L0D_filter.get_value()) && thread_filtering_enabled) { - insert_conditional_skip(drcontext, ilist, where, reg_ptr, ®_tmp, skip_thread, + insert_conditional_skip(drcontext, ilist, where, reg_ptr, ®_tmp2, skip_thread, short_reaches, app_regs_at_skip_thread); } MINSERT(ilist, where, @@ -686,7 +686,7 @@ instrument_clean_call(void *drcontext, instrlist_t *ilist, instr_t *where, DR_CLEANCALL_ALWAYS_OUT_OF_LINE, 0); insert_conditional_skip_target(drcontext, ilist, where, skip_call, reg_tmp, app_regs_at_skip_call); - insert_conditional_skip_target(drcontext, ilist, where, skip_thread, reg_tmp, + insert_conditional_skip_target(drcontext, ilist, where, skip_thread, reg_tmp2, app_regs_at_skip_thread); } diff --git a/core/arch/emit_utils_shared.c b/core/arch/emit_utils_shared.c index 1ba03622483..277f571f4c8 100644 --- a/core/arch/emit_utils_shared.c +++ b/core/arch/emit_utils_shared.c @@ -5593,10 +5593,6 @@ emit_special_ibl_xfer(dcontext_t *dcontext, byte *pc, generated_code_t *code, ui OPND_TLS_FIELD(get_ibl_entry_tls_offs(dcontext, ibl_unlinked_tgt)))); APP(&ilist, XINST_CREATE_jump_reg(dcontext, opnd_create_reg(SCRATCH_REG1))); # else /* ARM */ - /* i#4670: The unlinking case is observed to hit very infrequently on x86. - * The fix has been tested on AArch64 but not on ARM yet. - */ - ASSERT_NOT_TESTED(); /* i#1906: loads to PC must use word-aligned addresses */ ASSERT( ALIGNED(get_ibl_entry_tls_offs(dcontext, ibl_unlinked_tgt), PC_LOAD_ADDR_ALIGN)); diff --git a/core/ir/arm/decode.c b/core/ir/arm/decode.c index 06f2d69b759..44d9dd6909e 100644 --- a/core/ir/arm/decode.c +++ b/core/ir/arm/decode.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2014-2021 Google, Inc. All rights reserved. + * Copyright (c) 2014-2022 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -2251,12 +2251,10 @@ read_instruction(dcontext_t *dcontext, byte *pc, byte *orig_pc, di->decorated_pc = pc; /* We support auto-decoding an LSB=1 address as Thumb (i#1688). We don't * change the thread mode, just the local mode, and we return an LSB=1 next pc. + * We allow either of the copy or orig to have the LSB set and do not require + * them to match as some uses cases have a local buffer for pc. */ - if (TEST(0x1, (ptr_uint_t)pc)) { - if (!TEST(0x1, (ptr_uint_t)orig_pc)) { - CLIENT_ASSERT(false, "must have consistent LSB for decode pc and orig_pc"); - return NULL; - } + if (TEST(0x1, (ptr_uint_t)pc) || TEST(0x1, (ptr_uint_t)orig_pc)) { di->isa_mode = DR_ISA_ARM_THUMB; pc = PC_AS_LOAD_TGT(DR_ISA_ARM_THUMB, pc); orig_pc = PC_AS_LOAD_TGT(DR_ISA_ARM_THUMB, orig_pc); diff --git a/third_party/libgcc/README.dynamorio b/third_party/libgcc/README.dynamorio index a30efe1670a..8c2bf27a237 100644 --- a/third_party/libgcc/README.dynamorio +++ b/third_party/libgcc/README.dynamorio @@ -41,6 +41,13 @@ Here are the changes to lib1funcs.S: > /*--------------------------------------------------*/ > +Additionally, a single quote was removed from this comment line in lib1funcs.S +as it breaks some old assemblers: + +< @ the sign bit into all of AL. That's not what we want... +> @ the sign bit into all of AL. That is not what we want... + + For udivmoddi4.c, it contains the __udivmoddi4 and __moddi3 functions extracted from libgcc2.c with the following lines added: > /* This is extracted from gcc's libgcc/libgcc2.c with these typedefs added: */ diff --git a/third_party/libgcc/arm/lib1funcs.S b/third_party/libgcc/arm/lib1funcs.S index 241414ce82e..6fba427b4b0 100644 --- a/third_party/libgcc/arm/lib1funcs.S +++ b/third_party/libgcc/arm/lib1funcs.S @@ -1465,7 +1465,7 @@ LSYM(Lover12): asr ah, r2 sub r2, #32 @ If r2 is negative at this point the following step would OR - @ the sign bit into all of AL. That's not what we want... + @ the sign bit into all of AL. That is not what we want... bmi 1f mov ip, r3 asr r3, r2