From 9d56b6321112f10941bb58acf93b71d4ababd5a8 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Thu, 14 Dec 2017 16:52:09 -0500 Subject: [PATCH] i#2001 trace perf: do not store disp For offline traces for a "disp(base)" memref, only stores the base and adds the disp in raw2trace post-processing, as it's statically known. The base can be directly written as it's already in a register, reducing scratch register pressure. Moves the second scratch register reservation into the instru_t routines so we can skip it for this optimization of just writing the base reg for "disp(base)" memrefs. Issue: #2001 --- clients/drcachesim/tracer/instru.h | 33 +++--- clients/drcachesim/tracer/instru_offline.cpp | 59 +++++++--- clients/drcachesim/tracer/instru_online.cpp | 29 ++++- clients/drcachesim/tracer/raw2trace.cpp | 8 +- clients/drcachesim/tracer/tracer.cpp | 110 +++++++++++-------- ext/drreg/drreg.c | 5 +- 6 files changed, 160 insertions(+), 84 deletions(-) diff --git a/clients/drcachesim/tracer/instru.h b/clients/drcachesim/tracer/instru.h index ff81bf288ac..6af84bc2db1 100644 --- a/clients/drcachesim/tracer/instru.h +++ b/clients/drcachesim/tracer/instru.h @@ -37,6 +37,7 @@ #define _INSTRU_H_ 1 #include +#include "drvector.h" #include "../common/trace_entry.h" #define MINSERT instrlist_meta_preinsert @@ -52,9 +53,11 @@ class instru_t // We require that this is passed at construction time: instru_t(void (*insert_load_buf)(void *, instrlist_t *, instr_t *, reg_id_t), - bool memref_needs_info) + bool memref_needs_info, + drvector_t *reg_vector_in) : insert_load_buf_ptr(insert_load_buf), - memref_needs_full_info(memref_needs_info) {} + memref_needs_full_info(memref_needs_info), + reg_vector(reg_vector_in) {} virtual ~instru_t() {} virtual size_t sizeof_entry() const = 0; @@ -77,15 +80,15 @@ class instru_t // These insert inlined code to add an entry into the trace buffer. 
virtual int instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app, opnd_t ref, bool write, dr_pred_type_t pred) = 0; virtual int instrument_instr(void *drcontext, void *tag, void **bb_field, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app) = 0; virtual int instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t **delay_instrs, int num_delay_instrs) = 0; virtual void bb_analysis(void *drcontext, void *tag, void **bb_field, @@ -105,6 +108,7 @@ class instru_t // Whether each data ref needs its own PC and type entry (i.e., // this info cannot be inferred from surrounding icache entries). bool memref_needs_full_info; + drvector_t *reg_vector; private: instru_t() {} @@ -115,7 +119,8 @@ class online_instru_t : public instru_t public: online_instru_t(void (*insert_load_buf)(void *, instrlist_t *, instr_t *, reg_id_t), - bool memref_needs_info); + bool memref_needs_info, + drvector_t *reg_vector); virtual ~online_instru_t(); virtual size_t sizeof_entry() const; @@ -134,15 +139,15 @@ class online_instru_t : public instru_t virtual int append_unit_header(byte *buf_ptr, thread_id_t tid); virtual int instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app, opnd_t ref, bool write, dr_pred_type_t pred); virtual int instrument_instr(void *drcontext, void *tag, void **bb_field, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app); virtual int instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t 
**delay_instrs, int num_delay_instrs); virtual void bb_analysis(void *drcontext, void *tag, void **bb_field, @@ -164,6 +169,7 @@ class offline_instru_t : public instru_t offline_instru_t(void (*insert_load_buf)(void *, instrlist_t *, instr_t *, reg_id_t), bool memref_needs_info, + drvector_t *reg_vector, ssize_t (*write_file)(file_t file, const void *data, size_t count), @@ -186,15 +192,15 @@ class offline_instru_t : public instru_t virtual int append_unit_header(byte *buf_ptr, thread_id_t tid); virtual int instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app, opnd_t ref, bool write, dr_pred_type_t pred); virtual int instrument_instr(void *drcontext, void *tag, void **bb_field, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app); virtual int instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t **delay_instrs, int num_delay_instrs); virtual void bb_analysis(void *drcontext, void *tag, void **bb_field, @@ -222,8 +228,7 @@ class offline_instru_t : public instru_t reg_id_t reg_ptr, reg_id_t scratch, int adjust, app_pc pc, uint instr_count); int insert_save_addr(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_addr, int adjust, opnd_t ref, - bool write); + reg_id_t reg_ptr, int adjust, opnd_t ref, bool write); int insert_save_type_and_size(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t reg_ptr, reg_id_t scratch, int adjust, instr_t *app, opnd_t ref, bool write); diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp index 2092cf658a7..7ea6508e109 100644 --- a/clients/drcachesim/tracer/instru_offline.cpp +++ b/clients/drcachesim/tracer/instru_offline.cpp @@ -52,12 +52,13 @@ 
void (*offline_instru_t::user_free)(void *data); offline_instru_t::offline_instru_t(void (*insert_load_buf)(void *, instrlist_t *, instr_t *, reg_id_t), bool memref_needs_info, + drvector_t *reg_vector, ssize_t (*write_file)(file_t file, const void *data, size_t count), file_t module_file) - : instru_t(insert_load_buf, memref_needs_info), - write_file_func(write_file), modfile(module_file) + : instru_t(insert_load_buf, memref_needs_info, reg_vector), + write_file_func(write_file), modfile(module_file) { drcovlib_status_t res = drmodtrack_init(); DR_ASSERT(res == DRCOVLIB_SUCCESS); @@ -366,20 +367,38 @@ offline_instru_t::insert_save_type_and_size(void *drcontext, instrlist_t *ilist, int offline_instru_t::insert_save_addr(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_addr, int adjust, - opnd_t ref, bool write) + reg_id_t reg_ptr, int adjust, opnd_t ref, bool write) { int disp = adjust; - bool reg_ptr_used; - insert_obtain_addr(drcontext, ilist, where, reg_addr, reg_ptr, ref, &reg_ptr_used); - if (reg_ptr_used) { - // Re-load because reg_ptr was clobbered. - insert_load_buf_ptr(drcontext, ilist, where, reg_ptr); + reg_id_t reg_addr; + bool reserved = false; + drreg_status_t res; + if (opnd_is_near_base_disp(ref) && opnd_get_index(ref) == DR_REG_NULL) { + /* Optimization: to avoid needing a scratch reg to lea into, we simply + * store the base reg directly and add the disp during post-processing. + */ + reg_addr = opnd_get_base(ref); + } else { + res = drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_addr); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. + reserved = true; + bool reg_ptr_used; + insert_obtain_addr(drcontext, ilist, where, reg_addr, reg_ptr, ref, + &reg_ptr_used); + if (reg_ptr_used) { + // Re-load because reg_ptr was clobbered.
+ insert_load_buf_ptr(drcontext, ilist, where, reg_ptr); + } + reserved = true; } MINSERT(ilist, where, XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(reg_ptr, disp), opnd_create_reg(reg_addr))); + if (reserved) { + res = drreg_unreserve_register(drcontext, ilist, where, reg_addr); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. + } return sizeof(offline_entry_t); } @@ -415,7 +434,7 @@ offline_instru_t::instr_has_multiple_different_memrefs(instr_t *instr) int offline_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app, opnd_t ref, bool write, dr_pred_type_t pred) { @@ -425,6 +444,10 @@ offline_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t // We allow either 0 or all 1's as the type so no need to write anything else, // unless a filter is in place in which case we need a PC entry. if (memref_needs_full_info) { + reg_id_t reg_tmp; + drreg_status_t res = + drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. adjust += insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, adjust, instr_get_app_pc(app), 0); if (instr_has_multiple_different_memrefs(app)) { @@ -435,9 +458,10 @@ offline_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t adjust += insert_save_type_and_size(drcontext, ilist, where, reg_ptr, reg_tmp, adjust, app, ref, write); } + res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
} - adjust += insert_save_addr(drcontext, ilist, where, reg_ptr, reg_tmp, adjust, ref, - write); + adjust += insert_save_addr(drcontext, ilist, where, reg_ptr, adjust, ref, write); instrlist_set_auto_predicate(ilist, DR_PRED_NONE); return adjust; } @@ -446,10 +470,10 @@ offline_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t int offline_instru_t::instrument_instr(void *drcontext, void *tag, void **bb_field, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, - instr_t *app) + reg_id_t reg_ptr, int adjust, instr_t *app) { app_pc pc; + reg_id_t reg_tmp; if (!memref_needs_full_info) { // We write just once per bb, if not filtering. if ((ptr_uint_t)*bb_field > MAX_INSTR_COUNT) @@ -459,16 +483,21 @@ offline_instru_t::instrument_instr(void *drcontext, void *tag, void **bb_field, // XXX: For repstr do we want tag insted of skipping rep prefix? pc = instr_get_app_pc(app); } + drreg_status_t res = + drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. adjust += insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, adjust, pc, memref_needs_full_info ? 1 : (uint)(ptr_uint_t)*bb_field); if (!memref_needs_full_info) *(ptr_uint_t*)bb_field = MAX_INSTR_COUNT + 1; + res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. return adjust; } int offline_instru_t::instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t **delay_instrs, int num_delay_instrs) { // The post-processor fills in all instr info other than our once-per-bb entry.
diff --git a/clients/drcachesim/tracer/instru_online.cpp b/clients/drcachesim/tracer/instru_online.cpp index 7904b1b5698..9ce893a89fd 100644 --- a/clients/drcachesim/tracer/instru_online.cpp +++ b/clients/drcachesim/tracer/instru_online.cpp @@ -43,8 +43,9 @@ online_instru_t::online_instru_t(void (*insert_load_buf)(void *, instrlist_t *, instr_t *, reg_id_t), - bool memref_needs_info) - : instru_t(insert_load_buf, memref_needs_info) + bool memref_needs_info, + drvector_t *reg_vector) + : instru_t(insert_load_buf, memref_needs_info, reg_vector) { } @@ -272,12 +273,16 @@ online_instru_t::insert_save_type_and_size(void *drcontext, instrlist_t *ilist, int online_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app, opnd_t ref, bool write, dr_pred_type_t pred) { ushort type = (ushort)(write ? TRACE_TYPE_WRITE : TRACE_TYPE_READ); ushort size = (ushort)drutil_opnd_mem_size_in_bytes(ref, app); + reg_id_t reg_tmp; + drreg_status_t res = + drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. if (!memref_needs_full_info) // For full info we skip this for !pred instrlist_set_auto_predicate(ilist, pred); if (memref_needs_full_info) { @@ -304,17 +309,23 @@ online_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t insert_save_type_and_size(drcontext, ilist, where, reg_ptr, reg_tmp, type, size, adjust); instrlist_set_auto_predicate(ilist, DR_PRED_NONE); + res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
return (adjust + sizeof(trace_entry_t)); } int online_instru_t::instrument_instr(void *drcontext, void *tag, void **bb_field, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app) { bool repstr_expanded = *bb_field != 0; // Avoid cl warning C4800. app_pc pc = repstr_expanded ? dr_fragment_app_pc(tag) : instr_get_app_pc(app); + reg_id_t reg_tmp; + drreg_status_t res = + drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. // To handle zero-iter repstr loops this routine is called at the top of the bb // where "app" is jecxz so we have to hardcode the rep str type and get length // from the tag. @@ -326,17 +337,23 @@ online_instru_t::instrument_instr(void *drcontext, void *tag, void **bb_field, insert_save_type_and_size(drcontext, ilist, where, reg_ptr, reg_tmp, type, size, adjust); insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, pc, adjust); + res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. return (adjust + sizeof(trace_entry_t)); } int online_instru_t::instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t **delay_instrs, int num_delay_instrs) { // Create and instrument for INSTR_BUNDLE trace_entry_t entry; int i; + reg_id_t reg_tmp; + drreg_status_t res = + drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. entry.type = TRACE_TYPE_INSTR_BUNDLE; entry.size = 0; for (i = 0; i < num_delay_instrs; i++) { @@ -352,6 +369,8 @@ online_instru_t::instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t entry.size = 0; } } + res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp); + DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
return adjust; } diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index ea6d86a1e65..a1b519e6f34 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2017 Google, Inc. All rights reserved. + * Copyright (c) 2016-2018 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -376,6 +376,10 @@ raw2trace_t::append_memref(INOUT trace_entry_t **buf_in, uint tidx, instr_t *ins } // We take the full value, to handle low or high. buf->addr = (addr_t) in_entry.combined_value; + if (opnd_is_near_base_disp(ref) && opnd_get_index(ref) == DR_REG_NULL) { + // We stored only the base reg, as an optimization. + buf->addr += opnd_get_disp(ref); + } VPRINT(4, "Appended memref type %d size %d to " PFX "\n", buf->type, buf->size, (ptr_uint_t)buf->addr); *buf_in = ++buf; @@ -550,7 +554,7 @@ raw2trace_t::merge_and_process_thread_files() uint tidx = (uint)thread_files.size(); uint thread_count = (uint)thread_files.size(); offline_entry_t in_entry; - online_instru_t instru(NULL, false); + online_instru_t instru(NULL, false, NULL); bool last_bb_handled = true; std::vector tids(thread_files.size(), INVALID_THREAD_ID); std::vector times(thread_files.size(), 0); diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 2e041fd9c8a..7a97a1eb127 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2011-2017 Google, Inc. All rights reserved. + * Copyright (c) 2011-2018 Google, Inc. All rights reserved. * Copyright (c) 2010 Massachusetts Institute of Technology All rights reserved. 
* ******************************************************************************/ @@ -99,6 +99,8 @@ static size_t trace_buf_size; static size_t redzone_size; static size_t max_buf_size; +static drvector_t scratch_reserve_vec; + /* thread private buffer and counter */ typedef struct { byte *seg_base; @@ -501,7 +503,7 @@ insert_update_buf_ptr(void *drcontext, instrlist_t *ilist, instr_t *where, static int instrument_delay_instrs(void *drcontext, void *tag, instrlist_t *ilist, user_data_t *ud, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust) + reg_id_t reg_ptr, int adjust) { if (ud->repstr) { // We assume that drutil restricts repstr to a single bb on its own, and @@ -514,7 +516,7 @@ instrument_delay_instrs(void *drcontext, void *tag, instrlist_t *ilist, } // Instrument to add a full instr entry for the first instr. adjust = instru->instrument_instr(drcontext, tag, &ud->instru_field, - ilist, where, reg_ptr, reg_tmp, adjust, + ilist, where, reg_ptr, adjust, ud->delay_instrs[0]); if (have_phys && op_use_physical.get_value()) { // No instr bundle if physical-2-virtual since instr bundle may @@ -522,11 +524,11 @@ instrument_delay_instrs(void *drcontext, void *tag, instrlist_t *ilist, int i; for (i = 1; i < ud->num_delay_instrs; i++) { adjust = instru->instrument_instr(drcontext, tag, &ud->instru_field, - ilist, where, reg_ptr, reg_tmp, + ilist, where, reg_ptr, adjust, ud->delay_instrs[i]); } } else { - adjust = instru->instrument_ibundle(drcontext, ilist, where, reg_ptr, reg_tmp, + adjust = instru->instrument_ibundle(drcontext, ilist, where, reg_ptr, adjust, ud->delay_instrs + 1, ud->num_delay_instrs - 1); } @@ -539,7 +541,7 @@ instrument_delay_instrs(void *drcontext, void *tag, instrlist_t *ilist, */ static void instrument_clean_call(void *drcontext, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp) + reg_id_t reg_ptr) { instr_t *skip_call = INSTR_CREATE_label(drcontext); IF_X86(uint64 prof_pcs;) @@ -574,6 +576,7 @@ 
instrument_clean_call(void *drcontext, instrlist_t *ilist, instr_t *where, INSTR_CREATE_jecxz(drcontext, opnd_create_instr(skip_call))); } #elif defined(ARM) + reg_id_t reg_tmp = DR_REG_NULL; if (dr_get_isa_mode(drcontext) == DR_ISA_ARM_THUMB) { instr_t *noskip = INSTR_CREATE_label(drcontext); /* XXX: clean call is too long to use cbz to skip. */ @@ -588,9 +591,12 @@ instrument_clean_call(void *drcontext, instrlist_t *ilist, instr_t *where, MINSERT(ilist, where, noskip); } else { /* There is no jecxz/cbz like instr on ARM-A32 mode, so we have to - * save aflags to reg_tmp before check. + * save aflags to a temp reg before check. * XXX optimization: use drreg to avoid aflags save/restore. */ + if (drreg_reserve_register(drcontext, bb, where, reg_vector, &reg_tmp) != + DRREG_SUCCESS) + FATAL("Fatal error: failed to reserve reg."); dr_save_arith_flags_to_reg(drcontext, ilist, where, reg_tmp); MINSERT(ilist, where, INSTR_CREATE_cmp(drcontext, @@ -611,8 +617,12 @@ instrument_clean_call(void *drcontext, instrlist_t *ilist, instr_t *where, DR_CLEANCALL_ALWAYS_OUT_OF_LINE, 0); MINSERT(ilist, where, skip_call); #ifdef ARM - if (dr_get_isa_mode(drcontext) == DR_ISA_ARM_A32) + if (dr_get_isa_mode(drcontext) == DR_ISA_ARM_A32) { dr_restore_arith_flags_from_reg(drcontext, ilist, where, reg_tmp); + DR_ASSERT(reg_tmp != DR_REG_NULL); + if (drreg_unreserve_register(drcontext, bb, where, reg_tmp) != DRREG_SUCCESS) + FATAL("Fatal error: failed to unreserve reg.\n"); + } #endif } @@ -625,7 +635,7 @@ instrument_clean_call(void *drcontext, instrlist_t *ilist, instr_t *where, // limitations.) static reg_id_t insert_filter_addr(void *drcontext, instrlist_t *ilist, instr_t *where, - user_data_t *ud, reg_id_t reg_ptr, reg_id_t reg_addr, + user_data_t *ud, reg_id_t reg_ptr, opnd_t ref, instr_t *app, instr_t *skip, dr_pred_type_t pred) { // Our "level 0" inlined direct-mapped cache filter.
@@ -636,6 +646,7 @@ insert_filter_addr(void *drcontext, instrlist_t *ilist, instr_t *where, ptr_int_t mask = (ptr_int_t)(cache_size / op_line_size.get_value()) - 1; int line_bits = compute_log2(op_line_size.get_value()); uint offs = is_icache ? MEMTRACE_TLS_OFFS_ICACHE : MEMTRACE_TLS_OFFS_DCACHE; + reg_id_t reg_addr; if (is_icache) { // For filtering the icache, we disable bundles + delays and call here on // every instr. We skip if we're still on the same cache line. @@ -654,6 +665,9 @@ insert_filter_addr(void *drcontext, instrlist_t *ilist, instr_t *where, } ud->last_app_pc = instr_get_app_pc(app); } + if (drreg_reserve_register(drcontext, ilist, where, &scratch_reserve_vec, &reg_addr) + != DRREG_SUCCESS) + FATAL("Fatal error: failed to reserve scratch reg\n"); if (drreg_reserve_aflags(drcontext, ilist, where) != DRREG_SUCCESS) FATAL("Fatal error: failed to reserve aflags\n"); // We need a 3rd scratch register. We can avoid clobbering the app address @@ -730,18 +744,20 @@ insert_filter_addr(void *drcontext, instrlist_t *ilist, instr_t *where, // path shorter, so we clobber reg_addr with the tag and recompute on a miss.
if (!is_icache && opnd_uses_reg(ref, reg_idx)) drreg_get_app_value(drcontext, ilist, where, reg_idx, reg_idx); + if (drreg_unreserve_register(drcontext, ilist, where, reg_addr) != DRREG_SUCCESS) + FATAL("Fatal error: failed to unreserve scratch reg\n"); return reg_idx; } static int instrument_memref(void *drcontext, user_data_t *ud, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, + reg_id_t reg_ptr, int adjust, instr_t *app, opnd_t ref, bool write, dr_pred_type_t pred) { instr_t *skip = INSTR_CREATE_label(drcontext); reg_id_t reg_third = DR_REG_NULL; if (op_L0_filter.get_value()) { - reg_third = insert_filter_addr(drcontext, ilist, where, ud, reg_ptr, reg_tmp, + reg_third = insert_filter_addr(drcontext, ilist, where, ud, reg_ptr, ref, NULL, skip, pred); if (reg_third == DR_REG_NULL) { instr_destroy(drcontext, skip); @@ -751,7 +767,7 @@ instrument_memref(void *drcontext, user_data_t *ud, instrlist_t *ilist, instr_t if (op_L0_filter.get_value()) insert_load_buf_ptr(drcontext, ilist, where, reg_ptr); adjust = instru->instrument_memref(drcontext, ilist, where, reg_ptr, - reg_tmp, adjust, app, ref, write, pred); + adjust, app, ref, write, pred); if (op_L0_filter.get_value() && adjust != 0) { // When filtering we can't combine buf_ptr adjustments. 
insert_update_buf_ptr(drcontext, ilist, where, reg_ptr, pred, adjust); @@ -773,13 +789,12 @@ instrument_memref(void *drcontext, user_data_t *ud, instrlist_t *ilist, instr_t static int instrument_instr(void *drcontext, void *tag, user_data_t *ud, instrlist_t *ilist, instr_t *where, - reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust, - instr_t *app) + reg_id_t reg_ptr, int adjust, instr_t *app) { instr_t *skip = INSTR_CREATE_label(drcontext); reg_id_t reg_third = DR_REG_NULL; if (op_L0_filter.get_value()) { - reg_third = insert_filter_addr(drcontext, ilist, where, ud, reg_ptr, reg_tmp, + reg_third = insert_filter_addr(drcontext, ilist, where, ud, reg_ptr, opnd_create_null(), app, skip, DR_PRED_NONE); if (reg_third == DR_REG_NULL) { instr_destroy(drcontext, skip); @@ -789,7 +804,7 @@ instrument_instr(void *drcontext, void *tag, user_data_t *ud, if (op_L0_filter.get_value()) // Else already loaded. insert_load_buf_ptr(drcontext, ilist, where, reg_ptr); adjust = instru->instrument_instr(drcontext, tag, &ud->instru_field, ilist, - where, reg_ptr, reg_tmp, adjust, app); + where, reg_ptr, adjust, app); if (op_L0_filter.get_value() && adjust != 0) { // When filtering we can't combine buf_ptr adjustments. insert_update_buf_ptr(drcontext, ilist, where, reg_ptr, DR_PRED_NONE, adjust); @@ -819,8 +834,8 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, int i, adjust = 0; user_data_t *ud = (user_data_t *) user_data; dr_pred_type_t pred; - reg_id_t reg_ptr, reg_tmp = DR_REG_NULL; - drvector_t rvec1, rvec2; + reg_id_t reg_ptr; + drvector_t rvec; bool is_memref; drmgr_disable_auto_predication(drcontext, bb); @@ -891,28 +906,22 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, pred = instr_get_predicate(instr); /* opt: save/restore reg per instr instead of per entry */ - /* We need two scratch registers. + /* We usually need two scratch registers, but not always, so we push the 2nd + * out into the instru_t routines. 
* reg_ptr must be ECX or RCX for jecxz on x86, and must be <= r7 for cbnz on ARM. */ - drreg_init_and_fill_vector(&rvec1, false); - drreg_init_and_fill_vector(&rvec2, true); + drreg_init_and_fill_vector(&rvec, false); #ifdef X86 - drreg_set_vector_entry(&rvec1, DR_REG_XCX, true); - if (op_L0_filter.get_value()) { - /* We need to preserve the flags so we need xax. */ - drreg_set_vector_entry(&rvec2, DR_REG_XAX, false); - } + drreg_set_vector_entry(&rvec, DR_REG_XCX, true); #else for (reg_ptr = DR_REG_R0; reg_ptr <= DR_REG_R7; reg_ptr++) - drreg_set_vector_entry(&rvec1, reg_ptr, true); + drreg_set_vector_entry(&rvec, reg_ptr, true); #endif - if (drreg_reserve_register(drcontext, bb, instr, &rvec1, &reg_ptr) != DRREG_SUCCESS || - drreg_reserve_register(drcontext, bb, instr, &rvec2, &reg_tmp) != DRREG_SUCCESS) { + if (drreg_reserve_register(drcontext, bb, instr, &rvec, &reg_ptr) != DRREG_SUCCESS) { // We can't recover. FATAL("Fatal error: failed to reserve scratch registers\n"); } - drvector_delete(&rvec1); - drvector_delete(&rvec2); + drvector_delete(&rvec); /* load buf ptr into reg_ptr, unless we're filtering */ if (!op_L0_filter.get_value()) @@ -920,14 +929,14 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, if (ud->num_delay_instrs != 0) { adjust = instrument_delay_instrs(drcontext, tag, bb, ud, instr, - reg_ptr, reg_tmp, adjust); + reg_ptr, adjust); } if (ud->strex != NULL) { DR_ASSERT(instr_is_exclusive_store(ud->strex)); adjust = instrument_instr(drcontext, tag, ud, bb, - instr, reg_ptr, reg_tmp, adjust, ud->strex); - adjust = instrument_memref(drcontext, ud, bb, instr, reg_ptr, reg_tmp, + instr, reg_ptr, adjust, ud->strex); + adjust = instrument_memref(drcontext, ud, bb, instr, reg_ptr, adjust, ud->strex, instr_get_dst(ud->strex, 0), true, instr_get_predicate(ud->strex)); ud->strex = NULL; @@ -950,7 +959,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, // online we have to ignore "instr" here in
instru_online::instrument_instr(). if (!ud->repstr || drmgr_is_first_instr(drcontext, instr)) { adjust = instrument_instr(drcontext, tag, ud, bb, - instr, reg_ptr, reg_tmp, adjust, instr); + instr, reg_ptr, adjust, instr); } ud->last_app_pc = instr_get_app_pc(instr); @@ -966,17 +975,15 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, /* insert code to add an entry for each memory reference opnd */ for (i = 0; i < instr_num_srcs(instr); i++) { if (opnd_is_memory_reference(instr_get_src(instr, i))) { - adjust = instrument_memref(drcontext, ud, bb, instr, reg_ptr, - reg_tmp, adjust, instr, - instr_get_src(instr, i), false, pred); + adjust = instrument_memref(drcontext, ud, bb, instr, reg_ptr, adjust, + instr, instr_get_src(instr, i), false, pred); } } for (i = 0; i < instr_num_dsts(instr); i++) { if (opnd_is_memory_reference(instr_get_dst(instr, i))) { - adjust = instrument_memref(drcontext, ud, bb, instr, reg_ptr, - reg_tmp, adjust, instr, - instr_get_dst(instr, i), true, pred); + adjust = instrument_memref(drcontext, ud, bb, instr, reg_ptr, adjust, + instr, instr_get_dst(instr, i), true, pred); } } if (adjust != 0) @@ -991,12 +998,11 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, if (drmgr_is_last_instr(drcontext, instr)) { if (op_L0_filter.get_value()) insert_load_buf_ptr(drcontext, bb, instr, reg_ptr); - instrument_clean_call(drcontext, bb, instr, reg_ptr, reg_tmp); + instrument_clean_call(drcontext, bb, instr, reg_ptr); } - /* restore scratch registers */ - if (drreg_unreserve_register(drcontext, bb, instr, reg_ptr) != DRREG_SUCCESS || - drreg_unreserve_register(drcontext, bb, instr, reg_tmp) != DRREG_SUCCESS) + /* restore scratch register */ + if (drreg_unreserve_register(drcontext, bb, instr, reg_ptr) != DRREG_SUCCESS) DR_ASSERT(false); return DR_EMIT_DEFAULT; } @@ -1414,6 +1420,8 @@ event_exit(void) if (!dr_raw_tls_cfree(tls_offs, MEMTRACE_TLS_COUNT)) DR_ASSERT(false); + drvector_delete(&scratch_reserve_vec); + if 
(tracing_enabled) { if (!drmgr_unregister_pre_syscall_event(event_pre_syscall) || !drmgr_unregister_kernel_xfer_event(event_kernel_xfer) || @@ -1541,6 +1549,14 @@ drmemtrace_client_main(client_id_t id, int argc, const char *argv[]) FATAL("Usage error: L0I_size and L0D_size must be powers of 2."); } + drreg_init_and_fill_vector(&scratch_reserve_vec, true); +#ifdef X86 + if (op_L0_filter.get_value()) { + /* We need to preserve the flags so we need xax. */ + drreg_set_vector_entry(&scratch_reserve_vec, DR_REG_XAX, false); + } +#endif + if (op_offline.get_value()) { void *buf; if (!init_offline_dir()) { @@ -1551,6 +1567,7 @@ drmemtrace_client_main(client_id_t id, int argc, const char *argv[]) buf = dr_global_alloc(MAX_INSTRU_SIZE); instru = new(buf) offline_instru_t(insert_load_buf_ptr, op_L0_filter.get_value(), + &scratch_reserve_vec, file_ops_func.write_file, module_file); } else { @@ -1559,7 +1576,8 @@ drmemtrace_client_main(client_id_t id, int argc, const char *argv[]) DR_ASSERT(MAX_INSTRU_SIZE >= sizeof(online_instru_t)); buf = dr_global_alloc(MAX_INSTRU_SIZE); instru = new(buf) online_instru_t(insert_load_buf_ptr, - op_L0_filter.get_value()); + op_L0_filter.get_value(), + &scratch_reserve_vec); if (!ipc_pipe.set_name(op_ipc_name.get_value().c_str())) DR_ASSERT(false); #ifdef UNIX diff --git a/ext/drreg/drreg.c b/ext/drreg/drreg.c index 47c81393868..c692c4f9b1c 100644 --- a/ext/drreg/drreg.c +++ b/ext/drreg/drreg.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2013-2017 Google, Inc. All rights reserved. + * Copyright (c) 2013-2018 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -210,7 +210,8 @@ restore_reg(void *drcontext, per_thread_t *pt, reg_id_t reg, uint slot, { LOG(drcontext, LOG_ALL, 3, "%s @%d."PFX" %s slot=%d release=%d\n", __FUNCTION__, pt->live_idx, - instr_get_app_pc(where), get_register_name(reg), slot, release); + where == NULL ? 
0 : instr_get_app_pc(where), + get_register_name(reg), slot, release); ASSERT(pt->slot_use[slot] == reg || /* aflags can be saved and restored using different regs */ (slot == AFLAGS_SLOT && pt->slot_use[slot] != DR_REG_NULL),