Skip to content

Commit

Permalink
i#2001 trace perf: do not store disp
Browse files Browse the repository at this point in the history
For offline traces, a "disp(base)" memref now stores only the base register
value; raw2trace adds the disp during post-processing, as it's statically
known.  The base can be written directly because it's already in a register,
which reduces scratch register pressure.

Moves the second scratch register reservation into the instru_t routines so
we can skip it for this optimization of just writing the base reg for
"disp(base)" memrefs.

Issue: #2001
  • Loading branch information
derekbruening committed Jan 5, 2018
1 parent 72b9ea4 commit 9d56b63
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 84 deletions.
33 changes: 19 additions & 14 deletions clients/drcachesim/tracer/instru.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#define _INSTRU_H_ 1

#include <stdint.h>
#include "drvector.h"
#include "../common/trace_entry.h"

#define MINSERT instrlist_meta_preinsert
Expand All @@ -52,9 +53,11 @@ class instru_t
// We require that this is passed at construction time:
instru_t(void (*insert_load_buf)(void *, instrlist_t *,
instr_t *, reg_id_t),
bool memref_needs_info)
bool memref_needs_info,
drvector_t *reg_vector_in)
: insert_load_buf_ptr(insert_load_buf),
memref_needs_full_info(memref_needs_info) {}
memref_needs_full_info(memref_needs_info),
reg_vector(reg_vector_in) {}
virtual ~instru_t() {}

virtual size_t sizeof_entry() const = 0;
Expand All @@ -77,15 +80,15 @@ class instru_t

// These insert inlined code to add an entry into the trace buffer.
virtual int instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app, opnd_t ref, bool write,
dr_pred_type_t pred) = 0;
virtual int instrument_instr(void *drcontext, void *tag, void **bb_field,
instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app) = 0;
virtual int instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t **delay_instrs, int num_delay_instrs) = 0;

virtual void bb_analysis(void *drcontext, void *tag, void **bb_field,
Expand All @@ -105,6 +108,7 @@ class instru_t
// Whether each data ref needs its own PC and type entry (i.e.,
// this info cannot be inferred from surrounding icache entries).
bool memref_needs_full_info;
drvector_t *reg_vector;

private:
instru_t() {}
Expand All @@ -115,7 +119,8 @@ class online_instru_t : public instru_t
public:
online_instru_t(void (*insert_load_buf)(void *, instrlist_t *,
instr_t *, reg_id_t),
bool memref_needs_info);
bool memref_needs_info,
drvector_t *reg_vector);
virtual ~online_instru_t();

virtual size_t sizeof_entry() const;
Expand All @@ -134,15 +139,15 @@ class online_instru_t : public instru_t
virtual int append_unit_header(byte *buf_ptr, thread_id_t tid);

virtual int instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app, opnd_t ref, bool write,
dr_pred_type_t pred);
virtual int instrument_instr(void *drcontext, void *tag, void **bb_field,
instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app);
virtual int instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t **delay_instrs, int num_delay_instrs);

virtual void bb_analysis(void *drcontext, void *tag, void **bb_field,
Expand All @@ -164,6 +169,7 @@ class offline_instru_t : public instru_t
offline_instru_t(void (*insert_load_buf)(void *, instrlist_t *,
instr_t *, reg_id_t),
bool memref_needs_info,
drvector_t *reg_vector,
ssize_t (*write_file)(file_t file,
const void *data,
size_t count),
Expand All @@ -186,15 +192,15 @@ class offline_instru_t : public instru_t
virtual int append_unit_header(byte *buf_ptr, thread_id_t tid);

virtual int instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app, opnd_t ref, bool write,
dr_pred_type_t pred);
virtual int instrument_instr(void *drcontext, void *tag, void **bb_field,
instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app);
virtual int instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t **delay_instrs, int num_delay_instrs);

virtual void bb_analysis(void *drcontext, void *tag, void **bb_field,
Expand Down Expand Up @@ -222,8 +228,7 @@ class offline_instru_t : public instru_t
reg_id_t reg_ptr, reg_id_t scratch, int adjust, app_pc pc,
uint instr_count);
int insert_save_addr(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_addr, int adjust, opnd_t ref,
bool write);
reg_id_t reg_ptr, int adjust, opnd_t ref, bool write);
int insert_save_type_and_size(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t scratch, int adjust,
instr_t *app, opnd_t ref, bool write);
Expand Down
59 changes: 44 additions & 15 deletions clients/drcachesim/tracer/instru_offline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,13 @@ void (*offline_instru_t::user_free)(void *data);
offline_instru_t::offline_instru_t(void (*insert_load_buf)(void *, instrlist_t *,
instr_t *, reg_id_t),
bool memref_needs_info,
drvector_t *reg_vector,
ssize_t (*write_file)(file_t file,
const void *data,
size_t count),
file_t module_file)
: instru_t(insert_load_buf, memref_needs_info),
write_file_func(write_file), modfile(module_file)
: instru_t(insert_load_buf, memref_needs_info, reg_vector),
write_file_func(write_file), modfile(module_file)
{
drcovlib_status_t res = drmodtrack_init();
DR_ASSERT(res == DRCOVLIB_SUCCESS);
Expand Down Expand Up @@ -366,20 +367,38 @@ offline_instru_t::insert_save_type_and_size(void *drcontext, instrlist_t *ilist,

int
offline_instru_t::insert_save_addr(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_addr, int adjust,
opnd_t ref, bool write)
reg_id_t reg_ptr, int adjust, opnd_t ref, bool write)
{
int disp = adjust;
bool reg_ptr_used;
insert_obtain_addr(drcontext, ilist, where, reg_addr, reg_ptr, ref, &reg_ptr_used);
if (reg_ptr_used) {
// Re-load because reg_ptr was clobbered.
insert_load_buf_ptr(drcontext, ilist, where, reg_ptr);
reg_id_t reg_addr;
bool reserved = false;
drreg_status_t res;
if (opnd_is_near_base_disp(ref) && opnd_get_index(ref) == DR_REG_NULL) {
/* Optimization: to avoid needing a scratch reg to lea into, we simply
* store the base reg directly and add the disp during post-processing.
*/
reg_addr = opnd_get_base(ref);
} else {
res = drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_addr);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
reserved = true;
bool reg_ptr_used;
insert_obtain_addr(drcontext, ilist, where, reg_addr, reg_ptr, ref,
&reg_ptr_used);
if (reg_ptr_used) {
// Re-load because reg_ptr was clobbered.
insert_load_buf_ptr(drcontext, ilist, where, reg_ptr);
}
reserved = true;
}
MINSERT(ilist, where,
XINST_CREATE_store(drcontext,
OPND_CREATE_MEMPTR(reg_ptr, disp),
opnd_create_reg(reg_addr)));
if (reserved) {
res = drreg_unreserve_register(drcontext, ilist, where, reg_addr);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
}
return sizeof(offline_entry_t);
}

Expand Down Expand Up @@ -415,7 +434,7 @@ offline_instru_t::instr_has_multiple_different_memrefs(instr_t *instr)

int
offline_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app, opnd_t ref, bool write,
dr_pred_type_t pred)
{
Expand All @@ -425,6 +444,10 @@ offline_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t
// We allow either 0 or all 1's as the type so no need to write anything else,
// unless a filter is in place in which case we need a PC entry.
if (memref_needs_full_info) {
reg_id_t reg_tmp;
drreg_status_t res =
drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
adjust += insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, adjust,
instr_get_app_pc(app), 0);
if (instr_has_multiple_different_memrefs(app)) {
Expand All @@ -435,9 +458,10 @@ offline_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t
adjust += insert_save_type_and_size(drcontext, ilist, where, reg_ptr,
reg_tmp, adjust, app, ref, write);
}
res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
}
adjust += insert_save_addr(drcontext, ilist, where, reg_ptr, reg_tmp, adjust, ref,
write);
adjust += insert_save_addr(drcontext, ilist, where, reg_ptr, adjust, ref, write);
instrlist_set_auto_predicate(ilist, DR_PRED_NONE);
return adjust;
}
Expand All @@ -446,10 +470,10 @@ offline_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t
int
offline_instru_t::instrument_instr(void *drcontext, void *tag, void **bb_field,
instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
instr_t *app)
reg_id_t reg_ptr, int adjust, instr_t *app)
{
app_pc pc;
reg_id_t reg_tmp;
if (!memref_needs_full_info) {
// We write just once per bb, if not filtering.
if ((ptr_uint_t)*bb_field > MAX_INSTR_COUNT)
Expand All @@ -459,16 +483,21 @@ offline_instru_t::instrument_instr(void *drcontext, void *tag, void **bb_field,
// XXX: For repstr do we want tag instead of skipping rep prefix?
pc = instr_get_app_pc(app);
}
drreg_status_t res =
drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
adjust += insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, adjust,
pc, memref_needs_full_info ? 1 : (uint)(ptr_uint_t)*bb_field);
if (!memref_needs_full_info)
*(ptr_uint_t*)bb_field = MAX_INSTR_COUNT + 1;
res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
return adjust;
}

int
offline_instru_t::instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t **delay_instrs, int num_delay_instrs)
{
// The post-processor fills in all instr info other than our once-per-bb entry.
Expand Down
29 changes: 24 additions & 5 deletions clients/drcachesim/tracer/instru_online.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@

online_instru_t::online_instru_t(void (*insert_load_buf)(void *, instrlist_t *,
instr_t *, reg_id_t),
bool memref_needs_info)
: instru_t(insert_load_buf, memref_needs_info)
bool memref_needs_info,
drvector_t *reg_vector)
: instru_t(insert_load_buf, memref_needs_info, reg_vector)
{
}

Expand Down Expand Up @@ -272,12 +273,16 @@ online_instru_t::insert_save_type_and_size(void *drcontext, instrlist_t *ilist,

int
online_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app, opnd_t ref, bool write,
dr_pred_type_t pred)
{
ushort type = (ushort)(write ? TRACE_TYPE_WRITE : TRACE_TYPE_READ);
ushort size = (ushort)drutil_opnd_mem_size_in_bytes(ref, app);
reg_id_t reg_tmp;
drreg_status_t res =
drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
if (!memref_needs_full_info) // For full info we skip this for !pred
instrlist_set_auto_predicate(ilist, pred);
if (memref_needs_full_info) {
Expand All @@ -304,17 +309,23 @@ online_instru_t::instrument_memref(void *drcontext, instrlist_t *ilist, instr_t
insert_save_type_and_size(drcontext, ilist, where, reg_ptr, reg_tmp,
type, size, adjust);
instrlist_set_auto_predicate(ilist, DR_PRED_NONE);
res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
return (adjust + sizeof(trace_entry_t));
}

int
online_instru_t::instrument_instr(void *drcontext, void *tag, void **bb_field,
instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t *app)
{
bool repstr_expanded = *bb_field != 0; // Avoid cl warning C4800.
app_pc pc = repstr_expanded ? dr_fragment_app_pc(tag) : instr_get_app_pc(app);
reg_id_t reg_tmp;
drreg_status_t res =
drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
// To handle zero-iter repstr loops this routine is called at the top of the bb
// where "app" is jecxz so we have to hardcode the rep str type and get length
// from the tag.
Expand All @@ -326,17 +337,23 @@ online_instru_t::instrument_instr(void *drcontext, void *tag, void **bb_field,
insert_save_type_and_size(drcontext, ilist, where, reg_ptr, reg_tmp,
type, size, adjust);
insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, pc, adjust);
res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
return (adjust + sizeof(trace_entry_t));
}

int
online_instru_t::instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where,
reg_id_t reg_ptr, reg_id_t reg_tmp, int adjust,
reg_id_t reg_ptr, int adjust,
instr_t **delay_instrs, int num_delay_instrs)
{
// Create and instrument for INSTR_BUNDLE
trace_entry_t entry;
int i;
reg_id_t reg_tmp;
drreg_status_t res =
drreg_reserve_register(drcontext, ilist, where, reg_vector, &reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
entry.type = TRACE_TYPE_INSTR_BUNDLE;
entry.size = 0;
for (i = 0; i < num_delay_instrs; i++) {
Expand All @@ -352,6 +369,8 @@ online_instru_t::instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t
entry.size = 0;
}
}
res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp);
DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
return adjust;
}

Expand Down
8 changes: 6 additions & 2 deletions clients/drcachesim/tracer/raw2trace.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2016-2017 Google, Inc. All rights reserved.
* Copyright (c) 2016-2018 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -376,6 +376,10 @@ raw2trace_t::append_memref(INOUT trace_entry_t **buf_in, uint tidx, instr_t *ins
}
// We take the full value, to handle low or high.
buf->addr = (addr_t) in_entry.combined_value;
if (opnd_is_near_base_disp(ref) && opnd_get_index(ref) == DR_REG_NULL) {
// We stored only the base reg, as an optimization.
buf->addr += opnd_get_disp(ref);
}
VPRINT(4, "Appended memref type %d size %d to " PFX "\n", buf->type, buf->size,
(ptr_uint_t)buf->addr);
*buf_in = ++buf;
Expand Down Expand Up @@ -550,7 +554,7 @@ raw2trace_t::merge_and_process_thread_files()
uint tidx = (uint)thread_files.size();
uint thread_count = (uint)thread_files.size();
offline_entry_t in_entry;
online_instru_t instru(NULL, false);
online_instru_t instru(NULL, false, NULL);
bool last_bb_handled = true;
std::vector<thread_id_t> tids(thread_files.size(), INVALID_THREAD_ID);
std::vector<uint64> times(thread_files.size(), 0);
Expand Down
Loading

0 comments on commit 9d56b63

Please sign in to comment.