Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#3129 raw2trace perf: summarize instr_t during raw2trace conversion #3130

Merged
merged 10 commits into from
Aug 21, 2018
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 130 additions & 53 deletions clients/drcachesim/tracer/raw2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@
} \
} while (0)

#define DO_VERBOSE(level, x) \
#define DO_VERBOSE(level, x, verbosity) \
do { \
if (this->verbosity >= (level)) { \
if ((verbosity) >= (level)) { \
x; /* ; makes vera++ happy */ \
} \
} while (0)
Expand Down Expand Up @@ -422,8 +422,8 @@ raw2trace_t::thread_file_at_eof(uint tidx)
// Returns FAULT_INTERRUPTED_BB if a fault occurred on this memref.
// Any other non-empty string is a fatal error.
std::string
raw2trace_t::append_memref(INOUT trace_entry_t **buf_in, uint tidx, instr_t *instr,
opnd_t ref, bool write)
raw2trace_t::append_memref(INOUT trace_entry_t **buf_in, uint tidx,
const instr_summary_t *instr, opnd_t ref, bool write)
{
trace_entry_t *buf = *buf_in;
offline_entry_t in_entry;
Expand Down Expand Up @@ -458,10 +458,10 @@ raw2trace_t::append_memref(INOUT trace_entry_t **buf_in, uint tidx, instr_t *ins
return "";
}
if (!have_type) {
if (instr_is_prefetch(instr)) {
buf->type = instru_t::instr_to_prefetch_type(instr);
if (instr->is_prefetch()) {
buf->type = instr->prefetch_type();
buf->size = 1;
} else if (instru_t::instr_is_flush(instr)) {
} else if (instr->is_flush()) {
buf->type = TRACE_TYPE_DATA_FLUSH;
buf->size = (ushort)opnd_size_in_bytes(opnd_get_size(ref));
} else {
Expand Down Expand Up @@ -504,7 +504,7 @@ std::string
raw2trace_t::append_bb_entries(uint tidx, offline_entry_t *in_entry, OUT bool *handled)
{
uint instr_count = in_entry->pc.instr_count;
instr_t *instr;
const instr_summary_t *instr;
trace_entry_t buf_start[MAX_COMBINED_ENTRIES];
app_pc start_pc = modvec()[in_entry->pc.modidx].map_base + in_entry->pc.modoffs;
app_pc pc, decode_pc = start_pc;
Expand Down Expand Up @@ -538,29 +538,16 @@ raw2trace_t::append_bb_entries(uint tidx, offline_entry_t *in_entry, OUT bool *h
modvec()[in_entry->pc.modidx].orig_base;
// To avoid repeatedly decoding the same instruction on every one of its
// dynamic executions, we cache the decoding in a hashtable.
instr = (instr_t *)hashtable_lookup(&decode_cache, decode_pc);
if (instr == NULL) {
instr = instr_create(dcontext);
// We assume the default ISA mode and currently require the 32-bit
// postprocessor for 32-bit applications.
pc = decode(dcontext, decode_pc, instr);
if (pc == NULL || !instr_valid(instr)) {
WARN("Encountered invalid/undecodable instr @ %s+" PFX,
modvec()[in_entry->pc.modidx].path,
(ptr_uint_t)in_entry->pc.modoffs);
break;
}
hashtable_add(&decode_cache, decode_pc, instr);
} else {
pc = instr_get_raw_bits(instr) + instr_length(dcontext, instr);
pc = decode_pc;
instr =
get_instr_summary(in_entry->pc.modidx, in_entry->pc.modoffs, &pc, orig_pc);
if (instr == nullptr) {
// We hit some error somewhere, and already reported it. Just exit the loop.
break;
}
DO_VERBOSE(3, {
instr_set_translation(instr, orig_pc);
dr_print_instr(dcontext, STDOUT, instr, "");
});
CHECK(!instr_is_cti(instr) || i == instr_count - 1, "invalid cti");
CHECK(!instr->is_cti() || i == instr_count - 1, "invalid cti");
// FIXME i#1729: make bundles via lazy accum until hit memref/end.
buf->type = instru_t::instr_to_instr_type(instr);
buf->type = instr->type();
if (buf->type == TRACE_TYPE_INSTR_MAYBE_FETCH) {
// We want it to look like the original rep string, with just one instr
// fetch for the whole loop, instead of the drutil-expanded loop.
Expand All @@ -577,40 +564,36 @@ raw2trace_t::append_bb_entries(uint tidx, offline_entry_t *in_entry, OUT bool *h
}
} else
prev_instr_was_rep_string = false;
buf->size = (ushort)(skip_icache ? 0 : instr_length(dcontext, instr));
buf->size = (ushort)(skip_icache ? 0 : instr->length());
buf->addr = (addr_t)orig_pc;
++buf;
decode_pc = pc;
// We need to interleave instrs with memrefs.
// There is no following memref for (instrs_are_separate && !skip_icache).
if ((!instrs_are_separate || skip_icache) &&
// Rule out OP_lea.
(instr_reads_memory(instr) || instr_writes_memory(instr))) {
for (int j = 0; j < instr_num_srcs(instr); j++) {
if (opnd_is_memory_reference(instr_get_src(instr, j))) {
std::string error =
append_memref(&buf, tidx, instr, instr_get_src(instr, j), false);
if (error == FAULT_INTERRUPTED_BB) {
truncated = true;
break;
} else if (!error.empty())
return error;
}
(instr->reads_memory() || instr->writes_memory())) {
for (uint j = 0; j < instr->num_mem_srcs(); j++) {
std::string error =
append_memref(&buf, tidx, instr, instr->mem_src_at(j), false);
if (error == FAULT_INTERRUPTED_BB) {
truncated = true;
break;
} else if (!error.empty())
return error;
}
for (int j = 0; !truncated && j < instr_num_dsts(instr); j++) {
if (opnd_is_memory_reference(instr_get_dst(instr, j))) {
std::string error =
append_memref(&buf, tidx, instr, instr_get_dst(instr, j), true);
if (error == FAULT_INTERRUPTED_BB) {
truncated = true;
break;
} else if (!error.empty())
return error;
}
for (uint j = 0; !truncated && j < instr->num_mem_dests(); j++) {
std::string error =
append_memref(&buf, tidx, instr, instr->mem_dest_at(j), true);
if (error == FAULT_INTERRUPTED_BB) {
truncated = true;
break;
} else if (!error.empty())
return error;
}
}
CHECK((size_t)(buf - buf_start) < MAX_COMBINED_ENTRIES, "Too many entries");
if (instr_is_cti(instr)) {
if (instr->is_cti()) {
CHECK(delayed_branch[tidx].empty(), "Failed to flush delayed branch");
// In case this is the last branch prior to a thread switch, buffer it. We
// avoid swapping threads immediately after a branch so that analyzers can
Expand Down Expand Up @@ -878,6 +861,100 @@ raw2trace_t::do_conversion()
return "";
}

// Returns the summary for the instruction located at *pc, decoding it and adding it
// to decode_cache on first use so later dynamic executions reuse the cached summary.
//
// modidx, modoffs: module index and offset of the instruction; used here only to
//   build the warning message when decoding fails.
// pc: on entry, the address to decode from (also the cache key).  On a cache hit it
//   is set to the summary's next_pc(); on a miss it is passed to
//   instr_summary_t::construct(), which overwrites it with the result of decode().
// orig: forwarded to instr_summary_t::construct() as the original application pc.
//
// Returns nullptr, after emitting a warning, if the instruction cannot be decoded.
const instr_summary_t *
raw2trace_t::get_instr_summary(uint64 modidx, uint64 modoffs, INOUT app_pc *pc,
                               app_pc orig)
{
    const app_pc decode_pc = *pc;
    const instr_summary_t *ret =
        static_cast<const instr_summary_t *>(hashtable_lookup(&decode_cache, decode_pc));
    if (ret == nullptr) {
        instr_summary_t *desc = new instr_summary_t();
        if (!instr_summary_t::construct(dcontext, pc, orig, desc, verbosity)) {
            WARN("Encountered invalid/undecodable instr @ %s+" PFX,
                 modvec()[static_cast<size_t>(modidx)].path, (ptr_uint_t)modoffs);
            // The summary never made it into decode_cache, so nothing else owns it:
            // free it here rather than leaking it on every undecodable instruction.
            delete desc;
            return nullptr;
        }
        hashtable_add(&decode_cache, decode_pc, desc);
        ret = desc;
    } else {
        // XXX i#3129: Log some rendering of the instruction summary that will be
        // returned.
        *pc = ret->next_pc();
    }
    return ret;
}

bool
instr_summary_t::construct(void *dcontext, INOUT app_pc *pc, app_pc orig_pc,
OUT instr_summary_t *desc, uint verbosity)
{
struct instr_destroy_t {
instr_destroy_t(void *dcontext_in, instr_t *instr_in)
: dcontext(dcontext_in)
, instr(instr_in)
{
}
void *dcontext;
instr_t *instr;
~instr_destroy_t()
{
instr_destroy(dcontext, instr);
}
};

instr_t *instr = instr_create(dcontext);
instr_destroy_t instr_collector(dcontext, instr);

*pc = decode(dcontext, *pc, instr);
if (*pc == nullptr || !instr_valid(instr)) {
return false;
}
DO_VERBOSE(3,
{
instr_set_translation(instr, orig_pc);
dr_print_instr(dcontext, STDOUT, instr, "");
},
verbosity);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems easier to have verbosity ordered before the statement?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The macro is used exactly in one place, so chose to delete the macro and write the explicit check - imho, easier to understand, and one less macro.

desc->next_pc_ = *pc;
desc->packed_ = 0;

bool is_prefetch = instr_is_prefetch(instr);
bool reads_memory = instr_reads_memory(instr);
bool writes_memory = instr_writes_memory(instr);

if (reads_memory)
desc->packed_ |= kReadsMemMask;
if (writes_memory)
desc->packed_ |= kWritesMemMask;
if (is_prefetch)
desc->packed_ |= kIsPrefetchMask;
if (instru_t::instr_is_flush(instr))
desc->packed_ |= kIsFlushMask;
if (instr_is_cti(instr))
desc->packed_ |= kIsCtiMask;

desc->type_ = instru_t::instr_to_instr_type(instr);
desc->prefetch_type_ = is_prefetch ? instru_t::instr_to_prefetch_type(instr) : 0;
desc->length_ = static_cast<byte>(instr_length(dcontext, instr));

if (reads_memory || writes_memory) {
for (int i = 0, e = instr_num_srcs(instr); i < e; ++i) {
opnd_t op = instr_get_src(instr, i);
if (opnd_is_memory_reference(op))
desc->mem_srcs_and_dests_.push_back(op);
}
desc->num_mem_srcs_ = static_cast<uint8_t>(desc->mem_srcs_and_dests_.size());

for (int i = 0, e = instr_num_dsts(instr); i < e; ++i) {
opnd_t op = instr_get_dst(instr, i);
if (opnd_is_memory_reference(op))
desc->mem_srcs_and_dests_.push_back(op);
}
}
return true;
}

raw2trace_t::raw2trace_t(const char *module_map_in,
const std::vector<std::istream *> &thread_files_in,
std::ostream *out_file_in, void *dcontext_in,
Expand Down Expand Up @@ -919,7 +996,7 @@ raw2trace_t::~raw2trace_t()
// so we have to explicitly free the payloads.
for (uint i = 0; i < HASHTABLE_SIZE(decode_cache.table_bits); i++) {
for (hash_entry_t *e = decode_cache.table[i]; e != NULL; e = e->next) {
instr_destroy(dcontext, (instr_t *)e->payload);
delete (static_cast<instr_summary_t *>(e->payload));
}
}
hashtable_delete(&decode_cache);
Expand Down
Loading