From 1965352dc591799f6cb4686d2b4601ec0f3d4009 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 28 Aug 2018 15:16:19 -0700 Subject: [PATCH] i#3129 raw2trace perf: factored trace_converter_t out of raw2trace_t (#3149) Introducing trace_converter_t, a template factoring out of raw2trace_t the functionality necessary for building custom trace converters. Issue #3129 --- api/docs/release.dox | 1 + clients/drcachesim/tracer/raw2trace.cpp | 15 +- clients/drcachesim/tracer/raw2trace.h | 434 +++++++++++++++--------- 3 files changed, 284 insertions(+), 166 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index c079ead973e..7195e1a3b63 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -247,6 +247,7 @@ Further non-compatibility-affecting changes include: #raw2trace_t for writing trace metadata: process/thread ids, timestamps, etc. - Added #trace_metadata_reader_t, a set of utilities for checking and validating thread start successions of offline entries in a raw data file. + - Added #trace_converter_t, an extensibility mechanism for raw trace conversion. - Added drmemtrace_get_timestamp_from_offline_trace(), an API for fetching the timestamp from the beginning of a raw trace bundle (regardless of whether it is a thread start or just a subsequent bundle). 
diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index 28dc3d728bd..ecaf3e39e5a 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -116,9 +116,6 @@ const char *(*module_mapper_t::user_parse)(const char *src, OUT void **data) = n void (*module_mapper_t::user_free)(void *data) = nullptr; bool module_mapper_t::has_custom_data_global = true; -const char *raw2trace_t::FAULT_INTERRUPTED_BB = "INTERRUPTED"; -const thread_id_t raw2trace_t::INVALID_THREAD_ID = 0; - module_mapper_t::module_mapper_t( const char *module_map_in, const char *(*parse_cb)(const char *src, OUT void **data), std::string (*process_cb)(drmodtrack_info_t *info, void *data, void *user_data), @@ -299,7 +296,7 @@ raw2trace_t::read_and_map_modules() return err; } - modvec_ptr = &module_mapper->get_loaded_modules(); + set_modvec(&module_mapper->get_loaded_modules()); return module_mapper->get_last_error(); } @@ -543,7 +540,7 @@ raw2trace_t::merge_and_process_thread_files() buf += trace_metadata_writer_t::write_timestamp(buf, (uintptr_t)times[tidx]); // We have to write this now before we append any bb entries. 
size = buf - buf_base; - CHECK((uint)size < MAX_COMBINED_ENTRIES, "Too many entries"); + CHECK((uint)size < WRITE_BUFFER_SIZE, "Too many entries"); if (!out_file->write((char *)buf_base, size)) return "Failed to write to output file"; buf = buf_base; @@ -780,19 +777,15 @@ raw2trace_t::raw2trace_t(const char *module_map_in, const std::vector &thread_files_in, std::ostream *out_file_in, void *dcontext_in, unsigned int verbosity_in) - : modmap(module_map_in) - , modvec_ptr(nullptr) + : trace_converter_t(dcontext_in) + , modmap(module_map_in) , thread_files(thread_files_in) , out_file(out_file_in) - , dcontext(dcontext_in) - , prev_instr_was_rep_string(false) - , instrs_are_separate(false) , verbosity(verbosity_in) , user_process(nullptr) , user_process_data(nullptr) { if (dcontext == NULL) { - dcontext = dr_standalone_init(); #ifdef ARM // We keep the mode at ARM and rely on LSB=1 offsets in the modoffs fields // to trigger Thumb decoding. diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index 1685b146bae..a1c5b798380 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -99,7 +99,7 @@ struct instr_summary_t final { } private: - friend class raw2trace_t; + template friend class trace_converter_t; byte length() const @@ -356,119 +356,94 @@ struct trace_header_t { uint64 timestamp; }; +// XXX: DR should export this +#define INVALID_THREAD_ID 0 + /** - * The raw2trace class converts the raw offline trace format to the format - * expected by analysis tools. It requires access to the binary files for the - * libraries and executable that were present during tracing. + * #trace_converter_t is a reusable component that encapsulates raw trace conversion. + * + * Conversion happens from a data source abstracted by the type parameter T. We make no + * assumption about how thread buffers are organized. We do assume the internal + * composition of thread buffers is "as written" by the thread. 
For example, all thread + * buffers belonging to different threads may be in separate files; or buffers may be + * co-located in one large file, or spread across multiple, mixed-thread files. + * + * #trace_converter_t expects to be instantiated with its type template T which should + * provide the following APIs: + * + *
  • const offline_entry_t *get_next_entry() + * + * Point to the next offline entry_t. There is no assumption about the underlying source + * of the data, and #trace_converter_t will not attempt to dereference past the provided + * pointer.
  • + * + *
  • void unread_last_entry() + * + * Ensure that the next call to get_next_entry() re-reads the last value.
  • + * + *
  • trace_entry_t *get_write_buffer() + * + * Return a writable buffer guaranteed to be at least #WRITE_BUFFER_SIZE large. + * get_write_buffer() may reuse the same buffer after write() or write_delayed_branches() + * is called.
  • + * + *
  • bool write(const trace_entry_t *start, const trace_entry_t *end) + * + * Writes the converted traces between start and end, where end is past the last + * item to write. Both start and end are assumed to be pointers inside a buffer + * returned by get_write_buffer().
  • + * + *
  • std::string write_delayed_branches(const trace_entry_t *start, const trace_entry_t + * *end) + * + * Similar to write(), but treat the provided traces as delayed branches: if they + * are the last values in a record, they belong to the next record of the same + * thread.
  • + * + *
  • std::string on_thread_end() + * + * Callback notifying the currently-processed thread has exited. #trace_converter_t + * extenders are expected to track record metadata themselves. #trace_converter_t offers + * APIs for extracting that metadata.
  • + * + *
  • void log(uint level, const char *fmt, ...) + * + * Implementers are given the opportunity to implement their own logging. The level + * parameter represents severity: the lower the level, the higher the severity.
  • + * + *
  • const instr_summary_t *get_instr_summary(uint64 modx, uint64 modoffs, INOUT app_pc + * *pc, app_pc orig) + * + * Return the #instr_summary_t representation of the instruction at *pc, + * updating the value at pc to the PC of the next instruction. It is assumed the app + * binaries have already been loaded using #module_mapper_t, and the values at *pc point + * within memory mapped by the module mapper. This API provides an opportunity to cache + * decoded instructions.
  • + *
*/ -class raw2trace_t { +template class trace_converter_t { #define DR_CHECK(val, msg) \ do { \ if (!(val)) \ return msg; \ } while (0) -public: - // module_map, thread_files and out_file are all owned and opened/closed by the - // caller. module_map is not a string and can contain binary data. - raw2trace_t(const char *module_map, const std::vector &thread_files, - std::ostream *out_file, void *dcontext = NULL, - unsigned int verbosity = 0); - ~raw2trace_t(); - +protected: /** - * Adds handling for custom data fields that were stored with each module via - * drmemtrace_custom_module_data() during trace generation. When do_conversion() - * or do_module_parsing() is subsequently called, its parsing of the module data - * will invoke \p parse_cb, which should advance the module data pointer passed - * in \p src and return it as its return value (or nullptr on error), returning - * the resulting parsed data in \p data. The \p data pointer will later be - * passed to both \p process_cb, which can update the module path inside \p info - * (and return a non-empty string on error), and \b free_cb, which can perform - * cleanup. - * - * A custom callback value \p process_cb_user_data can be passed to \p - * process_cb. The same is not provided for the other callbacks as they end up - * using the drmodtrack_add_custom_data() framework where there is no support for - * custom callback parameters. - * - * Returns a non-empty error message on failure. - */ - std::string - handle_custom_data(const char *(*parse_cb)(const char *src, OUT void **data), - std::string (*process_cb)(drmodtrack_info_t *info, void *data, - void *user_data), - void *process_cb_user_data, void (*free_cb)(void *data)); - - /** - * Performs the first step of do_conversion() without further action: parses and - * iterates over the list of modules. 
This is provided to give the user a method - * for iterating modules in the presence of the custom field used by drmemtrace - * that prevents direct use of drmodtrack_offline_read(). - * On success, calls the \p process_cb function passed to handle_custom_data() - * for every module in the list, and returns an empty string at the end. - * Returns a non-empty error message on failure. - * - * \deprecated #module_mapper_t should be used instead. + * Construct a new #trace_converter_t object. If a nullptr dcontext_in is passed, + * creates a new DR context via dr_standalone_init(). */ - std::string - do_module_parsing(); - - /** - * This interface is meant to be used with a final trace rather than a raw - * trace, using the module log file saved from the raw2trace conversion. - * This routine first calls do_module_parsing() and then maps each module into - * the current address space, allowing the user to augment the instruction - * information in the trace with additional information by decoding the - * instruction bytes. The routine find_mapped_trace_address() should be used - * to convert from memref_t.instr.addr to the corresponding mapped address in - * the current process. - * Returns a non-empty error message on failure. - * - * \deprecated #module_mapper_t::get_loaded_modules() should be used instead. - */ - std::string - do_module_parsing_and_mapping(); - - /** - * This interface is meant to be used with a final trace rather than a raw - * trace, using the module log file saved from the raw2trace conversion. - * When do_module_parsing_and_mapping() has been called, this routine can be used - * to convert an instruction program counter in a trace into an address in the - * current process where the instruction bytes for that instruction are mapped, - * allowing decoding for obtaining further information than is stored in the trace. - * Returns a non-empty error message on failure. 
- * - * \deprecated #module_mapper_t::find_mapped_trace_address() should be used instead. - */ - std::string - find_mapped_trace_address(app_pc trace_address, OUT app_pc *mapped_address); + trace_converter_t(void *dcontext_in) + : dcontext(dcontext_in == nullptr ? dr_standalone_init() : dcontext_in) + { + } /** - * Performs the conversion from raw data to finished trace files. - * Returns a non-empty error message on failure. + * Convert starting from in_entry, and reading more entries as required. + * Sets end_of_record to true if processing hit the end of a record. + * set_modvec() must have been called by the implementation before calling this API. */ std::string - do_conversion(); - - static std::string - check_thread_file(std::istream *f); - -private: - // We store this in drmodtrack_info_t.custom to combine our binary contents - // data with any user-added module data from drmemtrace_custom_module_data. - struct custom_module_data_t { - size_t contents_size; - const char *contents; - void *user_data; - }; - - raw2trace_t * - impl() - { - return this; - } - std::string process_offline_entry(const offline_entry_t *in_entry, thread_id_t tid, OUT bool *end_of_record, OUT bool *last_bb_handled) { @@ -535,13 +510,19 @@ class raw2trace_t { return ss.str(); } size_t size = reinterpret_cast(buf) - buf_base; - DR_CHECK((uint)size < MAX_COMBINED_ENTRIES, "Too many entries"); + DR_CHECK((uint)size < WRITE_BUFFER_SIZE, "Too many entries"); if (size > 0) { if (!impl()->write(buf_base, reinterpret_cast(buf))) return "Failed to write to output file"; } return ""; } + + /** + * Read the header of a thread, by calling T's get_next_entry() successively to + * populate the header values. The timestamp field is populated only + * for legacy traces. 
+ */ std::string read_header(OUT trace_header_t *header) { @@ -565,27 +546,41 @@ class raw2trace_t { return ""; } - const offline_entry_t * - get_next_entry(); - void - unread_last_entry(); - trace_entry_t * - get_write_buffer(); - bool - write(const trace_entry_t *start, const trace_entry_t *end); - std::string - write_delayed_branches(const trace_entry_t *start, const trace_entry_t *end); - std::string - on_thread_end(); + /** + * The trace_entry_t buffer returned by get_write_buffer() is assumed to be at least + * #WRITE_BUFFER_SIZE large. + */ + static const uint WRITE_BUFFER_SIZE = 64; + + /** + * The pointer to the DR context. + */ + void *const dcontext; + + /** + * Get the module map. + */ + const std::vector & + modvec() const + { + return *modvec_ptr; + } + + /** + * Set the module map. Must be called before process_offline_entry() is called. + */ void - log(uint level, const char *fmt, ...); + set_modvec(const std::vector *modvec_in) + { + modvec_ptr = modvec_in; + } - const instr_summary_t * - get_instr_summary(uint64 modx, uint64 modoffs, INOUT app_pc *pc, app_pc orig); - std::string - read_and_map_modules(); - std::string - merge_and_process_thread_files(); +private: + T * + impl() + { + return static_cast(this); + } std::string append_bb_entries(const offline_entry_t *in_entry, OUT bool *handled) { @@ -628,8 +623,8 @@ class raw2trace_t { // To avoid repeatedly decoding the same instruction on every one of its // dynamic executions, we cache the decoding in a hashtable. pc = decode_pc; - instr = get_instr_summary(in_entry->pc.modidx, in_entry->pc.modoffs, &pc, - orig_pc); + instr = impl()->get_instr_summary(in_entry->pc.modidx, in_entry->pc.modoffs, + &pc, orig_pc); if (instr == nullptr) { // We hit some error somewhere, and already reported it. Just exit the // loop. @@ -664,25 +659,29 @@ class raw2trace_t { if ((!instrs_are_separate || skip_icache) && // Rule out OP_lea. 
(instr->reads_memory() || instr->writes_memory())) { + bool interrupted = false; for (uint j = 0; j < instr->num_mem_srcs(); j++) { - error = append_memref(&buf, tidx, instr, instr->mem_src_at(j), false); - if (error == FAULT_INTERRUPTED_BB) { + error = append_memref(&buf, instr, instr->mem_src_at(j), false, + &interrupted); + if (!error.empty()) + return error; + if (interrupted) { truncated = true; break; - } else if (!error.empty()) - return error; + } } for (uint j = 0; !truncated && j < instr->num_mem_dests(); j++) { - error = append_memref(&buf, tidx, instr, instr->mem_dest_at(j), true); - if (error == FAULT_INTERRUPTED_BB) { + error = append_memref(&buf, instr, instr->mem_dest_at(j), true, + &interrupted); + if (!error.empty()) + return error; + if (interrupted) { truncated = true; break; - } else if (!error.empty()) - return error; + } } } - DR_CHECK((size_t)(buf - buf_start) < MAX_COMBINED_ENTRIES, - "Too many entries"); + DR_CHECK((size_t)(buf - buf_start) < WRITE_BUFFER_SIZE, "Too many entries"); if (instr->is_cti()) { // In case this is the last branch prior to a thread switch, buffer it. We // avoid swapping threads immediately after a branch so that analyzers can @@ -704,12 +703,11 @@ class raw2trace_t { return ""; } - // Returns FAULT_INTERRUPTED_BB if a fault occurred on this memref. - // Any other non-empty string is a fatal error. std::string - append_memref(INOUT trace_entry_t **buf_in, uint tidx, const instr_summary_t *instr, - opnd_t ref, bool write) + append_memref(INOUT trace_entry_t **buf_in, const instr_summary_t *instr, opnd_t ref, + bool write, OUT bool *interrupted) { + *interrupted = false; trace_entry_t *buf = *buf_in; const offline_entry_t *in_entry = impl()->get_next_entry(); bool have_type = false; @@ -782,11 +780,153 @@ class raw2trace_t { in_entry->extended.valueB == TRACE_MARKER_TYPE_KERNEL_EVENT) { // A signal/exception interrupted the bb after the memref. 
impl()->log(4, "Signal/exception interrupted the bb\n"); - return FAULT_INTERRUPTED_BB; + *interrupted = true; } return ""; } + const std::vector *modvec_ptr = nullptr; + bool prev_instr_was_rep_string = false; + // This indicates that each memref has its own PC entry and that each + // icache entry does not need to be considered a memref PC entry as well. + bool instrs_are_separate = false; + + trace_converter_t(const trace_converter_t &) = delete; + trace_converter_t & + operator=(const trace_converter_t &) = delete; +#ifndef WINDOWS + trace_converter_t(trace_converter_t &&) = default; + trace_converter_t & + operator=(trace_converter_t &&) = default; +#endif +#undef DR_CHECK +}; + +/** + * The raw2trace class converts the raw offline trace format to the format + * expected by analysis tools. It requires access to the binary files for the + * libraries and executable that were present during tracing. + */ +class raw2trace_t : public trace_converter_t { +public: + // module_map, thread_files and out_file are all owned and opened/closed by the + // caller. module_map is not a string and can contain binary data. + raw2trace_t(const char *module_map, const std::vector &thread_files, + std::ostream *out_file, void *dcontext = NULL, + unsigned int verbosity = 0); + ~raw2trace_t(); + + /** + * Adds handling for custom data fields that were stored with each module via + * drmemtrace_custom_module_data() during trace generation. When do_conversion() + * or do_module_parsing() is subsequently called, its parsing of the module data + * will invoke \p parse_cb, which should advance the module data pointer passed + * in \p src and return it as its return value (or nullptr on error), returning + * the resulting parsed data in \p data. The \p data pointer will later be + * passed to both \p process_cb, which can update the module path inside \p info + * (and return a non-empty string on error), and \b free_cb, which can perform + * cleanup. 
+ * + * A custom callback value \p process_cb_user_data can be passed to \p + * process_cb. The same is not provided for the other callbacks as they end up + * using the drmodtrack_add_custom_data() framework where there is no support for + * custom callback parameters. + * + * Returns a non-empty error message on failure. + */ + std::string + handle_custom_data(const char *(*parse_cb)(const char *src, OUT void **data), + std::string (*process_cb)(drmodtrack_info_t *info, void *data, + void *user_data), + void *process_cb_user_data, void (*free_cb)(void *data)); + + /** + * Performs the first step of do_conversion() without further action: parses and + * iterates over the list of modules. This is provided to give the user a method + * for iterating modules in the presence of the custom field used by drmemtrace + * that prevents direct use of drmodtrack_offline_read(). + * On success, calls the \p process_cb function passed to handle_custom_data() + * for every module in the list, and returns an empty string at the end. + * Returns a non-empty error message on failure. + * + * \deprecated #module_mapper_t should be used instead. + */ + std::string + do_module_parsing(); + + /** + * This interface is meant to be used with a final trace rather than a raw + * trace, using the module log file saved from the raw2trace conversion. + * This routine first calls do_module_parsing() and then maps each module into + * the current address space, allowing the user to augment the instruction + * information in the trace with additional information by decoding the + * instruction bytes. The routine find_mapped_trace_address() should be used + * to convert from memref_t.instr.addr to the corresponding mapped address in + * the current process. + * Returns a non-empty error message on failure. + * + * \deprecated #module_mapper_t::get_loaded_modules() should be used instead. 
+ */ + std::string + do_module_parsing_and_mapping(); + + /** + * This interface is meant to be used with a final trace rather than a raw + * trace, using the module log file saved from the raw2trace conversion. + * When do_module_parsing_and_mapping() has been called, this routine can be used + * to convert an instruction program counter in a trace into an address in the + * current process where the instruction bytes for that instruction are mapped, + * allowing decoding for obtaining further information than is stored in the trace. + * Returns a non-empty error message on failure. + * + * \deprecated #module_mapper_t::find_mapped_trace_address() should be used instead. + */ + std::string + find_mapped_trace_address(app_pc trace_address, OUT app_pc *mapped_address); + + /** + * Performs the conversion from raw data to finished trace files. + * Returns a non-empty error message on failure. + */ + std::string + do_conversion(); + + static std::string + check_thread_file(std::istream *f); + +private: + friend class trace_converter_t; + + // We store this in drmodtrack_info_t.custom to combine our binary contents + // data with any user-added module data from drmemtrace_custom_module_data. 
+ struct custom_module_data_t { + size_t contents_size; + const char *contents; + void *user_data; + }; + + // interface expected by trace_converter_t + const offline_entry_t * + get_next_entry(); + void + unread_last_entry(); + trace_entry_t * + get_write_buffer(); + bool + write(const trace_entry_t *start, const trace_entry_t *end); + std::string + write_delayed_branches(const trace_entry_t *start, const trace_entry_t *end); + std::string + on_thread_end(); + void + log(uint level, const char *fmt, ...); + const instr_summary_t * + get_instr_summary(uint64 modx, uint64 modoffs, INOUT app_pc *pc, app_pc orig); + + std::string + read_and_map_modules(); + std::string + merge_and_process_thread_files(); std::string append_delayed_branch(uint tidx); @@ -800,26 +940,12 @@ class raw2trace_t { bool thread_file_at_eof(uint tidx); - const std::vector & - modvec() const - { - return *modvec_ptr; - } std::vector> pre_read; - static const uint MAX_COMBINED_ENTRIES = 64; - static const char *FAULT_INTERRUPTED_BB; - static const thread_id_t INVALID_THREAD_ID; - const char *modmap; - const std::vector *modvec_ptr; std::vector thread_files; std::ostream *out_file; - void *dcontext; - bool prev_instr_was_rep_string; - // This indicates that each memref has its own PC entry and that each - // icache entry does not need to be considered a memref PC entry as well. - bool instrs_are_separate; + unsigned int verbosity; // We use a hashtable to cache decodings. We compared the performance of // hashtable_t to std::map.find, std::map.lower_bound, std::tr1::unordered_map, @@ -842,10 +968,8 @@ class raw2trace_t { // Current trace conversion state. offline_entry_t last_entry; uint tidx = 0; - trace_entry_t out_buf[MAX_COMBINED_ENTRIES]; + trace_entry_t out_buf[WRITE_BUFFER_SIZE]; uint thread_count = 0; - -#undef DR_CHECK }; #endif /* _RAW2TRACE_H_ */