diff --git a/api/docs/release.dox b/api/docs/release.dox index 87e9a33166b..3dd1c281755 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -183,6 +183,12 @@ Further non-compatibility-affecting changes include: They can be created with opnd_create_reg_element_vector(), detected with opnd_is_element_vector_reg() and have their element size retrieved by opnd_get_vector_element_size(). + - Deprecated the drmemtrace analysis tool functions initialize() and + parallel_shard_init(), replacing them with initialize_stream() and + parallel_shard_init_stream(). The old versions will continue to work. + +************************************************** +
The changes between version 9.0.1 and 9.0.0 include the following compatibility changes: @@ -208,6 +214,9 @@ Further non-compatibility-affecting changes include: - Added -tool_dir drrun/drconfig parameter to control where to look for tool config files. +************************************************** +
+ The changes between version 9.0.0 and 8.0.0 include the following compatibility changes: diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 4b8b75845cc..3db2a9a9fbd 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -250,6 +250,7 @@ link_with_pthread(drmemtrace_analyzer) install_client_nonDR_header(drmemtrace common/utils.h) install_client_nonDR_header(drmemtrace common/trace_entry.h) install_client_nonDR_header(drmemtrace common/memref.h) +install_client_nonDR_header(drmemtrace common/memtrace_stream.h) install_client_nonDR_header(drmemtrace reader/reader.h) install_client_nonDR_header(drmemtrace analysis_tool.h) install_client_nonDR_header(drmemtrace analyzer.h) diff --git a/clients/drcachesim/analysis_tool.h b/clients/drcachesim/analysis_tool.h index 4d881060eca..aa43710d5bb 100644 --- a/clients/drcachesim/analysis_tool.h +++ b/clients/drcachesim/analysis_tool.h @@ -44,6 +44,7 @@ // To support installation of headers for analysis tools into a single // separate directory we omit common/ here and rely on -I. #include "memref.h" +#include "memtrace_stream.h" #include /** @@ -93,17 +94,31 @@ class analysis_tool_t { : success_(true) {}; virtual ~analysis_tool_t() {}; /**< Destructor. */ /** - * Tools are encouraged to perform any initialization that might fail here rather - * than in the constructor. On an error, this returns an error string. On success, - * it returns "". + * \deprecated The initialize_stream() function is called by the analyzer; this + * function is only called if the default implementation of initialize_stream() is + * left in place and it calls this version. On an error, this returns an error + * string. On success, it returns "". */ virtual std::string initialize() { return ""; } + /** + * Tools are encouraged to perform any initialization that might fail here rather + * than in the constructor. 
The \p serial_stream interface allows tools to query + * details of the underlying trace during serial operation; it is nullptr for + * parallel operation (a per-shard version is passed to parallel_shard_init_stream()). + * On an error, this returns an error string. On success, it returns "". + */ + virtual std::string + initialize_stream(memtrace_stream_t *serial_stream) + { + return initialize(); + } /** Returns whether the tool was created successfully. */ - virtual bool operator!() + virtual bool + operator!() { return !success_; } @@ -136,7 +151,7 @@ class analysis_tool_t { /** * Returns whether this tool supports analyzing trace shards concurrently, or * whether it needs to see a single thread-interleaved stream of traced - * events. + * events. This may be called prior to initialize(). */ virtual bool parallel_shard_supported() @@ -168,20 +183,32 @@ class analysis_tool_t { return ""; } /** - * Invoked once for each trace shard prior to calling parallel_shard_memref() for - * that shard, this allows a tool to create data local to a shard. The \p - * shard_index is a unique identifier allowing shard data to be stored into a - * global table if desired (typically for aggregation use in print_results()). - * The \p worker_data is the return value of parallel_worker_init() for the - * worker thread who will exclusively operate on this shard. The return value - * here will be passed to each invocation of parallel_shard_memref() for that - * same shard. + * \deprecated The parallel_shard_init_stream() is what is called by the analyzer; + * this function is only called if the default implementation of + * parallel_shard_init_stream() is left in place and it calls this version. */ virtual void * parallel_shard_init(int shard_index, void *worker_data) { return nullptr; } + /** + * Invoked once for each trace shard prior to calling parallel_shard_memref() for + * that shard, this allows a tool to create data local to a shard. 
The \p + * shard_index is a unique identifier allowing shard data to be stored into a global + * table if desired (typically for aggregation use in print_results()). The \p + * worker_data is the return value of parallel_worker_init() for the worker thread + * who will exclusively operate on this shard. The \p shard_stream allows tools to + * query details of the underlying trace shard during parallel operation; it is + * valid only until parallel_shard_exit() is called. The return value here will be + * passed to each invocation of parallel_shard_memref() for that same shard. + */ + virtual void * + parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *shard_stream) + { + return parallel_shard_init(shard_index, worker_data); + } /** * Invoked once when all trace entries for a shard have been processed. \p * shard_data is the value returned by parallel_shard_init() for this shard. diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp index 6ff4680db27..0a79ae7a9c4 100644 --- a/clients/drcachesim/analyzer.cpp +++ b/clients/drcachesim/analyzer.cpp @@ -45,6 +45,7 @@ # include "reader/snappy_file_reader.h" #endif #include "common/utils.h" +#include "memtrace_stream.h" #ifdef HAS_ZLIB // Even if the file is uncompressed, zlib's gzip interface is faster than @@ -182,6 +183,11 @@ analyzer_t::analyzer_t(const std::string &trace_path, analysis_tool_t **tools, , parallel_(true) , worker_count_(worker_count) { + if (!init_file_reader(trace_path)) { + success_ = false; + error_string_ = "Failed to create reader"; + return; + } for (int i = 0; i < num_tools; ++i) { if (tools_[i] == NULL || !*tools_[i]) { success_ = false; @@ -190,15 +196,13 @@ analyzer_t::analyzer_t(const std::string &trace_path, analysis_tool_t **tools, error_string_ += ": " + tools_[i]->get_error_string(); return; } - const std::string error = tools_[i]->initialize(); + const std::string error = tools_[i]->initialize_stream(serial_trace_iter_.get()); if 
(!error.empty()) { success_ = false; error_string_ = "Tool failed to initialize: " + error; return; } } - if (!init_file_reader(trace_path)) - success_ = false; } analyzer_t::analyzer_t(const std::string &trace_path) @@ -264,8 +268,10 @@ analyzer_t::process_tasks(std::vector *tasks) return; } std::vector shard_data(num_tools_); - for (int i = 0; i < num_tools_; ++i) - shard_data[i] = tools_[i]->parallel_shard_init(tdata->index, worker_data[i]); + for (int i = 0; i < num_tools_; ++i) { + shard_data[i] = tools_[i]->parallel_shard_init_stream( + tdata->index, worker_data[i], tdata->iter.get()); + } VPRINT(this, 1, "shard_data[0] is %p\n", shard_data[0]); for (; *tdata->iter != *trace_end_; ++(*tdata->iter)) { for (int i = 0; i < num_tools_; ++i) { diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index c7ae3692c72..2ffc82f5c56 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -151,6 +151,10 @@ analyzer_multi_t::analyzer_multi_t() if (!init_file_reader(op_infile.get_value(), op_verbose.get_value())) success_ = false; } + if (!init_analysis_tools()) { + success_ = false; + return; + } // We can't call serial_trace_iter_->init() here as it blocks for ipc_reader_t. 
} @@ -170,14 +174,10 @@ analyzer_multi_t::create_analysis_tools() tools_[0] = drmemtrace_analysis_tool_create(); if (tools_[0] == NULL) return false; - std::string tool_error; if (!*tools_[0]) { - tool_error = tools_[0]->get_error_string(); + std::string tool_error = tools_[0]->get_error_string(); if (tool_error.empty()) tool_error = "no error message provided."; - } else - tool_error = tools_[0]->initialize(); - if (!tool_error.empty()) { error_string_ = "Tool failed to initialize: " + tool_error; delete tools_[0]; tools_[0] = NULL; @@ -229,8 +229,6 @@ analyzer_multi_t::create_analysis_tools() serial_schedule_file_.get(), cpu_schedule_file_.get()); if (tools_[1] == NULL) return false; - if (!!*tools_[1]) - tools_[1]->initialize(); if (!*tools_[1]) { error_string_ = tools_[1]->get_error_string(); delete tools_[1]; @@ -242,6 +240,28 @@ analyzer_multi_t::create_analysis_tools() return true; } +bool +analyzer_multi_t::init_analysis_tools() +{ + std::string tool_error = tools_[0]->initialize_stream(serial_trace_iter_.get()); + if (!tool_error.empty()) { + error_string_ = "Tool failed to initialize: " + tool_error; + delete tools_[0]; + tools_[0] = NULL; + return false; + } + if (op_test_mode.get_value()) { + tools_[1]->initialize_stream(serial_trace_iter_.get()); + if (!*tools_[1]) { + error_string_ = tools_[1]->get_error_string(); + delete tools_[1]; + tools_[1] = NULL; + return false; + } + } + return true; +} + void analyzer_multi_t::destroy_analysis_tools() { diff --git a/clients/drcachesim/analyzer_multi.h b/clients/drcachesim/analyzer_multi.h index 2d7e4b264ae..5f688765c9e 100644 --- a/clients/drcachesim/analyzer_multi.h +++ b/clients/drcachesim/analyzer_multi.h @@ -49,6 +49,8 @@ class analyzer_multi_t : public analyzer_t { protected: bool create_analysis_tools(); + bool + init_analysis_tools(); void destroy_analysis_tools(); diff --git a/clients/drcachesim/common/memtrace_stream.h b/clients/drcachesim/common/memtrace_stream.h new file mode 100644 index 
00000000000..e25bba70d40 --- /dev/null +++ b/clients/drcachesim/common/memtrace_stream.h @@ -0,0 +1,78 @@ +/* ********************************************************** + * Copyright (c) 2022 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +/* memtrace_stream: an interface to access aspects of the full stream of memory + * trace records. 
+ * + * We had considered other avenues for analysis_tool_t to obtain things like + * the record and instruction ordinals within the stream, in the presence of + * skipping: we could add fields to memref but we'd either have to append + * and have them at different offsets for each type or we'd have to break + * compatibility to prepend every time we added more; or we could add parameters + * to process_memref(). Passing an interface to the init routines seems + * the simplest and most flexible. + */ + +#ifndef _MEMTRACE_STREAM_H_ +#define _MEMTRACE_STREAM_H_ 1 + +/** + * @file drmemtrace/memtrace_stream.h + * @brief DrMemtrace interface for obtaining information from analysis + * tools on the full stream of memory reference records. + */ + +/** + * This is an interface for obtaining information from analysis tools + * on the full stream of memory reference records. + */ +class memtrace_stream_t { +public: + /** Destructor. */ + virtual ~memtrace_stream_t() + { + } + /** + * Returns the count of #memref_t records from the start of the trace to this point. + * This includes records skipped over and not presented to any tool. + */ + virtual uint64_t + get_record_ordinal() = 0; + /** + * Returns the count of instructions from the start of the trace to this point. + * This includes instructions skipped over and not presented to any tool. + */ + virtual uint64_t + get_instruction_ordinal() = 0; +}; + +#endif /* _MEMTRACE_STREAM_H_ */ diff --git a/clients/drcachesim/docs/drcachesim.dox.in b/clients/drcachesim/docs/drcachesim.dox.in index d98d14dbdee..c75af89da19 100644 --- a/clients/drcachesim/docs/drcachesim.dox.in +++ b/clients/drcachesim/docs/drcachesim.dox.in @@ -638,87 +638,63 @@ disassembling instructions in AT&T, Intel, Arm, or DR format (to see disassembly for online traces, pass the `-instr_encodings` option). The -skip_refs and -sim_refs flags can be used to set a start point and end point for the disassembled view. 
Note that these -flags compute the number of instructions which are skipped or displayed which -is distinct from the number of trace entries. +flags compute the number of trace entry records which are skipped or displayed which +is distinct from the number of instruction records. The tool displays loads and stores, as well as metadata marker entries for timestamps, on which core and thread the subsequent instruction sequence was executed, and kernel and system call transfers (these correspond to signal or event handler interruptions of the regular execution flow). +In its first two columns, the tool displays the trace record ordinal +and the instruction fetch ordinal. + \code -$ bin64/drrun -t drcachesim -simulator_type view -sim_refs 20 -indir drmemtrace.*.dir -T80431 -T80431 -T80431 -T80431 -T80431 -T80431 0x00007f2ae335d090 48 89 e7 mov %rsp, %rdi -T80431 0x00007f2ae335d093 e8 48 0d 00 00 call $0x00007f2ae335dde0 -T80431 write 8 byte(s) @ 0x7ffdf5770ac8 -T80431 0x00007f2ae335dde0 55 push %rbp -T80431 write 8 byte(s) @ 0x7ffdf5770ac0 -T80431 0x00007f2ae335dde1 48 89 e5 mov %rsp, %rbp -T80431 0x00007f2ae335dde4 41 57 push %r15 -T80431 write 8 byte(s) @ 0x7ffdf5770ab8 -T80431 0x00007f2ae335dde6 49 89 ff mov %rdi, %r15 -T80431 0x00007f2ae335dde9 41 56 push %r14 -T80431 write 8 byte(s) @ 0x7ffdf5770ab0 -T80431 0x00007f2ae335ddeb 41 55 push %r13 -T80431 write 8 byte(s) @ 0x7ffdf5770aa8 -T80431 0x00007f2ae335dded 41 54 push %r12 -T80431 write 8 byte(s) @ 0x7ffdf5770aa0 -T80431 0x00007f2ae335ddef 53 push %rbx -T80431 write 8 byte(s) @ 0x7ffdf5770a98 -T80431 0x00007f2ae335ddf0 48 83 ec 38 sub $0x38, %rsp -T80431 0x00007f2ae335ddf4 0f 31 rdtsc -T80431 0x00007f2ae335ddf6 48 c1 e2 20 shl $0x20, %rdx -T80431 0x00007f2ae335ddfa 48 09 d0 or %rdx, %rax -T80431 0x00007f2ae335ddfd 48 8d 15 74 90 02 00 lea 0x00007f2ae3386e78, %rdx -T80431 0x00007f2ae335de04 48 89 05 75 87 02 00 mov %rax, 0x00007f2ae3386580 -T80431 write 8 byte(s) @ 0x7f2ae3386580 -T80431 0x00007f2ae335de0b 48 8b 
05 66 90 02 00 mov 0x00007f2ae3386e78, %rax -T80431 read 8 byte(s) @ 0x7f2ae3386e78 -T80431 0x00007f2ae335de12 49 89 d4 mov %rdx, %r12 -T80431 0x00007f2ae335de15 4c 2b 25 e4 91 02 00 sub 0x00007f2ae3387000, %r12 -T80431 read 8 byte(s) @ 0x7f2ae3387000 -T80431 0x00007f2ae335de1c 48 89 15 d5 9b 02 00 mov %rdx, 0x00007f2ae33879f8 +$ bin64/drrun -t drcachesim -simulator_type view -indir drmemtrace.*.dir -sim_refs 20 +Output format: + : T +------------------------------------------------------------ + 1 0: T3256418 + 2 0: T3256418 + 3 0: T3256418 + 4 0: T3256418 + 5 0: T3256418 + 6 0: T3256418 + 7 0: T3256418 + 8 1: T3256418 ifetch 3 byte(s) @ 0x00007fc205a61940 48 89 e7 mov %rsp, %rdi + 9 2: T3256418 ifetch 5 byte(s) @ 0x00007fc205a61943 e8 b8 0c 00 00 call $0x00007fc205a62600 + 10 2: T3256418 write 8 byte(s) @ 0x00007fff9a9e3528 by PC 0x00007fc205a61943 + 11 3: T3256418 ifetch 1 byte(s) @ 0x00007fc205a62600 55 push %rbp + 12 3: T3256418 write 8 byte(s) @ 0x00007fff9a9e3520 by PC 0x00007fc205a62600 + 13 4: T3256418 ifetch 3 byte(s) @ 0x00007fc205a62601 48 89 e5 mov %rsp, %rbp + 14 5: T3256418 ifetch 2 byte(s) @ 0x00007fc205a62604 41 57 push %r15 + 15 5: T3256418 write 8 byte(s) @ 0x00007fff9a9e3518 by PC 0x00007fc205a62604 + 16 6: T3256418 ifetch 2 byte(s) @ 0x00007fc205a62606 41 56 push %r14 + 17 6: T3256418 write 8 byte(s) @ 0x00007fff9a9e3510 by PC 0x00007fc205a62606 + 18 7: T3256418 ifetch 2 byte(s) @ 0x00007fc205a62608 41 55 push %r13 + 19 7: T3256418 write 8 byte(s) @ 0x00007fff9a9e3508 by PC 0x00007fc205a62608 + 20 8: T3256418 ifetch 2 byte(s) @ 0x00007fc205a6260a 41 54 push %r12 View tool results: - 20 : total instructions + 8 : total instructions \endcode An example of thread switches: \code ------------------------------------------------------------ -T342625 -T342625 -T342625 0x0000000000402460 31 ed xor %ebp, %ebp -T342625 0x0000000000402462 49 89 d1 mov %rdx, %r9 -T342625 0x0000000000402465 5e pop %rsi -T342625 read 8 byte(s) @ 0x7ffe70dce480 -T342625 
0x0000000000402466 48 89 e2 mov %rsp, %rdx + 46 0: T3264758 + 47 0: T3264758 + 48 1: T3264758 ifetch 3 byte(s) @ 0x00007f4ea89e4940 48 89 e7 mov %rsp, %rdi + 49 2: T3264758 ifetch 5 byte(s) @ 0x00007f4ea89e4943 e8 b8 0c 00 00 call $0x00007f4ea89e5600 + 50 2: T3264758 write 8 byte(s) @ 0x00007ffd93a0cf18 by PC 0x00007f4ea89e4943 ... -T342625 0x0000000000467c42 4d 89 c8 mov %r9, %r8 -T342625 0x0000000000467c45 4c 8b 54 24 08 mov 0x08(%rsp), %r10 -T342625 read 8 byte(s) @ 0x7ffe70dce100 -T342625 0x0000000000467c4a b8 38 00 00 00 mov $0x00000038, %eax -T342625 0x0000000000467c4f 0f 05 syscall + 2854543 2149665: T3264758 ifetch 5 byte(s) @ 0x00007f4ea7c87f8c b8 0e 00 00 00 mov $0x0000000e, %eax + 2854544 2149666: T3264758 ifetch 2 byte(s) @ 0x00007f4ea7c87f91 0f 05 syscall ------------------------------------------------------------ -T342626 -T342626 -T342626 0x0000000000467c51 48 85 c0 test %rax, %rax -T342626 0x0000000000467c54 7c 13 jl $0x0000000000467c69 -T342626 0x0000000000467c56 74 01 jz $0x0000000000467c59 -T342626 0x0000000000467c59 31 ed xor %ebp, %ebp -T342626 0x0000000000467c5b 58 pop %rax -T342626 read 8 byte(s) @ 0x7f899f928e70 -T342626 0x0000000000467c5c 5f pop %rdi -T342626 read 8 byte(s) @ 0x7f899f928e78 -T342626 0x0000000000467c5d ff d0 call %rax -T342626 write 8 byte(s) @ 0x7f899f928e78 -T342626 0x0000000000404a30 41 54 push %r12 -T342626 write 8 byte(s) @ 0x7f899f928e70 + 2854545 2149666: T3264760 + 2854546 2149666: T3264760 + 2854547 2149667: T3264760 ifetch 3 byte(s) @ 0x00007f4ea7d0b099 48 85 c0 test %rax, %rax + 2854548 2149668: T3264760 ifetch 2 byte(s) @ 0x00007f4ea7d0b09c 7c 18 jl $0x00007f4ea7d0b0b6 ... 
\endcode @@ -728,43 +704,40 @@ with metadata showing that the signal was delivered just after a non-taken conditional branch: \code -T585061 0x00007fdb4e95128f 41 f6 44 24 08 08 test 0x08(%r12), $0x08 -T585061 read 1 byte(s) @ 0x7ffd5af76b08 -T585061 0x00007fdb4e951295 0f 85 28 04 00 00 jnz $0x00007fdb4e9516c3 -T585061 -T585061 -T585061 -T585061 0x00007fdb4ace9dba 55 push %rbp -T585061 write 8 byte(s) @ 0x7ffd5af763d0 -T585061 0x00007fdb4ace9dbb 48 89 e5 mov %rsp, %rbp -T585061 0x00007fdb4ace9dbe 89 7d fc mov %edi, -0x04(%rbp) -T585061 write 4 byte(s) @ 0x7ffd5af763cc -T585061 0x00007fdb4ace9dc1 48 89 75 f0 mov %rsi, -0x10(%rbp) -T585061 write 8 byte(s) @ 0x7ffd5af763c0 -T585061 0x00007fdb4ace9dc5 48 89 55 e8 mov %rdx, -0x18(%rbp) -T585061 write 8 byte(s) @ 0x7ffd5af763b8 -T585061 0x00007fdb4ace9dc9 83 7d fc 1a cmp -0x04(%rbp), $0x1a -T585061 read 4 byte(s) @ 0x7ffd5af763cc -T585061 0x00007fdb4ace9dcd 75 0f jnz $0x00007fdb4ace9dde -T585061 0x00007fdb4ace9dcf 8b 05 7f 23 20 00 mov 0x00007fdb4aeec154, %eax -T585061 read 4 byte(s) @ 0x7fdb4aeec154 -T585061 0x00007fdb4ace9dd5 83 c0 01 add $0x01, %eax -T585061 0x00007fdb4ace9dd8 89 05 76 23 20 00 mov %eax, 0x00007fdb4aeec154 -T585061 write 4 byte(s) @ 0x7fdb4aeec154 -T585061 0x00007fdb4ace9dde 90 nop -T585061 0x00007fdb4ace9ddf 5d pop %rbp -T585061 read 8 byte(s) @ 0x7ffd5af763d0 -T585061 0x00007fdb4ace9de0 c3 ret -T585061 read 8 byte(s) @ 0x7ffd5af763d8 -T585061 0x00007fdb4e95c140 48 c7 c0 0f 00 00 00 mov $0x0000000f, %rax -T585061 0x00007fdb4e95c147 0f 05 syscall -T585061 -T585061 -T585061 -T585061 -T585061 -T585061 0x00007fdb4e95129b 48 8b 1d 8e 40 01 00 mov 0x00007fdb4e965330, %rbx -T585061 read 8 byte(s) @ 0x7fdb4e965330 + 2851502 2147588: T3264758 ifetch 2 byte(s) @ 0x00007f4ea7c87a54 eb 14 jmp $0x00007f4ea7c87a6a + 2851503 2147588: T3264758 + 2851504 2147588: T3264758 + 2851505 2147588: T3264758 + 2851506 2147589: T3264758 ifetch 1 byte(s) @ 0x00007f4ea47fbdba 55 push %rbp + 2851507 2147589: T3264758 write 8 
byte(s) @ 0x00007ffd93a0be30 by PC 0x00007f4ea47fbdba + 2851508 2147590: T3264758 ifetch 3 byte(s) @ 0x00007f4ea47fbdbb 48 89 e5 mov %rsp, %rbp + 2851509 2147591: T3264758 ifetch 3 byte(s) @ 0x00007f4ea47fbdbe 89 7d fc mov %edi, -0x04(%rbp) + 2851510 2147591: T3264758 write 4 byte(s) @ 0x00007ffd93a0be2c by PC 0x00007f4ea47fbdbe + 2851511 2147592: T3264758 ifetch 4 byte(s) @ 0x00007f4ea47fbdc1 48 89 75 f0 mov %rsi, -0x10(%rbp) + 2851512 2147592: T3264758 write 8 byte(s) @ 0x00007ffd93a0be20 by PC 0x00007f4ea47fbdc1 + 2851513 2147593: T3264758 ifetch 4 byte(s) @ 0x00007f4ea47fbdc5 48 89 55 e8 mov %rdx, -0x18(%rbp) + 2851514 2147593: T3264758 write 8 byte(s) @ 0x00007ffd93a0be18 by PC 0x00007f4ea47fbdc5 + 2851515 2147594: T3264758 ifetch 4 byte(s) @ 0x00007f4ea47fbdc9 83 7d fc 1a cmp -0x04(%rbp), $0x1a + 2851516 2147594: T3264758 read 4 byte(s) @ 0x00007ffd93a0be2c by PC 0x00007f4ea47fbdc9 + 2851517 2147595: T3264758 ifetch 2 byte(s) @ 0x00007f4ea47fbdcd 75 0f jnz $0x00007f4ea47fbdde + 2851518 2147596: T3264758 ifetch 6 byte(s) @ 0x00007f4ea47fbdcf 8b 05 7f 23 20 00 mov 0x00007f4ea49fe154, %eax + 2851519 2147596: T3264758 read 4 byte(s) @ 0x00007f4ea49fe154 by PC 0x00007f4ea47fbdcf + 2851520 2147597: T3264758 ifetch 3 byte(s) @ 0x00007f4ea47fbdd5 83 c0 01 add $0x01, %eax + 2851521 2147598: T3264758 ifetch 6 byte(s) @ 0x00007f4ea47fbdd8 89 05 76 23 20 00 mov %eax, 0x00007f4ea49fe154 + 2851522 2147598: T3264758 write 4 byte(s) @ 0x00007f4ea49fe154 by PC 0x00007f4ea47fbdd8 + 2851523 2147599: T3264758 ifetch 1 byte(s) @ 0x00007f4ea47fbdde 90 nop + 2851524 2147600: T3264758 ifetch 1 byte(s) @ 0x00007f4ea47fbddf 5d pop %rbp + 2851525 2147600: T3264758 read 8 byte(s) @ 0x00007ffd93a0be30 by PC 0x00007f4ea47fbddf + 2851526 2147601: T3264758 ifetch 1 byte(s) @ 0x00007f4ea47fbde0 c3 ret + 2851527 2147601: T3264758 read 8 byte(s) @ 0x00007ffd93a0be38 by PC 0x00007f4ea47fbde0 + 2851528 2147602: T3264758 ifetch 7 byte(s) @ 0x00007f4ea7c3daa0 48 c7 c0 0f 00 00 00 mov $0x0000000f, 
%rax + 2851529 2147603: T3264758 ifetch 2 byte(s) @ 0x00007f4ea7c3daa7 0f 05 syscall + 2851530 2147603: T3264758 + 2851531 2147603: T3264758 + 2851532 2147603: T3264758 + 2851533 2147603: T3264758 + 2851534 2147603: T3264758 + 2851535 2147604: T3264758 ifetch 5 byte(s) @ 0x00007f4ea7c87a6a e8 11 8b 07 00 call $0x00007f4ea7d00580 \endcode \section sec_tool_func_view View Function Calls @@ -1417,7 +1390,7 @@ application thread, but the tool interface can support other divisions. For too that support concurrent processing of shards and do not need to see a single time-sorted interleaved merged trace, the interface functions with the parallel_ prefix should be overridden, and parallel_shard_supported() should return true. -parallel_shard_init() will be invoked for each shard prior to invoking +parallel_shard_init_stream() will be invoked for each shard prior to invoking parallel_shard_memref() for each entry in that shard; the data structure returned from parallel_shard_init() will be passed to parallel_shard_memref() for each trace entry for that shard. The concurrency model used guarantees that all @@ -1458,6 +1431,15 @@ on each thread transition. Other built-in markers indicate disruptions in user mode control flow such as signal handler entry and exit. +The absolute ordinals for trace records and instruction fetches are +available via the #memtrace_stream_t interface passed to the +initialize_stream() function for serial operation and +parallel_shard_init_stream() for parallel operation. If the iterator +skips over some records that are not passed to the tools, these +ordinals will include those skipped records. If a tool wishes to +count only those records or instructions that it sees, it can add its +own counters. + CMake support is provided for including the headers and linking the libraries of the \p drmemtrace framework. 
A new CMake function is defined in the DynamoRIO package which sets the include directory for using the \p diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index b9217d0e275..26f9852fa70 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -287,8 +287,10 @@ reader_t::operator++() at_eof_ = true; // bail break; } - if (have_memref) + if (have_memref) { + ++cur_ref_count_; break; + } } return *this; diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h index 4696ad9ba27..490f7203f58 100644 --- a/clients/drcachesim/reader/reader.h +++ b/clients/drcachesim/reader/reader.h @@ -42,6 +42,7 @@ #include // For exporting we avoid "../common" and rely on -I. #include "memref.h" +#include "memtrace_stream.h" #include "utils.h" #define OUT /* just a marker */ @@ -58,7 +59,8 @@ # define VPRINT(reader, level, ...) /* nothing */ #endif -class reader_t : public std::iterator { +class reader_t : public std::iterator, + public memtrace_stream_t { public: reader_t() { @@ -113,6 +115,17 @@ class reader_t : public std::iterator { // 2) It is difficult to implement for file_reader_t as streams do not // have a copy constructor. + uint64_t + get_record_ordinal() override + { + return cur_ref_count_; + } + uint64_t + get_instruction_ordinal() override + { + return cur_instr_count_; + } + protected: // This reads the next entry from the stream of entries from all threads interleaved // in timestamp order. @@ -150,6 +163,7 @@ class reader_t : public std::iterator { addr_t prev_instr_addr_ = 0; int bundle_idx_ = 0; std::unordered_map tid2pid_; + uint64_t cur_ref_count_ = 0; uint64_t cur_instr_count_ = 0; uint64_t chunk_instr_count_ = 0; // Unchanging once set to non-zero. 
uint64_t last_timestamp_instr_count_ = 0; diff --git a/clients/drcachesim/tests/offline-phys.templatex b/clients/drcachesim/tests/offline-phys.templatex index 0450a7d94d2..f6257fe695b 100644 --- a/clients/drcachesim/tests/offline-phys.templatex +++ b/clients/drcachesim/tests/offline-phys.templatex @@ -9,21 +9,21 @@ Adios world! Adios world! Adios world! Output format: -: T + : T ------------------------------------------------------------ - 1: T[0-9]+ - 2: T[0-9]+ - 3: T[0-9]+ - 4: T[0-9]+ - 5: T[0-9]+ - 6: T[0-9]+ - 7: T[0-9]+ - 8: T[0-9]+ - 9: T[0-9]+ - 10: T[0-9]+ - 11: T[0-9]+ - 12: T[0-9]+ - 13: T[0-9]+ - 14: T[0-9]+ - 15: T[0-9]+ - 16: T[0-9]+ ifetch .* + 1 0: T[0-9]+ + 2 0: T[0-9]+ + 3 0: T[0-9]+ + 4 0: T[0-9]+ + 5 0: T[0-9]+ + 6 0: T[0-9]+ + 7 0: T[0-9]+ + 8 0: T[0-9]+ + 9 0: T[0-9]+ + 10 0: T[0-9]+ + 11 0: T[0-9]+ + 12 0: T[0-9]+ + 13 0: T[0-9]+ + 14 0: T[0-9]+ + 15 0: T[0-9]+ + 16 1: T[0-9]+ ifetch .* diff --git a/clients/drcachesim/tests/offline-view.templatex b/clients/drcachesim/tests/offline-view.templatex index cbccfde801c..c5b4e2df4d5 100644 --- a/clients/drcachesim/tests/offline-view.templatex +++ b/clients/drcachesim/tests/offline-view.templatex @@ -1,7 +1,15 @@ Hello, world! -.* - *[0-9]*: T[0-9]* +Output format: + : T +------------------------------------------------------------ + 1 0: T[0-9]* + 2 0: T[0-9]* + 3 0: T[0-9]* + 4 0: T[0-9]* + 5 0: T[0-9]* + 6 0: T[0-9]* + 7 0: T[0-9]* + 8 1: T[0-9]* ifetch .* .* View tool results: *[0-9]* : total instructions diff --git a/clients/drcachesim/tests/view_test.cpp b/clients/drcachesim/tests/view_test.cpp index bf2cceddc5b..aace35c58d9 100644 --- a/clients/drcachesim/tests/view_test.cpp +++ b/clients/drcachesim/tests/view_test.cpp @@ -120,19 +120,55 @@ class view_nomod_test_t : public view_t { std::string run_test_helper(view_t &view, const std::vector &memrefs) { - view.initialize(); - // Capture cerr. 
- std::stringstream capture; - std::streambuf *prior = std::cerr.rdbuf(capture.rdbuf()); - // Run the tool. - for (const auto &memref : memrefs) { - if (!view.process_memref(memref)) - std::cout << "Hit error: " << view.get_error_string() << "\n"; - } - // Return the result. - std::string res = capture.str(); - std::cerr.rdbuf(prior); - return res; + class local_stream_t : public memtrace_stream_t { + public: + local_stream_t(view_t &view, const std::vector &memrefs) + : view_(view) + , memrefs_(memrefs) + { + } + + std::string + run() + { + view_.initialize_stream(this); + // Capture cerr. + std::stringstream capture; + std::streambuf *prior = std::cerr.rdbuf(capture.rdbuf()); + // Run the tool. + for (const auto &memref : memrefs_) { + ++ref_count_; + if (type_is_instr(memref.instr.type)) + ++instr_count_; + if (!view_.process_memref(memref)) + std::cout << "Hit error: " << view_.get_error_string() << "\n"; + } + // Return the result. + std::string res = capture.str(); + std::cerr.rdbuf(prior); + return res; + } + + uint64_t + get_record_ordinal() override + { + return ref_count_; + } + uint64_t + get_instruction_ordinal() override + { + return instr_count_; + } + + private: + view_t &view_; + const std::vector &memrefs_; + uint64_t ref_count_ = 0; + uint64_t instr_count_ = 0; + }; + + local_stream_t stream(view, memrefs); + return stream.run(); } bool @@ -217,7 +253,8 @@ test_skip_memrefs(void *drcontext, instrlist_t &ilist, ss >> prefix; if (prefix != 1 + skip_memrefs) { std::cerr << "Expect to start after skip count " << skip_memrefs << " but found " - << prefix << "\n"; + << prefix << "\n" + << res << "\n"; return false; } return true; diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index b133445d4ce..88cab6657ea 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -70,15 +70,15 @@ view_t::view_t(const std::string &module_file_path, memref_tid_t thread, , num_disasm_instrs_(0) , 
prev_tid_(-1) , filetype_(-1) - , num_refs_(0) , timestamp_(0) , has_modules_(true) { } std::string -view_t::initialize() +view_t::initialize_stream(memtrace_stream_t *serial_stream) { + serial_stream_ = serial_stream; print_header(); dcontext_.dcontext = dr_standalone_init(); if (module_file_path_.empty()) { @@ -124,9 +124,10 @@ view_t::parallel_shard_supported() } void * -view_t::parallel_shard_init(int shard_index, void *worker_data) +view_t::parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *shard_stream) { - return nullptr; + return shard_stream; } bool @@ -144,15 +145,8 @@ view_t::parallel_shard_error(void *shard_data) } bool -view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) -{ - return process_memref(memref); -} - -bool -view_t::should_skip(const memref_t &memref) +view_t::should_skip(memtrace_stream_t *memstream, const memref_t &memref) { - num_refs_++; if (skip_refs_left_ > 0) { skip_refs_left_--; // I considered printing the version and filetype even when skipped but @@ -165,7 +159,14 @@ view_t::should_skip(const memref_t &memref) return true; sim_refs_left_--; if (sim_refs_left_ == 0 && timestamp_ > 0) { - print_prefix(memref, -1); // Already incremented for timestamp. + // Print this timestamp right before the final record. + int adjust = -1; // Already incremented for timestamp. + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP) { + // This is the final record so no adjustment needed. 
+ adjust = 0; + } + print_prefix(memstream, memref, adjust); std::cerr << "\n"; timestamp_ = 0; } @@ -176,6 +177,13 @@ view_t::should_skip(const memref_t &memref) bool view_t::process_memref(const memref_t &memref) { + return parallel_shard_memref(serial_stream_, memref); +} + +bool +view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) +{ + memtrace_stream_t *memstream = reinterpret_cast(shard_data); if (knob_thread_ > 0 && memref.data.tid > 0 && memref.data.tid != knob_thread_) return true; // Even for -skip_refs we need to process the up-front version and type. @@ -213,7 +221,7 @@ view_t::process_memref(const memref_t &memref) // We can't easily reorder and place window markers before timestamps // since memref iterators use the timestamps to order buffer units. timestamp_ = memref.marker.marker_value; - if (should_skip(memref)) + if (should_skip(memstream, memref)) timestamp_ = 0; return true; default: break; @@ -227,21 +235,21 @@ view_t::process_memref(const memref_t &memref) printed_header_.find(memref.marker.tid) == printed_header_.end()) { printed_header_.insert(memref.marker.tid); if (trace_version_ != -1) { // Old versions may not have a version marker. - if (!should_skip(memref)) { - print_prefix(memref); + if (!should_skip(memstream, memref)) { + print_prefix(memstream, memref, -2); std::cerr << "\n"; } } if (filetype_ != -1) { // Handle old/malformed versions. - if (!should_skip(memref)) { - print_prefix(memref); + if (!should_skip(memstream, memref)) { + print_prefix(memstream, memref, -1); std::cerr << "\n"; } } } - if (should_skip(memref)) + if (should_skip(memstream, memref)) return true; if (memref.marker.type == TRACE_TYPE_MARKER) { @@ -250,25 +258,27 @@ view_t::process_memref(const memref_t &memref) if (last_window_[memref.marker.tid] != memref.marker.marker_value) { std::cerr << "------------------------------------------------------------\n"; - print_prefix(memref, -1); // Already incremented for timestamp above. 
+ print_prefix(memstream, memref, + -1); // Already incremented for timestamp above. } if (timestamp_ > 0) { std::cerr << "\n"; timestamp_ = 0; - print_prefix(memref); + print_prefix(memstream, memref); } std::cerr << "\n"; last_window_[memref.marker.tid] = memref.marker.marker_value; } if (timestamp_ > 0) { - print_prefix(memref, -1); // Already incremented for timestamp above. + print_prefix(memstream, memref, + -1); // Already incremented for timestamp above. std::cerr << "\n"; timestamp_ = 0; } } if (memref.instr.tid != 0) { - print_prefix(memref); + print_prefix(memstream, memref); } if (memref.marker.type == TRACE_TYPE_MARKER) { @@ -485,7 +495,7 @@ view_t::process_memref(const memref_t &memref) auto newline = disasm.find('\n'); if (newline != std::string::npos && newline < disasm.size() - 1) { std::stringstream prefix; - print_prefix(memref, 0, prefix); + print_prefix(memstream, memref, 0, prefix); std::string skip_name(name_width, ' '); disasm.insert(newline + 1, prefix.str() + skip_name + " "); diff --git a/clients/drcachesim/tools/view.h b/clients/drcachesim/tools/view.h index ec60700d12e..24819e81fe6 100644 --- a/clients/drcachesim/tools/view.h +++ b/clients/drcachesim/tools/view.h @@ -53,11 +53,12 @@ class view_t : public analysis_tool_t { uint64_t sim_refs, const std::string &syntax, unsigned int verbose, const std::string &alt_module_dir = ""); std::string - initialize() override; + initialize_stream(memtrace_stream_t *serial_stream) override; bool parallel_shard_supported() override; void * - parallel_shard_init(int shard_index, void *worker_data) override; + parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *shard_stream) override; bool parallel_shard_exit(void *shard_data) override; bool @@ -81,25 +82,26 @@ class view_t : public analysis_tool_t { }; bool - should_skip(const memref_t &memref); + should_skip(memtrace_stream_t *memstream, const memref_t &memref); inline void print_header() { - std::cerr << std::setw(9) 
<< "Output format:\n" + std::cerr << std::setw(9) << "Output format:\n " << ": T \n" << "------------------------------------------------------------\n"; } inline void - print_prefix(const memref_t &memref, int ref_adjust = 0, + print_prefix(memtrace_stream_t *memstream, const memref_t &memref, int ref_adjust = 0, std::ostream &stream = std::cerr) { if (prev_tid_ != -1 && prev_tid_ != memref.instr.tid) stream << "------------------------------------------------------------\n"; prev_tid_ = memref.instr.tid; - stream << std::setw(9) << (num_refs_ + ref_adjust) << ": T" << memref.marker.tid - << " "; + stream << std::setw(9) << (memstream->get_record_ordinal() + ref_adjust) + << std::setw(9) << memstream->get_instruction_ordinal() << ": T" + << memref.marker.tid << " "; } /* We make this the first field so that dr_standalone_exit() is called after @@ -130,10 +132,10 @@ class view_t : public analysis_tool_t { memref_tid_t prev_tid_; intptr_t filetype_; std::unordered_set printed_header_; - uint64_t num_refs_; std::unordered_map last_window_; uintptr_t timestamp_; bool has_modules_; + memtrace_stream_t *serial_stream_ = nullptr; }; #endif /* _VIEW_H_ */