i#5538 memtrace seek, part 9: Add accurate record count (#5737)

Adds a reader subclass to raw2trace for computing the memref_t record count for each chunk. A new marker is inserted in the chunk header which the zipfile skip code uses to obtain the correct ref count when skipping over chunks. Adds a count suppression feature where the view tool prints 0 for the record count for the synthetic timestamp+cpu added after a seek. Updates the seek test with a new trace and new expected output. Issue: #5538
DynamoRIO · Nov 17, 2022 · 9d2a012 · 9d2a012
1 parent 8d26b15
commit 9d2a012
Show file tree

Hide file tree

Showing 12 changed files with 167 additions and 60 deletions.
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt
@@ -176,11 +176,11 @@ set(raw2trace_srcs
   tracer/instru.cpp
   tracer/instru_online.cpp
   tracer/instru_offline.cpp
+  reader/reader.cpp
   )
 if (libsnappy)
   set(raw2trace_srcs ${raw2trace_srcs}
     ${snappy_reader}
-    reader/reader.cpp
     reader/file_reader.cpp
     )
 endif ()

diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp
@@ -105,7 +105,7 @@ droption_t<bytesize_t> op_chunk_instr_count(
     // We do not support tiny chunks.  We do not support disabling chunks with a 0
     // value, to simplify testing: although we're still having to support generating
     // non-zip files for !HAS_ZLIB/!HAS_ZIP!
-    bytesize_t(1000),
+    bytesize_t(10),
 #ifdef X64
     bytesize_t(1ULL << 63),
 #else

diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h
@@ -120,7 +120,8 @@ typedef enum {
     // The trace_entry_t stream always has the instr fetch prior to data refs,
     // which the reader can use to obtain the PC for data references.
     // For memref_t, the instruction address is in the addr field.
-    // An instruction *not* of the types below:
+    // The base type is an instruction *not* of the other sub-types.
+    // Enum value == 10.
     TRACE_TYPE_INSTR, /**< A non-branch instruction. */
     // Particular categories of instructions:
     TRACE_TYPE_INSTR_DIRECT_JUMP,      /**< A direct unconditional jump instruction. */
@@ -147,6 +148,7 @@ typedef enum {
     // The _END entries are hidden by reader_t as memref_t has space for the size.
     TRACE_TYPE_INSTR_FLUSH, /**< An instruction cache flush. */
     TRACE_TYPE_INSTR_FLUSH_END,
+    // Enum value == 20.
     TRACE_TYPE_DATA_FLUSH, /**< A data cache flush. */
     TRACE_TYPE_DATA_FLUSH_END,
 
@@ -188,6 +190,7 @@ typedef enum {
     TRACE_TYPE_INSTR_NO_FETCH,
     // An internal value used for online traces and turned by reader_t into
     // either TRACE_TYPE_INSTR or TRACE_TYPE_INSTR_NO_FETCH.
+    // Enum value == 30.
     TRACE_TYPE_INSTR_MAYBE_FETCH,
 
     /**
@@ -408,6 +411,13 @@ typedef enum {
      */
     TRACE_MARKER_TYPE_CHUNK_FOOTER,
 
+    /**
+     * Indicates the record ordinal for this point in the trace.  This is used
+     * to identify the visible record ordinal when skipping over chunks, and is
+     * not exposed to analysis tools.
+     */
+    TRACE_MARKER_TYPE_RECORD_ORDINAL,
+
     // ...
     // These values are reserved for future built-in marker types.
     // ...

diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
@@ -31,6 +31,7 @@
  */
 
 #include <assert.h>
+#include <inttypes.h>
 #include <string.h>
 #include "reader.h"
 #include "../common/memref.h"
@@ -267,6 +268,8 @@ reader_t::process_input_entry()
                    skip_next_cpu_) {
             VPRINT(this, 2, "skipping start-of-chunk dup cpu\n");
             skip_next_cpu_ = false;
+        } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_RECORD_ORDINAL) {
+            // Not exposed to tools.
         } else {
             have_memref = true;
         }
@@ -291,8 +294,18 @@ reader_t::process_input_entry()
         at_eof_ = true; // bail
         break;
     }
-    if (have_memref)
-        ++cur_ref_count_;
+    if (have_memref) {
+        if (suppress_ref_count_ > 0) {
+            VPRINT(this, 4, "suppressing %" PRIu64 " ref counts\n", suppress_ref_count_);
+            --suppress_ref_count_;
+        } else {
+            if (suppress_ref_count_ == 0) {
+                // Ensure get_record_ordinal() ignores it.
+                suppress_ref_count_ = -1;
+            }
+            ++cur_ref_count_;
+        }
+    }
     return have_memref;
 }
 

diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h
@@ -136,6 +136,8 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     uint64_t
     get_record_ordinal() const override
     {
+        if (suppress_ref_count_ >= 0)
+            return 0;
         return cur_ref_count_;
     }
     uint64_t
@@ -195,6 +197,7 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     bool online_ = true;
     const char *output_prefix_ = "[reader]";
     uint64_t cur_ref_count_ = 0;
+    int64_t suppress_ref_count_ = -1;
     uint64_t cur_instr_count_ = 0;
     uint64_t last_timestamp_instr_count_ = 0;
     trace_entry_t *input_entry_ = nullptr;

diff --git a/clients/drcachesim/reader/zipfile_file_reader.cpp b/clients/drcachesim/reader/zipfile_file_reader.cpp
@@ -186,9 +186,6 @@ file_reader_t<zipfile_reader_t>::skip_thread_instructions(size_t thread_index,
         zipfile->cur_buf = zipfile->max_buf;
     }
     // We have to linearly walk the last mile.
-    // We need to present a timestamp+cpu so we reset this field so process_input_entry()
-    // will not skip the first pair in this new chunk.
-    last_timestamp_instr_count_ = cur_instr_count_;
     while (cur_instr_count_ < stop_count_) { // End condition is never reached.
         if (!read_next_thread_entry(thread_index, &entry_copy_, eof))
             return false;
@@ -198,27 +195,28 @@ file_reader_t<zipfile_reader_t>::skip_thread_instructions(size_t thread_index,
         if (cur_instr_count_ + 1 == stop_count_ &&
             type_is_instr(static_cast<trace_type_t>(entry_copy_.type)))
             break;
-        // Update core state.
-        input_entry_ = &entry_copy_;
-        if (!process_input_entry())
-            continue;
-        const memref_t &memref = **this;
-        if (memref.marker.type == TRACE_TYPE_MARKER) {
-            if (memref.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP)
+        // To examine the produced memrefs we'd have to have the base reader
+        // expose these hidden entries.  It is simpler for use to read the
+        // trace_entry_t directly prior to processing by the base class.
+        if (entry_copy_.type == TRACE_TYPE_MARKER) {
+            if (entry_copy_.size == TRACE_MARKER_TYPE_RECORD_ORDINAL)
+                cur_ref_count_ = entry_copy_.addr;
+            else if (entry_copy_.size == TRACE_MARKER_TYPE_TIMESTAMP)
                 timestamp = entry_copy_;
-            else if (memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID)
+            else if (entry_copy_.size == TRACE_MARKER_TYPE_CPU_ID)
                 cpu = entry_copy_;
         }
-        // TODO i#5538: Have raw2trace insert a record ordinal marker at chunk entry
-        // and use it here to udpate the memtrace_stream_t.
+        // Update core state.
+        input_entry_ = &entry_copy_;
+        process_input_entry();
     }
     if (timestamp.type == TRACE_TYPE_MARKER && cpu.type == TRACE_TYPE_MARKER) {
         // Insert the two markers.
-        // TODO i#5538: These end up with record ordinals that belong to different
-        // records in the unskipped trace: we should instead not print them out
-        // at all, somehow.
         trace_entry_t instr = entry_copy_;
         entry_copy_ = timestamp;
+        // These synthetic entries are not real records in the unskipped trace, so we
+        // do not associate record counts with them.
+        suppress_ref_count_ = 2;
         process_input_entry();
         queues_[thread_index].push(cpu);
         queues_[thread_index].push(instr);

diff --git a/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip b/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip
diff --git a/clients/drcachesim/tests/skip_unit_tests.cpp b/clients/drcachesim/tests/skip_unit_tests.cpp
@@ -111,10 +111,10 @@ test_skip_initial()
         //    Output format:
         //    <record#> <instr#>: T<tid> <record details>
         //    ------------------------------------------------------------
-        //           18       49: T3854659 <marker: timestamp 13312570674112282>
-        //           19       49: T3854659 <marker: tid 3854659 on core 3>
-        //           20       50: T3854659 ifetch       2 byte(s) @ 0x0000000000401030 75
-        //           d9                jnz    $0x000000000040100b
+        //            0       49: T3854659 <marker: timestamp 13312570674112282>
+        //            0       49: T3854659 <marker: tid 3854659 on core 3>
+        //           62       50: T3854659 ifetch       2 byte(s) @ 0x0000000000401030 75
+        //                                   d9                jnz    $0x000000000040100b
         std::string line;
         // First we expect "Output format:"
         std::getline(res_stream, line, '\n');
@@ -126,24 +126,31 @@ test_skip_initial()
         std::getline(res_stream, line, '\n');
         CHECK(starts_with(line, "------"), "missing divider line");
         // Next we expect the timestamp entry with the instruction count before
-        // a colon: "       18       49: T3854659 <marker: timestamp 13312570674112282>"
-        // We expect the count to equal the -skip_instrs value.
+        // a colon: "        0       49: T3854659 <marker: timestamp 13312570674112282>"
+        // We expect the count to equal the -skip_instrs value, with a 0 ref count.
         std::getline(res_stream, line, '\n');
         std::stringstream expect_stream;
         expect_stream << skip_instrs << ":";
+        CHECK(skip_instrs == 0 || line.find(" 0 ") != std::string::npos,
+              "bad ref ordinal");
         CHECK(line.find(expect_stream.str()) != std::string::npos, "bad instr ordinal");
-        // TODO i#5538: Once we fix the record count, check it too to ensure it
-        // is > the instr count or other sanity checks.
         CHECK(skip_instrs == 0 || line.find("timestamp") != std::string::npos,
               "missing timestamp");
-        // Next we expect the cpuid entry.
+        // Next we expect the cpuid entry, with a 0 ref count too.
         std::getline(res_stream, line, '\n');
         CHECK(skip_instrs == 0 || line.find("on core") != std::string::npos,
               "missing cpuid");
+        CHECK(skip_instrs == 0 || line.find(" 0 ") != std::string::npos,
+              "bad ref ordinal");
         // Next we expect the target instruction fetch.
         std::getline(res_stream, line, '\n');
         CHECK(skip_instrs == 0 || line.find("ifetch") != std::string::npos,
               "missing ifetch");
+        // We don't know the precise record count but ensure it's > instr count.
+        int64_t ref_count;
+        std::istringstream ref_in(line);
+        ref_in >> ref_count;
+        CHECK(ref_count > skip_instrs, "invalid ref count");
         // Reset cerr.
         std::cerr.rdbuf(prior);
     }

diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp
@@ -160,13 +160,7 @@ view_t::should_skip(memtrace_stream_t *memstream, const memref_t &memref)
         sim_refs_left_--;
         if (sim_refs_left_ == 0 && timestamp_ > 0) {
             // Print this timestamp right before the final record.
-            int adjust = -1; // Already incremented for timestamp.
-            if (memref.marker.type == TRACE_TYPE_MARKER &&
-                memref.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP) {
-                // This is the final record so no adjustment needed.
-                adjust = 0;
-            }
-            print_prefix(memstream, memref, adjust);
+            print_prefix(memstream, memref, timestamp_record_ord_);
             std::cerr << "<marker: timestamp " << timestamp_ << ">\n";
             timestamp_ = 0;
         }
@@ -191,18 +185,20 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
         switch (memref.marker.marker_type) {
         case TRACE_MARKER_TYPE_VERSION:
             // We delay printing until we know the tid.
-            if (trace_version_ == -1)
+            if (trace_version_ == -1) {
                 trace_version_ = static_cast<int>(memref.marker.marker_value);
-            else if (trace_version_ != static_cast<int>(memref.marker.marker_value)) {
+                version_record_ord_ = memstream->get_record_ordinal();
+            } else if (trace_version_ != static_cast<int>(memref.marker.marker_value)) {
                 error_string_ = std::string("Version mismatch across files");
                 return false;
             }
             return true; // Do not count toward -sim_refs yet b/c we don't have tid.
         case TRACE_MARKER_TYPE_FILETYPE:
             // We delay printing until we know the tid.
-            if (filetype_ == -1)
+            if (filetype_ == -1) {
                 filetype_ = static_cast<intptr_t>(memref.marker.marker_value);
-            else if (filetype_ != static_cast<intptr_t>(memref.marker.marker_value)) {
+                filetype_record_ord_ = memstream->get_record_ordinal();
+            } else if (filetype_ != static_cast<intptr_t>(memref.marker.marker_value)) {
                 error_string_ = std::string("Filetype mismatch across files");
                 return false;
             }
@@ -221,6 +217,7 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
             // We can't easily reorder and place window markers before timestamps
             // since memref iterators use the timestamps to order buffer units.
             timestamp_ = memref.marker.marker_value;
+            timestamp_record_ord_ = memstream->get_record_ordinal();
             if (should_skip(memstream, memref))
                 timestamp_ = 0;
             return true;
@@ -236,13 +233,13 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
         printed_header_.insert(memref.marker.tid);
         if (trace_version_ != -1) { // Old versions may not have a version marker.
             if (!should_skip(memstream, memref)) {
-                print_prefix(memstream, memref, -2);
+                print_prefix(memstream, memref, version_record_ord_);
                 std::cerr << "<marker: version " << trace_version_ << ">\n";
             }
         }
         if (filetype_ != -1) { // Handle old/malformed versions.
             if (!should_skip(memstream, memref)) {
-                print_prefix(memstream, memref, -1);
+                print_prefix(memstream, memref, filetype_record_ord_);
                 std::cerr << "<marker: filetype 0x" << std::hex << filetype_ << std::dec
                           << ">\n";
             }
@@ -270,8 +267,7 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
             last_window_[memref.marker.tid] = memref.marker.marker_value;
         }
         if (timestamp_ > 0) {
-            print_prefix(memstream, memref,
-                         -1); // Already incremented for timestamp above.
+            print_prefix(memstream, memref, timestamp_record_ord_);
             std::cerr << "<marker: timestamp " << timestamp_ << ">\n";
             timestamp_ = 0;
         }
@@ -369,6 +365,10 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
             std::cerr << "<marker: function return value 0x" << std::hex
                       << memref.marker.marker_value << std::dec << ">\n";
             break;
+        case TRACE_MARKER_TYPE_RECORD_ORDINAL:
+            std::cerr << "<marker: record ordinal 0x" << std::hex
+                      << memref.marker.marker_value << std::dec << ">\n";
+            break;
         default:
             std::cerr << "<marker: type " << memref.marker.marker_type << "; value "
                       << memref.marker.marker_value << ">\n";
@@ -495,7 +495,7 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
     auto newline = disasm.find('\n');
     if (newline != std::string::npos && newline < disasm.size() - 1) {
         std::stringstream prefix;
-        print_prefix(memstream, memref, 0, prefix);
+        print_prefix(memstream, memref, -1, prefix);
         std::string skip_name(name_width, ' ');
         disasm.insert(newline + 1,
                       prefix.str() + skip_name + "                               ");

diff --git a/clients/drcachesim/tools/view.h b/clients/drcachesim/tools/view.h
@@ -93,21 +93,21 @@ class view_t : public analysis_tool_t {
     }
 
     inline void
-    print_prefix(memtrace_stream_t *memstream, const memref_t &memref, int ref_adjust = 0,
-                 std::ostream &stream = std::cerr)
+    print_prefix(memtrace_stream_t *memstream, const memref_t &memref,
+                 int64_t record_ord_subst = -1, std::ostream &stream = std::cerr)
     {
+        uint64_t record_ord = record_ord_subst > -1
+            ? static_cast<uint64_t>(record_ord_subst)
+            : memstream->get_record_ordinal();
         if ((prev_tid_ != -1 && prev_tid_ != memref.instr.tid) ||
             // Print a divider for a skip_instructions gap too.
-            (prev_record_ != 0 &&
-             prev_record_ + 1 < memstream->get_record_ordinal() + ref_adjust))
+            (prev_record_ != 0 && prev_record_ + 1 < record_ord))
             stream << "------------------------------------------------------------\n";
         prev_tid_ = memref.instr.tid;
-        // TODO i#5538: After skipping across a chunk, the record ordinal is
-        // incorrect.  We need raw2trace to insert a record ordinal at chunk entry.
-        prev_record_ = memstream->get_record_ordinal() + ref_adjust;
-        stream << std::setw(9) << (memstream->get_record_ordinal() + ref_adjust)
-               << std::setw(9) << memstream->get_instruction_ordinal() << ": T"
-               << memref.marker.tid << " ";
+        prev_record_ = record_ord;
+        stream << std::setw(9) << record_ord << std::setw(9)
+               << memstream->get_instruction_ordinal() << ": T" << memref.marker.tid
+               << " ";
     }
 
     /* We make this the first field so that dr_standalone_exit() is called after
@@ -141,6 +141,9 @@ class view_t : public analysis_tool_t {
     std::unordered_set<memref_tid_t> printed_header_;
     std::unordered_map<memref_tid_t, uintptr_t> last_window_;
     uintptr_t timestamp_;
+    int64_t timestamp_record_ord_ = -1;
+    int64_t version_record_ord_ = -1;
+    int64_t filetype_record_ord_ = -1;
     bool has_modules_;
     memtrace_stream_t *serial_stream_ = nullptr;
 };