i#5538 memtrace seek, part 8: Add skip_thread_instructions() (#5731)

Adds a new skip_instructions() reader iterator interface. It is a linear walk for every type of reader except a chunked zipfile walking a single thread. Adds a drcachesim command line option -skip_instrs which triggers the analyzer to skip from the start before passing anything to the tool. Refactors the reader_t++ to provide a process_input_entry to update state while skipping. Adds a unit test with an added trace file with a small chunk size. The test checks the view output for every skip value from 0 to over double the chunk size. Leaves several pieces for future work: + Full support for skipping from the midde: the timestamp,cpuid will not always be duplicated with the current code. + Recording the record count in each chunk so we have an accurate count after skipping. + Presenting global headers skipped over as memtrace_stream_t values that tools can query. + Reading the schedule files for serial skipping (or the planned cpu iterator and skipping). + Repeating the timestamp+cpu for non-zipfile skipping. Issue: #5538
DynamoRIO · Nov 11, 2022 · 2c81eb8 · 2c81eb8
1 parent 9fb14a7
commit 2c81eb8
Show file tree

Hide file tree

Showing 16 changed files with 647 additions and 236 deletions.
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt
@@ -74,13 +74,15 @@ if (ZLIB_FOUND)
       "until then, disabling zip output and fast seeking")
     set(zip_reader "")
     set(zlib_libs ${ZLIB_LIBRARIES})
+    set(ZIP_FOUND OFF)
   else ()
     file(GLOB minizip_srcs "${minizip_dir}/*.c")
     if (NOT WIN32)
       list(REMOVE_ITEM minizip_srcs "${minizip_dir}/iowin32.c")
     endif ()
     add_library(minizip STATIC ${minizip_srcs})
     add_definitions(-DHAS_ZIP)
+    set(ZIP_FOUND ON)
     # We add "minizip/" to avoid collisions with system "zip.h" on Mac.
     include_directories(${minizip_dir}/..)
     DR_export_target(minizip)
@@ -768,6 +770,21 @@ if (BUILD_TESTS)
     use_DynamoRIO_extension(tool.drcacheoff.burst_traceopts drcovlib_static)
 
   endif ()
+
+  if (X86 AND X64 AND ZIP_FOUND)
+    # XXX i#5538: Add trace files for other arches.
+    set(zip_path
+      "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip")
+    add_executable(tool.drcacheoff.skip_unit_tests tests/skip_unit_tests.cpp)
+    configure_DynamoRIO_standalone(tool.drcacheoff.skip_unit_tests)
+    target_link_libraries(tool.drcacheoff.skip_unit_tests drmemtrace_analyzer
+      drmemtrace_view drmemtrace_raw2trace)
+    use_DynamoRIO_extension(tool.drcacheoff.skip_unit_tests drreg_static)
+    use_DynamoRIO_extension(tool.drcacheoff.skip_unit_tests drcovlib_static)
+    use_DynamoRIO_extension(tool.drcacheoff.skip_unit_tests drdecode)
+    add_test(NAME tool.drcacheoff.skip_unit_tests
+             COMMAND tool.drcacheoff.skip_unit_tests --trace_file ${zip_path})
+  endif ()
 endif ()
 
 ##################################################

diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp
@@ -176,12 +176,13 @@ analyzer_t::init_file_reader(const std::string &trace_path, int verbosity)
 }
 
 analyzer_t::analyzer_t(const std::string &trace_path, analysis_tool_t **tools,
-                       int num_tools, int worker_count)
+                       int num_tools, int worker_count, uint64_t skip_instrs)
     : success_(true)
     , num_tools_(num_tools)
     , tools_(tools)
     , parallel_(true)
     , worker_count_(worker_count)
+    , skip_instrs_(skip_instrs)
 {
     if (!init_file_reader(trace_path)) {
         success_ = false;
@@ -273,9 +274,17 @@ analyzer_t::process_tasks(std::vector<analyzer_shard_data_t *> *tasks)
                 tdata->index, worker_data[i], tdata->iter.get());
         }
         VPRINT(this, 1, "shard_data[0] is %p\n", shard_data[0]);
+        if (skip_instrs_ > 0) {
+            // We skip in each thread.
+            // TODO i#5538: Add top-level header data to memtrace_stream_t for
+            // access by tools, since we're skipping it here.  We considered
+            // not skipping until we see the 1st timestamp but the stream access
+            // approach has other benefits and seems cleaner.
+            (*tdata->iter) = (*tdata->iter).skip_instructions(skip_instrs_);
+        }
         for (; *tdata->iter != *trace_end_; ++(*tdata->iter)) {
+            const memref_t &memref = **tdata->iter;
             for (int i = 0; i < num_tools_; ++i) {
-                const memref_t &memref = **tdata->iter;
                 if (!tools_[i]->parallel_shard_memref(shard_data[i], memref)) {
                     tdata->error = tools_[i]->parallel_shard_error(shard_data[i]);
                     VPRINT(this, 1,
@@ -314,9 +323,13 @@ analyzer_t::run()
     if (!parallel_) {
         if (!start_reading())
             return false;
+        if (skip_instrs_ > 0) {
+            // TODO i#5538: Add top-level header data to memtrace_stream_t; see above.
+            (*serial_trace_iter_) = (*serial_trace_iter_).skip_instructions(skip_instrs_);
+        }
         for (; *serial_trace_iter_ != *trace_end_; ++(*serial_trace_iter_)) {
+            const memref_t &memref = **serial_trace_iter_;
             for (int i = 0; i < num_tools_; ++i) {
-                memref_t memref = **serial_trace_iter_;
                 // We short-circuit and exit on an error to avoid confusion over
                 // the results and avoid wasted continued work.
                 if (!tools_[i]->process_memref(memref)) {

diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2016-2020 Google, Inc.  All rights reserved.
+ * Copyright (c) 2016-2022 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -66,7 +66,8 @@ class analyzer_t {
     analyzer_t();
     virtual ~analyzer_t(); /**< Destructor. */
     /** Returns whether the analyzer was created successfully. */
-    virtual bool operator!();
+    virtual bool
+    operator!();
     /** Returns further information on an error in initializing the analyzer. */
     virtual std::string
     get_error_string();
@@ -83,7 +84,7 @@ class analyzer_t {
      * The analyzer calls the initialize() function on each tool before use.
      */
     analyzer_t(const std::string &trace_path, analysis_tool_t **tools, int num_tools,
-               int worker_count = 0);
+               int worker_count = 0, uint64_t skip_instrs = 0);
     /** Launches the analysis process. */
     virtual bool
     run();
@@ -164,6 +165,7 @@ class analyzer_t {
     std::vector<std::vector<analyzer_shard_data_t *>> worker_tasks_;
     int verbosity_ = 0;
     const char *output_prefix_ = "[analyzer]";
+    uint64_t skip_instrs_ = 0;
 };
 
 #endif /* _ANALYZER_H_ */
diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp
@@ -52,6 +52,7 @@
 analyzer_multi_t::analyzer_multi_t()
 {
     worker_count_ = op_jobs.get_value();
+    skip_instrs_ = op_skip_instrs.get_value();
     // Initial measurements show it's sometimes faster to keep the parallel model
     // of using single-file readers but use them sequentially, as opposed to
     // the every-file interleaving reader, but the user can specify -jobs 1, so

diff --git a/clients/drcachesim/common/memtrace_stream.h b/clients/drcachesim/common/memtrace_stream.h
@@ -37,7 +37,7 @@
  * the record and instruction ordinals within the stream, in the presence of
  * skipping: we could add fields to memref but we'd either have to append
  * and have them at different offsets for each type or we'd have to break
- * compatbility to prepend every time we added more; or we could add parameters
+ * compatibility to prepend every time we added more; or we could add parameters
  * to process_memref().  Passing an interface to the init routines seems
  * the simplest and most flexible.
  */

diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp
@@ -487,12 +487,22 @@ droption_t<int>
                    "For simulator types that support it, limits analyis to the single "
                    "thread with the given identifier.  0 enables all threads.");
 
+droption_t<bytesize_t> op_skip_instrs(
+    DROPTION_SCOPE_FRONTEND, "skip_instrs", 0, "Number of instructions to skip",
+    "Specifies the number of instructions to skip in the beginning of the trace "
+    "analysis.  For serial iteration, this number is "
+    "computed just once across the interleaving sequence of all threads; for parallel "
+    "iteration, each thread skips this many insructions.  When built with zipfile "
+    "support, this skipping is optimized and large instruction counts can be quickly "
+    "skipped; this is not the case for -skip_refs.");
+
 droption_t<bytesize_t>
     op_skip_refs(DROPTION_SCOPE_FRONTEND, "skip_refs", 0,
                  "Number of memory references to skip",
-                 "Specifies the number of references to skip "
-                 "in the beginning of the application execution. "
-                 "These memory references are dropped instead of being simulated.");
+                 "Specifies the number of references to skip in the beginning of the "
+                 "application execution. These memory references are dropped instead "
+                 "of being simulated.  This skipping may be slow for large skip values; "
+                 "consider -skip_instrs for a faster method of skipping.");
 
 droption_t<bytesize_t> op_warmup_refs(
     DROPTION_SCOPE_FRONTEND, "warmup_refs", 0,

diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h
@@ -124,6 +124,7 @@ extern droption_t<std::string> op_tracer;
 extern droption_t<std::string> op_tracer_alt;
 extern droption_t<std::string> op_tracer_ops;
 extern droption_t<int> op_only_thread;
+extern droption_t<bytesize_t> op_skip_instrs;
 extern droption_t<bytesize_t> op_skip_refs;
 extern droption_t<bytesize_t> op_warmup_refs;
 extern droption_t<double> op_warmup_fraction;

diff --git a/clients/drcachesim/reader/file_reader.h b/clients/drcachesim/reader/file_reader.h
@@ -38,6 +38,7 @@
 #ifndef _FILE_READER_H_
 #define _FILE_READER_H_ 1
 
+#include <inttypes.h>
 #include <string.h>
 #include <fstream>
 #include <queue>
@@ -55,11 +56,7 @@
 #    ifdef WINDOWS
 #        define ZHEX64_FORMAT_STRING "%016I64x"
 #    else
-#        if defined(__i386__) || defined(__arm__) || defined(__APPLE__)
-#            define ZHEX64_FORMAT_STRING "%016llx"
-#        else
-#            define ZHEX64_FORMAT_STRING "%016lx"
-#        endif
+#        define ZHEX64_FORMAT_STRING "%" PRIx64
 #    endif
 #endif
 
@@ -100,6 +97,35 @@ template <typename T> class file_reader_t : public reader_t {
     virtual bool
     is_complete();
 
+    reader_t &
+    skip_instructions(uint64_t instruction_count) override
+    {
+        if (input_files_.size() > 1) {
+            // TODO i#5538: For fast thread-interleaved (whether serial here or the
+            // forthcoming per-cpu iteration) we need to read in the schedule file(s)
+            // that raw2trace writes out so that we can compute how far to separately
+            // fast-skip in each interleaved thread by calling the per-thread version.
+            // We'll also need to update the memref pid+tid state since we're not
+            // repeating top headers in every thread after a skip.  For now this is a
+            // slow linear walk.
+            return reader_t::skip_instructions(instruction_count);
+        }
+        // If the user asks to skip from the very start, we still need to find the chunk
+        // count marker and drain the header queue.
+        // TODO i#5538: Record all of the header values until the first timestamp
+        // and present them as new memtrace_stream_t interfaces.
+        while (chunk_instr_count_ == 0) {
+            input_entry_ = read_next_entry();
+            process_input_entry();
+        }
+        if (!queues_[0].empty())
+            ERRMSG("Failed to drain header queue\n");
+        bool eof = false;
+        if (!skip_thread_instructions(0, instruction_count, &eof) || eof)
+            at_eof_ = true;
+        return *this;
+    }
+
 protected:
     bool
     read_next_thread_entry(size_t thread_index, OUT trace_entry_t *entry,
@@ -297,6 +323,25 @@ template <typename T> class file_reader_t : public reader_t {
         return nullptr;
     }
 
+    virtual bool
+    skip_thread_instructions(size_t thread_index, uint64_t instruction_count,
+                             OUT bool *eof)
+    {
+        // Default implementation for file types that have no fast seeking and must do a
+        // linear walk.
+        uint64_t stop_count_ = cur_instr_count_ + instruction_count + 1;
+        while (cur_instr_count_ < stop_count_) {
+            if (!read_next_thread_entry(thread_index, &entry_copy_, eof))
+                return false;
+            // Update core state.
+            input_entry_ = &entry_copy_;
+            process_input_entry();
+            // TODO i#5538: Remember the last timestamp+cpu and insert it; share
+            // code with the zipfile reader.
+        }
+        return true;
+    }
+
 private:
     std::string input_path_;
     std::vector<std::string> input_path_list_;