i#6844: Add times_of_interest to drmemtrace scheduler (#6845)

Adds a new feature times_of_interest to the drmemtrace scheduler. This allows specifying regions of interest via timestamps, which cut across all inputs in a workload, providing for a consistent starting point across all threads. The feature requires a cpu schedule file which is used to build a mapping from timestamps to instruction ordinals. The mapping is not perfect due to collapsed consecutive entries and coarse-grained timestamps, so interpolation is used in between known points. Adds a unit test. Adds a new analyzer command-line option -skip_to_timestamp which sets a single time-of-interest in the scheduler. Adds a test using the checked-in threadsig x86_64 trace. Fixes #6844
DynamoRIO · Jun 20, 2024 · e11cb67 · e11cb67
1 parent e43cb86
commit e11cb67
Show file tree

Hide file tree

Showing 12 changed files with 580 additions and 86 deletions.
diff --git a/api/docs/release.dox b/api/docs/release.dox
@@ -245,6 +245,8 @@ Further non-compatibility-affecting changes include:
    markers based on the function ID. This filter is enabled by "-filter_keep_func_ids"
    followed by a comma-separated list of function IDs to preserve in the output trace.
    All function markers whose ID is not in the list are removed.
+ - Added -skip_to_timestamp and #dynamorio::drmemtrace::scheduler_tmpl_t::
+   input_workload_t::times_of_interest to the drmemtrace scheduler.
 
 **************************************************
 <hr>

diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp
@@ -248,6 +248,9 @@ analyzer_tmpl_t<RecordType, ReaderType>::init_scheduler(
     if (only_thread != INVALID_THREAD_ID) {
         workload.only_threads.insert(only_thread);
     }
+    if (regions.empty() && skip_to_timestamp_ > 0) {
+        workload.times_of_interest.emplace_back(skip_to_timestamp_, 0);
+    }
     return init_scheduler_common(workload, std::move(options));
 }
 
@@ -308,12 +311,14 @@ analyzer_tmpl_t<RecordType, ReaderType>::init_scheduler_common(
         }
     } else if (parallel_) {
         sched_ops = sched_type_t::make_scheduler_parallel_options(verbosity_);
+        sched_ops.replay_as_traced_istream = options.replay_as_traced_istream;
         sched_ops.read_inputs_in_init = options.read_inputs_in_init;
         if (worker_count_ <= 0)
             worker_count_ = std::thread::hardware_concurrency();
         output_count = worker_count_;
     } else {
         sched_ops = sched_type_t::make_scheduler_serial_options(verbosity_);
+        sched_ops.replay_as_traced_istream = options.replay_as_traced_istream;
         sched_ops.read_inputs_in_init = options.read_inputs_in_init;
         worker_count_ = 1;
         output_count = 1;

diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h
@@ -424,6 +424,7 @@ template <typename RecordType, typename ReaderType> class analyzer_tmpl_t {
     int worker_count_;
     const char *output_prefix_ = "[analyzer]";
     uint64_t skip_instrs_ = 0;
+    uint64_t skip_to_timestamp_ = 0;
     uint64_t interval_microseconds_ = 0;
     uint64_t interval_instr_count_ = 0;
     int verbosity_ = 0;

diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp
@@ -351,6 +351,13 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::analyzer_multi_tmpl_t()
 {
     this->worker_count_ = op_jobs.get_value();
     this->skip_instrs_ = op_skip_instrs.get_value();
+    this->skip_to_timestamp_ = op_skip_to_timestamp.get_value();
+    if (this->skip_instrs_ > 0 && this->skip_to_timestamp_ > 0) {
+        this->error_string_ = "Usage error: only one of -skip_instrs and "
+                              "-skip_to_timestamp can be used at a time";
+        this->success_ = false;
+        return;
+    }
     this->interval_microseconds_ = op_interval_microseconds.get_value();
     this->interval_instr_count_ = op_interval_instr_count.get_value();
     // Initial measurements show it's sometimes faster to keep the parallel model
@@ -437,6 +444,14 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::analyzer_multi_tmpl_t()
             this->parallel_ = false;
         }
         sched_ops = init_dynamic_schedule();
+    } else if (op_skip_to_timestamp.get_value() > 0) {
+#ifdef HAS_ZIP
+        if (!op_cpu_schedule_file.get_value().empty()) {
+            cpu_schedule_zip_.reset(
+                new zipfile_istream_t(op_cpu_schedule_file.get_value()));
+            sched_ops.replay_as_traced_istream = cpu_schedule_zip_.get();
+        }
+#endif
     }
 
     if (!op_indir.get_value().empty()) {
@@ -521,9 +536,14 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::init_dynamic_schedule()
         sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS;
     } else if (!op_cpu_schedule_file.get_value().empty()) {
         cpu_schedule_zip_.reset(new zipfile_istream_t(op_cpu_schedule_file.get_value()));
-        sched_ops.mapping = sched_type_t::MAP_TO_RECORDED_OUTPUT;
-        sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS;
         sched_ops.replay_as_traced_istream = cpu_schedule_zip_.get();
+        // -cpu_schedule_file is used for two different things: actually replaying,
+        // and just input for -skip_to_timestamp.  Only if -skip_to_timestamp is 0
+        // do we actually replay.
+        if (op_skip_to_timestamp.get_value() == 0) {
+            sched_ops.mapping = sched_type_t::MAP_TO_RECORDED_OUTPUT;
+            sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS;
+        }
     }
 #endif
     sched_ops.kernel_switch_trace_path = op_sched_switch_file.get_value();

diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp
@@ -549,7 +549,8 @@ droption_t<bytesize_t> op_skip_instrs(
     "Specifies the number of instructions to skip in the beginning of the trace "
     "analysis.  For serial iteration, this number is "
     "computed just once across the interleaving sequence of all threads; for parallel "
-    "iteration, each thread skips this many insructions.  When built with zipfile "
+    "iteration, each thread skips this many instructions (see -skip_to_timestamp for "
+    "an alternative which does align all threads).  When built with zipfile "
     "support, this skipping is optimized and large instruction counts can be quickly "
     "skipped; this is not the case for -skip_refs.");
 
@@ -561,6 +562,17 @@ droption_t<bytesize_t>
                  "of being simulated.  This skipping may be slow for large skip values; "
                  "consider -skip_instrs for a faster method of skipping.");
 
+droption_t<uint64_t> op_skip_to_timestamp(
+    DROPTION_SCOPE_FRONTEND, "skip_to_timestamp", 0, "Timestamp to start at",
+    "Specifies a timestamp to start at, skipping over prior records in the trace. "
+    "This is cross-cutting across all threads.  If the target timestamp is not "
+    "present as a timestamp marker, interpolation is used to approximate the "
+    "target location in each thread.  Only one of this and -skip_instrs can be "
+    "specified.  Requires -cpu_schedule_file to also be specified, as a schedule file "
+    "is required to translate the timestamp into per-thread instruction ordinals.  "
+    "When built with zipfile support, this skipping is optimized and large "
+    "instruction counts can be quickly skipped.");
+
 droption_t<bytesize_t> op_L0_filter_until_instrs(
     DROPTION_SCOPE_CLIENT, "L0_filter_until_instrs", 0,
     "Number of instructions for warmup trace",
@@ -901,9 +913,11 @@ droption_t<std::string> op_replay_file(DROPTION_SCOPE_FRONTEND, "replay_file", "
                                        "Path with stored schedule for replay.");
 droption_t<std::string>
     op_cpu_schedule_file(DROPTION_SCOPE_FRONTEND, "cpu_schedule_file", "",
-                         "Path with stored as-traced schedule for replay",
+                         "Path to as-traced schedule for replay or skip-to-timestamp",
                          "Applies to -core_sharded and -core_serial. "
-                         "Path with stored as-traced schedule for replay.");
+                         "Path with stored as-traced schedule for replay.  If specified "
+                         "with a non-zero -skip_to_timestamp, there is no replay "
+                         "and instead the file is used for the skip request.");
 #endif
 droption_t<std::string> op_sched_switch_file(
     DROPTION_SCOPE_FRONTEND, "sched_switch_file", "",

diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h
@@ -167,6 +167,7 @@ extern dynamorio::droption::droption_t<dynamorio::droption::bytesize_t>
 extern dynamorio::droption::droption_t<int> op_only_thread;
 extern dynamorio::droption::droption_t<dynamorio::droption::bytesize_t> op_skip_instrs;
 extern dynamorio::droption::droption_t<dynamorio::droption::bytesize_t> op_skip_refs;
+extern dynamorio::droption::droption_t<uint64_t> op_skip_to_timestamp;
 extern dynamorio::droption::droption_t<dynamorio::droption::bytesize_t> op_warmup_refs;
 extern dynamorio::droption::droption_t<double> op_warmup_fraction;
 extern dynamorio::droption::droption_t<dynamorio::droption::bytesize_t> op_sim_refs;

diff --git a/clients/drcachesim/reader/zipfile_file_reader.cpp b/clients/drcachesim/reader/zipfile_file_reader.cpp
@@ -185,7 +185,7 @@ file_reader_t<zipfile_reader_t>::skip_instructions(uint64_t instruction_count)
 {
     if (instruction_count == 0)
         return *this;
-    VPRINT(this, 2, "Skipping %" PRIi64 " instrs in %s\n", instruction_count,
+    VPRINT(this, 2, "Skipping %" PRIu64 " instrs in %s\n", instruction_count,
            input_file_.path.c_str());
     if (!pre_skip_instructions())
         return *this;
@@ -199,7 +199,7 @@ file_reader_t<zipfile_reader_t>::skip_instructions(uint64_t instruction_count)
     // know the chunk names to use with a single unzLocateFile.
     uint64_t stop_count = cur_instr_count_ + instruction_count + 1;
     VPRINT(this, 2,
-           "stop=%" PRIi64 " cur=%" PRIi64 " chunk=%" PRIi64 " est=%" PRIi64 "\n",
+           "stop=%" PRIu64 " cur=%" PRIu64 " chunk=%" PRIu64 " est=%" PRIu64 "\n",
            stop_count, cur_instr_count_, chunk_instr_count_,
            cur_instr_count_ +
                (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_)));
@@ -227,11 +227,11 @@ file_reader_t<zipfile_reader_t>::skip_instructions(uint64_t instruction_count)
             return *this;
         }
         cur_instr_count_ += chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_);
-        VPRINT(this, 2, "At %" PRIi64 " instrs at start of new chunk\n",
+        VPRINT(this, 2, "At %" PRIu64 " instrs at start of new chunk\n",
                cur_instr_count_);
         VPRINT(this, 2,
-               "zip chunk stop=%" PRIi64 " cur=%" PRIi64 " chunk=%" PRIi64
-               " end-of-chunk=%" PRIi64 "\n",
+               "zip chunk stop=%" PRIu64 " cur=%" PRIu64 " chunk=%" PRIu64
+               " end-of-chunk=%" PRIu64 "\n",
                stop_count, cur_instr_count_, chunk_instr_count_,
                cur_instr_count_ +
                    (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_)));