diff --git a/api/docs/release.dox b/api/docs/release.dox index d1fde925f0a..0eba0910da3 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -245,6 +245,8 @@ Further non-compatibility-affecting changes include: markers based on the function ID. This filter is enabled by "-filter_keep_func_ids" followed by a comma-separated list of function IDs to preserve in the output trace. All function markers whose ID is not in the list are removed. + - Added -skip_to_timestamp and #dynamorio::drmemtrace::scheduler_tmpl_t:: + input_workload_t::times_of_interest to the drmemtrace scheduler. **************************************************
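To illustrate the new scheduler interface named in this release-note entry, here is a minimal sketch of how client code might set times_of_interest directly; it mirrors the test_times_of_interest unit test added below and assumes a build with zipfile support. The header paths, trace path, timestamp value, verbosity, and the helper function name are illustrative assumptions, not part of this patch.

#include <iostream>
#include <utility>
#include <vector>
#include "scheduler.h"        // dynamorio::drmemtrace::scheduler_t (path may vary).
#include "zipfile_istream.h"  // zipfile_istream_t (requires HAS_ZIP).

using namespace dynamorio::drmemtrace;

// Hypothetical helper: iterates only the records at or after the given timestamp.
bool
run_from_timestamp(std::vector<scheduler_t::input_reader_t> &&readers,
                   uint64_t start_timestamp)
{
    std::vector<scheduler_t::input_workload_t> sched_inputs;
    sched_inputs.emplace_back(std::move(readers));
    // A single open-ended range: a stop of 0 means "to the end of the trace".
    sched_inputs.back().times_of_interest = { { start_timestamp, 0 } };
    scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT,
                                               scheduler_t::DEPENDENCY_TIMESTAMPS,
                                               scheduler_t::SCHEDULER_DEFAULTS,
                                               /*verbosity=*/1);
    // times_of_interest requires the as-traced cpu schedule in order to translate
    // timestamps into per-input instruction ordinals.
    zipfile_istream_t cpu_schedule("drmemtrace.dir/cpu_schedule.bin.zip");
    sched_ops.replay_as_traced_istream = &cpu_schedule;
    scheduler_t scheduler;
    if (scheduler.init(sched_inputs, /*output_count=*/1, std::move(sched_ops)) !=
        scheduler_t::STATUS_SUCCESS) {
        std::cerr << scheduler.get_error_string() << "\n";
        return false;
    }
    auto *stream = scheduler.get_stream(0);
    memref_t record;
    while (stream->next_record(record) == scheduler_t::STATUS_OK) {
        // Records before the requested window have been skipped; the window
        // boundary is interpolated from the as-traced schedule.
    }
    return true;
}
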
diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp index dff5bb535c3..6ffe60867dd 100644 --- a/clients/drcachesim/analyzer.cpp +++ b/clients/drcachesim/analyzer.cpp @@ -248,6 +248,9 @@ analyzer_tmpl_t::init_scheduler( if (only_thread != INVALID_THREAD_ID) { workload.only_threads.insert(only_thread); } + if (regions.empty() && skip_to_timestamp_ > 0) { + workload.times_of_interest.emplace_back(skip_to_timestamp_, 0); + } return init_scheduler_common(workload, std::move(options)); } @@ -308,12 +311,14 @@ analyzer_tmpl_t::init_scheduler_common( } } else if (parallel_) { sched_ops = sched_type_t::make_scheduler_parallel_options(verbosity_); + sched_ops.replay_as_traced_istream = options.replay_as_traced_istream; sched_ops.read_inputs_in_init = options.read_inputs_in_init; if (worker_count_ <= 0) worker_count_ = std::thread::hardware_concurrency(); output_count = worker_count_; } else { sched_ops = sched_type_t::make_scheduler_serial_options(verbosity_); + sched_ops.replay_as_traced_istream = options.replay_as_traced_istream; sched_ops.read_inputs_in_init = options.read_inputs_in_init; worker_count_ = 1; output_count = 1; diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h index 43d8ce12083..794ea399eb4 100644 --- a/clients/drcachesim/analyzer.h +++ b/clients/drcachesim/analyzer.h @@ -424,6 +424,7 @@ template class analyzer_tmpl_t { int worker_count_; const char *output_prefix_ = "[analyzer]"; uint64_t skip_instrs_ = 0; + uint64_t skip_to_timestamp_ = 0; uint64_t interval_microseconds_ = 0; uint64_t interval_instr_count_ = 0; int verbosity_ = 0; diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index c0a3cb57d5a..1389afcfa68 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -351,6 +351,13 @@ analyzer_multi_tmpl_t::analyzer_multi_tmpl_t() { this->worker_count_ = op_jobs.get_value(); this->skip_instrs_ = op_skip_instrs.get_value(); + this->skip_to_timestamp_ = op_skip_to_timestamp.get_value(); + if (this->skip_instrs_ > 0 && this->skip_to_timestamp_ > 0) { + this->error_string_ = "Usage error: only one of -skip_instrs and " + "-skip_to_timestamp can be used at a time"; + this->success_ = false; + return; + } this->interval_microseconds_ = op_interval_microseconds.get_value(); this->interval_instr_count_ = op_interval_instr_count.get_value(); // Initial measurements show it's sometimes faster to keep the parallel model @@ -437,6 +444,14 @@ analyzer_multi_tmpl_t::analyzer_multi_tmpl_t() this->parallel_ = false; } sched_ops = init_dynamic_schedule(); + } else if (op_skip_to_timestamp.get_value() > 0) { +#ifdef HAS_ZIP + if (!op_cpu_schedule_file.get_value().empty()) { + cpu_schedule_zip_.reset( + new zipfile_istream_t(op_cpu_schedule_file.get_value())); + sched_ops.replay_as_traced_istream = cpu_schedule_zip_.get(); + } +#endif } if (!op_indir.get_value().empty()) { @@ -521,9 +536,14 @@ analyzer_multi_tmpl_t::init_dynamic_schedule() sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS; } else if (!op_cpu_schedule_file.get_value().empty()) { cpu_schedule_zip_.reset(new zipfile_istream_t(op_cpu_schedule_file.get_value())); - sched_ops.mapping = sched_type_t::MAP_TO_RECORDED_OUTPUT; - sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS; sched_ops.replay_as_traced_istream = cpu_schedule_zip_.get(); + // -cpu_schedule_file is used for two different things: actually replaying, + // and just input for -skip_to_timestamp. 
Only if -skip_to_timestamp is 0 + // do we actually replay. + if (op_skip_to_timestamp.get_value() == 0) { + sched_ops.mapping = sched_type_t::MAP_TO_RECORDED_OUTPUT; + sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS; + } } #endif sched_ops.kernel_switch_trace_path = op_sched_switch_file.get_value(); diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 6d2c51e913e..34291d45e99 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -549,7 +549,8 @@ droption_t op_skip_instrs( "Specifies the number of instructions to skip in the beginning of the trace " "analysis. For serial iteration, this number is " "computed just once across the interleaving sequence of all threads; for parallel " - "iteration, each thread skips this many insructions. When built with zipfile " + "iteration, each thread skips this many instructions (see -skip_to_timestamp for " + "an alternative which does align all threads). When built with zipfile " "support, this skipping is optimized and large instruction counts can be quickly " "skipped; this is not the case for -skip_refs."); @@ -561,6 +562,17 @@ droption_t "of being simulated. This skipping may be slow for large skip values; " "consider -skip_instrs for a faster method of skipping."); +droption_t op_skip_to_timestamp( + DROPTION_SCOPE_FRONTEND, "skip_to_timestamp", 0, "Timestamp to start at", + "Specifies a timestamp to start at, skipping over prior records in the trace. " + "This is cross-cutting across all threads. If the target timestamp is not " + "present as a timestamp marker, interpolation is used to approximate the " + "target location in each thread. Only one of this option and -skip_instrs can be " + "specified. Requires -cpu_schedule_file to also be specified, as a schedule file " + "is required to translate the timestamp into per-thread instruction ordinals. " + "When built with zipfile support, this skipping is optimized and large " + "instruction counts can be quickly skipped."); + droption_t op_L0_filter_until_instrs( DROPTION_SCOPE_CLIENT, "L0_filter_until_instrs", 0, "Number of instructions for warmup trace", @@ -901,9 +913,11 @@ droption_t op_replay_file(DROPTION_SCOPE_FRONTEND, "replay_file", " "Path with stored schedule for replay."); droption_t op_cpu_schedule_file(DROPTION_SCOPE_FRONTEND, "cpu_schedule_file", "", - "Path with stored as-traced schedule for replay", + "Path to as-traced schedule for replay or skip-to-timestamp", "Applies to -core_sharded and -core_serial. " - "Path with stored as-traced schedule for replay."); + "Path with stored as-traced schedule for replay. 
If specified " + "with a non-zero -skip_to_timestamp, there is no replay " + "and instead the file is used for the skip request."); #endif droption_t op_sched_switch_file( DROPTION_SCOPE_FRONTEND, "sched_switch_file", "", diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 20286b97a7c..a5bd3b3feaa 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -167,6 +167,7 @@ extern dynamorio::droption::droption_t extern dynamorio::droption::droption_t op_only_thread; extern dynamorio::droption::droption_t op_skip_instrs; extern dynamorio::droption::droption_t op_skip_refs; +extern dynamorio::droption::droption_t op_skip_to_timestamp; extern dynamorio::droption::droption_t op_warmup_refs; extern dynamorio::droption::droption_t op_warmup_fraction; extern dynamorio::droption::droption_t op_sim_refs; diff --git a/clients/drcachesim/reader/zipfile_file_reader.cpp b/clients/drcachesim/reader/zipfile_file_reader.cpp index acd1aebd9bd..7d03dd324a4 100644 --- a/clients/drcachesim/reader/zipfile_file_reader.cpp +++ b/clients/drcachesim/reader/zipfile_file_reader.cpp @@ -185,7 +185,7 @@ file_reader_t::skip_instructions(uint64_t instruction_count) { if (instruction_count == 0) return *this; - VPRINT(this, 2, "Skipping %" PRIi64 " instrs in %s\n", instruction_count, + VPRINT(this, 2, "Skipping %" PRIu64 " instrs in %s\n", instruction_count, input_file_.path.c_str()); if (!pre_skip_instructions()) return *this; @@ -199,7 +199,7 @@ file_reader_t::skip_instructions(uint64_t instruction_count) // know the chunk names to use with a single unzLocateFile. uint64_t stop_count = cur_instr_count_ + instruction_count + 1; VPRINT(this, 2, - "stop=%" PRIi64 " cur=%" PRIi64 " chunk=%" PRIi64 " est=%" PRIi64 "\n", + "stop=%" PRIu64 " cur=%" PRIu64 " chunk=%" PRIu64 " est=%" PRIu64 "\n", stop_count, cur_instr_count_, chunk_instr_count_, cur_instr_count_ + (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_))); @@ -227,11 +227,11 @@ file_reader_t::skip_instructions(uint64_t instruction_count) return *this; } cur_instr_count_ += chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_); - VPRINT(this, 2, "At %" PRIi64 " instrs at start of new chunk\n", + VPRINT(this, 2, "At %" PRIu64 " instrs at start of new chunk\n", cur_instr_count_); VPRINT(this, 2, - "zip chunk stop=%" PRIi64 " cur=%" PRIi64 " chunk=%" PRIi64 - " end-of-chunk=%" PRIi64 "\n", + "zip chunk stop=%" PRIu64 " cur=%" PRIu64 " chunk=%" PRIu64 + " end-of-chunk=%" PRIu64 "\n", stop_count, cur_instr_count_, chunk_instr_count_, cur_instr_count_ + (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_))); diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index b7bf1170d32..fc90a723ac9 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -698,6 +699,18 @@ scheduler_tmpl_t::init( tid2input_[workload_tid_t(workload_idx, it.first)] = it.second; } } + if (!workload.times_of_interest.empty()) { + for (const auto &modifiers : workload.thread_modifiers) { + if (!modifiers.regions_of_interest.empty()) { + // We do not support mixing with other ROI specifiers. 
+ return STATUS_ERROR_INVALID_PARAMETER; + } + } + sched_type_t::scheduler_status_t status = + create_regions_from_times(workload_tids, workload); + if (status != sched_type_t::STATUS_SUCCESS) + return STATUS_ERROR_INVALID_PARAMETER; + } for (const auto &modifiers : workload.thread_modifiers) { if (modifiers.struct_size != sizeof(input_thread_info_t)) return STATUS_ERROR_INVALID_PARAMETER; @@ -724,6 +737,8 @@ scheduler_tmpl_t::init( input.priority = modifiers.priority; for (size_t i = 0; i < modifiers.regions_of_interest.size(); ++i) { const auto &range = modifiers.regions_of_interest[i]; + VPRINT(this, 3, "ROI #%zu for input %d: [%" PRIu64 ", %" PRIu64 ")\n", + i, index, range.start_instruction, range.stop_instruction); if (range.start_instruction == 0 || (range.stop_instruction < range.start_instruction && range.stop_instruction != 0)) @@ -731,8 +746,10 @@ scheduler_tmpl_t::init( if (i == 0) continue; if (range.start_instruction <= - modifiers.regions_of_interest[i - 1].stop_instruction) + modifiers.regions_of_interest[i - 1].stop_instruction) { + error_string_ = "gap required between regions of interest"; return STATUS_ERROR_INVALID_PARAMETER; + } } input.regions_of_interest = modifiers.regions_of_interest; } @@ -852,7 +869,8 @@ scheduler_tmpl_t::set_initial_schedule( // benefit to reading the schedule file. The analyzer serial mode or other // special cases of one output don't set the replay_as_traced_istream // field.) - sched_type_t::scheduler_status_t status = read_traced_schedule(); + sched_type_t::scheduler_status_t status = + read_and_instantiate_traced_schedule(); if (status != sched_type_t::STATUS_SUCCESS) return STATUS_ERROR_INVALID_PARAMETER; // Now leverage the regular replay code. @@ -1066,37 +1084,9 @@ scheduler_tmpl_t::read_recorded_schedule() template typename scheduler_tmpl_t::scheduler_status_t -scheduler_tmpl_t::read_traced_schedule() +scheduler_tmpl_t::read_and_instantiate_traced_schedule() { - if (options_.replay_as_traced_istream == nullptr) - return STATUS_ERROR_INVALID_PARAMETER; - - schedule_entry_t entry(0, 0, 0, 0); - // See comment in read_recorded_schedule() on our assumption that we can - // easily fit the whole context switch sequence in memory. This cpu_schedule - // file has an entry per timestamp, though, even for consecutive ones on the same - // core, so it uses more memory. - // We do not have a subfile listing feature in archive_istream_t, but we can - // read sequentially as each record has a cpu field. - // This schedule_entry_t format doesn't have the stop instruction ordinal (as it was - // designed for skip targets only), so we take two passes to get that information. - // If we do find memory is an issue we could add a stop field to schedule_entry_t - // and collapse as we go, saving memory. - // We also need to translate the thread and cpu id values into 0-based ordinals. - std::unordered_map tid2input; - for (int i = 0; i < static_cast(inputs_.size()); ++i) { - tid2input[inputs_[i].tid] = i; - } std::vector> start2stop(inputs_.size()); - // We initially number the outputs according to their order in the file, and then - // sort by the stored cpuid below. - // XXX i#6726: Should we support some direction from the user on this? Simulation - // may want to preserve the NUMA relationships and may need to set up its simulated - // cores at init time, so it would prefer to partition by output stream identifier. 
- // Maybe we could at least add the proposed memtrace_stream_t query for cpuid and - // let it be called even before reading any records at all? - output_ordinal_t cur_output = 0; - uint64_t cur_cpu = std::numeric_limits::max(); // We also want to collapse same-cpu consecutive records so we start with // a temporary local vector. std::vector> all_sched(outputs_.size()); @@ -1105,45 +1095,8 @@ scheduler_tmpl_t::read_traced_schedule() // These hold entries added in the on-disk (unsorted) order. std::vector disk_ord2index; // Initially [i] holds i. std::vector disk_ord2cpuid; // [i] holds cpuid for entry i. - while (options_.replay_as_traced_istream->read(reinterpret_cast(&entry), - sizeof(entry))) { - if (entry.cpu != cur_cpu) { - // This is a zipfile component boundary: one conmponent per cpu. - if (cur_cpu != std::numeric_limits::max()) { - ++cur_output; - if (cur_output >= static_cast(outputs_.size())) { - error_string_ = "replay_as_traced_istream cpu count != output count"; - return STATUS_ERROR_INVALID_PARAMETER; - } - } - cur_cpu = entry.cpu; - disk_ord2cpuid.push_back(cur_cpu); - disk_ord2index.push_back(cur_output); - } - input_ordinal_t input = tid2input[entry.thread]; - // We'll fill in the stop ordinal in our second pass below. - uint64_t start = entry.start_instruction; - uint64_t timestamp = entry.timestamp; - // Some entries have no instructions (there is an entry for each timestamp, and - // a signal can come in after a prior timestamp with no intervening instrs). - if (!all_sched[cur_output].empty() && - input == all_sched[cur_output].back().input && - start == all_sched[cur_output].back().start_instruction) { - VPRINT(this, 3, - "Output #%d: as-read segment #%zu has no instructions: skipping\n", - cur_output, all_sched[cur_output].size() - 1); - continue; - } - all_sched[cur_output].emplace_back(true, input, start, timestamp); - start2stop[input].insert(start); - input_sched[input].emplace_back(cur_output, all_sched[cur_output].size() - 1, - start, timestamp); - } - sched_type_t::scheduler_status_t res = - check_and_fix_modulo_problem_in_schedule(input_sched, start2stop, all_sched); - if (res != sched_type_t::STATUS_SUCCESS) - return res; - res = remove_zero_instruction_segments(input_sched, all_sched); + sched_type_t::scheduler_status_t res = read_traced_schedule( + input_sched, start2stop, all_sched, disk_ord2index, disk_ord2cpuid); if (res != sched_type_t::STATUS_SUCCESS) return res; // Sort by cpuid to get a more natural ordering. @@ -1255,6 +1208,212 @@ scheduler_tmpl_t::read_traced_schedule() return STATUS_SUCCESS; } +template +typename scheduler_tmpl_t::scheduler_status_t +scheduler_tmpl_t::create_regions_from_times( + const std::unordered_map &workload_tids, + input_workload_t &workload) +{ + // First, read from the as-traced schedule file into data structures shared with + // replay-as-traced. + std::vector> input_sched(inputs_.size()); + // These are all unused. + std::vector> start2stop(inputs_.size()); + std::vector> all_sched; + std::vector disk_ord2index; + std::vector disk_ord2cpuid; + sched_type_t::scheduler_status_t res = read_traced_schedule( + input_sched, start2stop, all_sched, disk_ord2index, disk_ord2cpuid); + if (res != sched_type_t::STATUS_SUCCESS) + return res; + // Do not allow a replay mode to start later. + options_.replay_as_traced_istream = nullptr; + + // Now create an interval tree of timestamps (with instr ordinals as payloads) + // for each input. 
As our intervals do not overlap and have no gaps we need + // no size, just the start address key. + std::vector> time_tree(inputs_.size()); + for (int input_idx = 0; input_idx < static_cast(inputs_.size()); + ++input_idx) { + for (int sched_idx = 0; + sched_idx < static_cast(input_sched[input_idx].size()); ++sched_idx) { + schedule_input_tracker_t &sched = input_sched[input_idx][sched_idx]; + VPRINT(this, 4, "as-read: input=%d start=%" PRId64 " time=%" PRId64 "\n", + input_idx, sched.start_instruction, sched.timestamp); + time_tree[input_idx][sched.timestamp] = sched.start_instruction; + } + } + + // Finally, convert the requested time ranges into instr ordinal ranges. + for (const auto &tid_it : workload_tids) { + std::vector instr_ranges; + bool entire_tid = false; + for (const auto &times : workload.times_of_interest) { + uint64_t instr_start = 0, instr_end = 0; + bool has_start = time_tree_lookup(time_tree[tid_it.second], + times.start_timestamp, instr_start); + bool has_end; + if (times.stop_timestamp == 0) + has_end = true; + else { + has_end = time_tree_lookup(time_tree[tid_it.second], times.stop_timestamp, + instr_end); + } + if (has_start && has_end && instr_start == instr_end) { + if (instr_start == 0 && instr_end == 0) { + entire_tid = true; + } else { + ++instr_end; + } + } + // If !has_start we'll include from 0. The start timestamp will make it be + // scheduled last but there will be no delay if no other thread is available. + // If !has_end, instr_end will still be 0 which means the end of the trace. + if (instr_start > 0 || instr_end > 0) { + if (!instr_ranges.empty() && + (instr_ranges.back().stop_instruction >= instr_start || + instr_ranges.back().stop_instruction == 0)) { + error_string_ = + "times_of_interest are too close together: " + "corresponding instruction ordinals are overlapping or adjacent"; + return STATUS_ERROR_INVALID_PARAMETER; + } + instr_ranges.emplace_back(instr_start, instr_end); + VPRINT(this, 2, + "tid %" PRIu64 " overlaps with times_of_interest [%" PRIu64 + ", %" PRIu64 ") @ [%" PRIu64 ", %" PRIu64 ")\n", + tid_it.first, times.start_timestamp, times.stop_timestamp, + instr_start, instr_end); + } + } + if (!entire_tid && instr_ranges.empty()) { + // Exclude this thread completely. We've already created its + // inputs_ entry with cross-indices stored in other structures + // so instead of trying to erase it we give it a max start point. + VPRINT(this, 2, + "tid %" PRIu64 " has no overlap with any times_of_interest entry\n", + tid_it.first); + instr_ranges.emplace_back(std::numeric_limits::max(), 0); + } + if (entire_tid) { + // No range is needed. + } else { + workload.thread_modifiers.emplace_back(instr_ranges); + workload.thread_modifiers.back().tids.emplace_back(tid_it.first); + } + } + return sched_type_t::STATUS_SUCCESS; +} + +template +bool +scheduler_tmpl_t::time_tree_lookup( + const std::map &tree, uint64_t time, uint64_t &ordinal) +{ + auto it = tree.upper_bound(time); + if (it == tree.begin() || it == tree.end()) { + // We do not have a timestamp in the footer, so we assume any time + // past the final known timestamp is too far and do not try to + // fit into the final post-last-timestamp sequence. 
+ return false; + } + uint64_t upper_time = it->first; + uint64_t upper_ord = it->second; + it--; + uint64_t lower_time = it->first; + uint64_t lower_ord = it->second; + double fraction = (time - lower_time) / static_cast(upper_time - lower_time); + double interpolate = lower_ord + fraction * (upper_ord - lower_ord); + ordinal = static_cast(interpolate); + VPRINT(this, 3, + "time2ordinal: time %" PRIu64 " => times [%" PRIu64 ", %" PRIu64 + ") ords [%" PRIu64 ", %" PRIu64 ") => interpolated %" PRIu64 "\n", + time, lower_time, upper_time, lower_ord, upper_ord, ordinal); + return true; +} + +template +typename scheduler_tmpl_t::scheduler_status_t +scheduler_tmpl_t::read_traced_schedule( + std::vector> &input_sched, + std::vector> &start2stop, + std::vector> &all_sched, + std::vector &disk_ord2index, std::vector &disk_ord2cpuid) +{ + if (options_.replay_as_traced_istream == nullptr) { + error_string_ = "Missing as-traced istream"; + return STATUS_ERROR_INVALID_PARAMETER; + } + + schedule_entry_t entry(0, 0, 0, 0); + // See comment in read_recorded_schedule() on our assumption that we can + // easily fit the whole context switch sequence in memory. This cpu_schedule + // file has an entry per timestamp, though, even for consecutive ones on the same + // core, so it uses more memory. + // We do not have a subfile listing feature in archive_istream_t, but we can + // read sequentially as each record has a cpu field. + // This schedule_entry_t format doesn't have the stop instruction ordinal (as it was + // designed for skip targets only), so we take two passes to get that information. + // If we do find memory is an issue we could add a stop field to schedule_entry_t + // and collapse as we go, saving memory. + // We also need to translate the thread and cpu id values into 0-based ordinals. + std::unordered_map tid2input; + for (int i = 0; i < static_cast(inputs_.size()); ++i) { + tid2input[inputs_[i].tid] = i; + } + // We initially number the outputs according to their order in the file, and then + // sort by the stored cpuid below. + // XXX i#6726: Should we support some direction from the user on this? Simulation + // may want to preserve the NUMA relationships and may need to set up its simulated + // cores at init time, so it would prefer to partition by output stream identifier. + // Maybe we could at least add the proposed memtrace_stream_t query for cpuid and + // let it be called even before reading any records at all? + output_ordinal_t cur_output = 0; + uint64_t cur_cpu = std::numeric_limits::max(); + while (options_.replay_as_traced_istream->read(reinterpret_cast(&entry), + sizeof(entry))) { + if (entry.cpu != cur_cpu) { + // This is a zipfile component boundary: one component per cpu. + if (cur_cpu != std::numeric_limits::max()) { + ++cur_output; + if (options_.mapping == MAP_TO_RECORDED_OUTPUT && !outputs_.empty() && + cur_output >= static_cast(outputs_.size())) { + error_string_ = "replay_as_traced_istream cpu count != output count"; + return STATUS_ERROR_INVALID_PARAMETER; + } + } + cur_cpu = entry.cpu; + disk_ord2cpuid.push_back(cur_cpu); + disk_ord2index.push_back(cur_output); + } + input_ordinal_t input = tid2input[entry.thread]; + // The caller must fill in the stop ordinal in a second pass. + uint64_t start = entry.start_instruction; + uint64_t timestamp = entry.timestamp; + // Some entries have no instructions (there is an entry for each timestamp, and + // a signal can come in after a prior timestamp with no intervening instrs). 
+ if (all_sched.size() < static_cast(cur_output + 1)) + all_sched.resize(cur_output + 1); + if (!all_sched[cur_output].empty() && + input == all_sched[cur_output].back().input && + start == all_sched[cur_output].back().start_instruction) { + VPRINT(this, 3, + "Output #%d: as-read segment #%zu has no instructions: skipping\n", + cur_output, all_sched[cur_output].size() - 1); + continue; + } + all_sched[cur_output].emplace_back(true, input, start, timestamp); + start2stop[input].insert(start); + input_sched[input].emplace_back(cur_output, all_sched[cur_output].size() - 1, + start, timestamp); + } + sched_type_t::scheduler_status_t res = + check_and_fix_modulo_problem_in_schedule(input_sched, start2stop, all_sched); + if (res != sched_type_t::STATUS_SUCCESS) + return res; + return remove_zero_instruction_segments(input_sched, all_sched); +} + template typename scheduler_tmpl_t::scheduler_status_t scheduler_tmpl_t::remove_zero_instruction_segments( @@ -1888,7 +2047,7 @@ scheduler_tmpl_t::advance_region_of_interest( if (input.in_cur_region && cur_instr >= cur_range.start_instruction - 1) return sched_type_t::STATUS_OK; - VPRINT(this, 2, "skipping from %" PRId64 " to %" PRId64 " instrs for ROI\n", + VPRINT(this, 2, "skipping from %" PRIu64 " to %" PRIu64 " instrs for ROI\n", cur_instr, cur_range.start_instruction); if (options_.schedule_record_ostream != nullptr) { sched_type_t::stream_status_t status = close_schedule_segment(output, input); @@ -1943,10 +2102,17 @@ scheduler_tmpl_t::skip_instructions(output_ordinal_t out clear_input_queue(input); input.reader->skip_instructions(skip_amount); if (*input.reader == *input.reader_end) { - // Raise error because the input region is out of bounds. - VPRINT(this, 2, "skip_instructions: input=%d skip out of bounds\n", input.index); mark_input_eof(input); - return sched_type_t::STATUS_REGION_INVALID; + // Raise error because the input region is out of bounds, unless the max + // was used, which we ourselves use internally for times_of_interest. + if (skip_amount >= std::numeric_limits::max() - 2) { + VPRINT(this, 2, "skip_instructions: input=%d skip to eof\n", input.index); + return sched_type_t::STATUS_SKIPPED; + } else { + VPRINT(this, 2, "skip_instructions: input=%d skip out of bounds\n", + input.index); + return sched_type_t::STATUS_REGION_INVALID; + } } input.in_cur_region = true; auto *stream = outputs_[output].stream; diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 5ca8207512f..afdb0ee1b53 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,27 @@ template class scheduler_tmpl_t { uint64_t stop_instruction; }; + /** + * A time range in units of the microsecond timestamps in the traces. + */ + struct timestamp_range_t { + /** Convenience constructor. */ + timestamp_range_t(uint64_t start, uint64_t stop) + : start_timestamp(start) + , stop_timestamp(stop) + { + } + /** + * The starting time in the microsecond timestamp units in the trace. + */ + uint64_t start_timestamp; + /** + * The ending time in the microsecond timestamp units in the trace. + * The ending time is inclusive. 0 means the end of the trace. + */ + uint64_t stop_timestamp; + }; + /** * Specifies details about one set of input trace files from one workload, * each input file representing a single software thread. 
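The conversion behind timestamp_range_t relies on linear interpolation between the two bracketing as-traced schedule entries, as performed by time_tree_lookup() above. Below is a hedged standalone sketch of that arithmetic; the helper name is hypothetical, and the example values are taken from the test_times_of_interest unit test added further down (entries mapping timestamp 20 to ordinal 0 and timestamp 30 to ordinal 4).

#include <cstdint>

// Estimate the instruction ordinal for 'time' given the schedule entries that
// bracket it: (lower_time -> lower_ord) and (upper_time -> upper_ord).
static uint64_t
interpolate_ordinal(uint64_t time, uint64_t lower_time, uint64_t lower_ord,
                    uint64_t upper_time, uint64_t upper_ord)
{
    double fraction =
        (time - lower_time) / static_cast<double>(upper_time - lower_time);
    return static_cast<uint64_t>(lower_ord + fraction * (upper_ord - lower_ord));
}
// E.g., interpolate_ordinal(25, 20, 0, 30, 4) == 2: a requested start time of 25
// lands half-way between the two entries and so maps to instruction ordinal 2.
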
@@ -311,6 +333,34 @@ template class scheduler_tmpl_t { /** Scheduling modifiers for the threads in this workload. */ std::vector thread_modifiers; + /** + * If non-empty, all input records outside of these ranges are skipped. These + * times cut across all inputs of this workload. The times are converted into a + * separate sequence of instruction + * #dynamorio::drmemtrace::scheduler_tmpl_t::range_t for each input. The end + * result is as though the #dynamorio::drmemtrace::scheduler_tmpl_t:: + * input_thread_info_t.regions_of_interest + * field were set for each input. See the comments by that field for further + * details such as the marker inserted between ranges. Although the times cut + * across all inputs for determining the per-input instruction-ordinal ranges, no + * barrier is applied to align the resulting regions of interest: i.e., one input + * can finish its initial region and move to its next region before all other + * inputs finish their initial regions. + * + * If non-empty, the + * #dynamorio::drmemtrace::scheduler_tmpl_t:: + * input_thread_info_t.regions_of_interest + * field must be empty for each modifier for this workload. + * + * If non-empty, the #dynamorio::drmemtrace::scheduler_tmpl_t:: + * scheduler_options_t.replay_as_traced_istream field must also be specified to + * provide the timestamp-to-instruction-ordinal mappings. These mappings are not + * precise due to the coarse-grained timestamps and the elision of adjacent + * timestamps in the istream. Interpolation is used to estimate instruction + * ordinals when timestamps fall in between recorded points. + */ + std::vector times_of_interest; + // Work around a known Visual Studio issue where it complains about deleted copy // constructors for unique_ptr by deleting our copies and defaulting our moves. input_workload_t(const input_workload_t &) = delete; @@ -539,7 +589,10 @@ template class scheduler_tmpl_t { /** * Input stream for replaying the traced schedule when #MAP_TO_RECORDED_OUTPUT is * specified for more than one output stream (whose count must match the number - * of traced cores). + * of traced cores). Alternatively, if + * #dynamorio::drmemtrace::scheduler_tmpl_t::input_workload_t.times_of_interest + * is non-empty, this stream is required for obtaining the mappings between + * timestamps and instruction ordinals. */ archive_istream_t *replay_as_traced_istream = nullptr; /** @@ -1461,13 +1514,33 @@ template class scheduler_tmpl_t { skip_instructions(output_ordinal_t output, input_info_t &input, uint64_t skip_amount); scheduler_status_t - read_traced_schedule(); + read_and_instantiate_traced_schedule(); + + scheduler_status_t + create_regions_from_times(const std::unordered_map &workload_tids, + input_workload_t &workload); + + // Interval time-to-instr-ord tree lookup with interpolation. + bool + time_tree_lookup(const std::map &tree, uint64_t time, + uint64_t &ordinal); + + // Reads from the as-traced schedule file into the passed-in structures, after + // fixing up zero-instruction sequences and working around i#6107. + scheduler_status_t + read_traced_schedule(std::vector> &input_sched, + std::vector> &start2stop, + std::vector> &all_sched, + std::vector &disk_ord2index, + std::vector &disk_ord2cpuid); + // Helper for read_traced_schedule(). scheduler_status_t remove_zero_instruction_segments( std::vector> &input_sched, std::vector> &all_sched); + // Helper for read_traced_schedule(). 
scheduler_status_t check_and_fix_modulo_problem_in_schedule( std::vector> &input_sched, diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index eeec09c073c..554715e42e4 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -3555,6 +3555,163 @@ test_replay_as_traced_from_file(const char *testdir) #endif } +static void +test_times_of_interest() +{ +#ifdef HAS_ZIP + std::cerr << "\n----------------\nTesting times of interest\n"; + + static constexpr int NUM_INPUTS = 3; + static constexpr int NUM_OUTPUTS = 1; + static constexpr int NUM_TIMESTAMPS = 3; + static constexpr int NUM_INSTRS_PER_TIMESTAMP = 3; + static constexpr memref_tid_t TID_BASE = 100; + static constexpr addr_t PC_BASE = 42; + static constexpr int CPU0 = 6; + static constexpr int CPU1 = 9; + std::vector inputs[NUM_INPUTS]; + for (int i = 0; i < NUM_INPUTS; ++i) { + memref_tid_t tid = TID_BASE + i; + inputs[i].push_back(make_thread(tid)); + inputs[i].push_back(make_pid(1)); + for (int j = 0; j < NUM_TIMESTAMPS; ++j) { + uint64_t timestamp = i == 2 ? (1 + 5 * (j + 1)) : (10 * (j + 1) + 10 * i); + inputs[i].push_back(make_timestamp(timestamp)); + for (int k = 0; k < NUM_INSTRS_PER_TIMESTAMP; ++k) { + inputs[i].push_back(make_instr(PC_BASE + 1 /*1-based ranges*/ + + j * NUM_INSTRS_PER_TIMESTAMP + k)); + } + } + inputs[i].push_back(make_exit(tid)); + } + + // Synthesize a cpu-schedule file. + std::string cpu_fname = "tmp_test_times_of_interest.zip"; + { + std::vector sched0; + std::vector sched1; + // We do not bother to interleave to make it easier to see the sequence + // in this test. + // Thread A. + sched0.emplace_back(TID_BASE + 0, 10, CPU0, 0); + sched0.emplace_back(TID_BASE + 0, 20, CPU0, 4); + sched0.emplace_back(TID_BASE + 0, 30, CPU0, 7); + // Thread B. + sched0.emplace_back(TID_BASE + 1, 20, CPU0, 0); + sched0.emplace_back(TID_BASE + 1, 30, CPU0, 4); + sched0.emplace_back(TID_BASE + 1, 40, CPU0, 7); + // Thread C. + sched1.emplace_back(TID_BASE + 2, 6, CPU1, 0); + sched1.emplace_back(TID_BASE + 2, 11, CPU1, 4); + sched1.emplace_back(TID_BASE + 2, 16, CPU1, 7); + std::ostringstream cpu0_string; + cpu0_string << CPU0; + std::ostringstream cpu1_string; + cpu1_string << CPU1; + zipfile_ostream_t outfile(cpu_fname); + std::string err = outfile.open_new_component(cpu0_string.str()); + assert(err.empty()); + if (!outfile.write(reinterpret_cast(sched0.data()), + sched0.size() * sizeof(sched0[0]))) + assert(false); + err = outfile.open_new_component(cpu1_string.str()); + assert(err.empty()); + if (!outfile.write(reinterpret_cast(sched1.data()), + sched1.size() * sizeof(sched1[0]))) + assert(false); + } + + { + // Test an erroneous range request with no gap. + std::vector sched_inputs; + std::vector readers; + for (int i = 0; i < NUM_INPUTS; i++) { + memref_tid_t tid = TID_BASE + i; + readers.emplace_back( + std::unique_ptr(new mock_reader_t(inputs[i])), + std::unique_ptr(new mock_reader_t()), tid); + } + sched_inputs.emplace_back(std::move(readers)); + // Pick times that have adjacent corresponding instructions: 30 and 32 + // have a time gap but no instruction gap. 
+ sched_inputs.back().times_of_interest = { { 25, 30 }, { 32, 33 } }; + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/3); + zipfile_istream_t infile(cpu_fname); + sched_ops.replay_as_traced_istream = &infile; + scheduler_t scheduler; + assert(scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) == + scheduler_t::STATUS_ERROR_INVALID_PARAMETER); + } + { + // Test a valid range request. + std::vector sched_inputs; + std::vector readers; + for (int i = 0; i < NUM_INPUTS; i++) { + memref_tid_t tid = TID_BASE + i; + readers.emplace_back( + std::unique_ptr(new mock_reader_t(inputs[i])), + std::unique_ptr(new mock_reader_t()), tid); + } + sched_inputs.emplace_back(std::move(readers)); + sched_inputs.back().times_of_interest = { { 25, 30 }, { 38, 39 } }; + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/3); + zipfile_istream_t infile(cpu_fname); + sched_ops.replay_as_traced_istream = &infile; + scheduler_t scheduler; + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) { + std::cerr << scheduler.get_error_string() << "\n"; + assert(false); + } + auto *stream0 = scheduler.get_stream(0); + auto check_next = [](scheduler_t::stream_t *stream, memref_tid_t expect_tid, + trace_type_t expect_type, addr_t expect_addr = 0) { + memref_t record; + scheduler_t::stream_status_t status = stream->next_record(record); + assert(status == scheduler_t::STATUS_OK); + assert(record.instr.tid == expect_tid); + if (record.instr.type != expect_type) { + std::cerr << "Expected type " << expect_type + << " != " << record.instr.type << "\n"; + assert(false); + } + if (expect_addr != 0 && record.instr.addr != expect_addr) { + std::cerr << "Expected addr " << expect_addr + << " != " << record.instr.addr << "\n"; + assert(false); + } + }; + // Range is 5 until the end. + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 5); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 6); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_MARKER); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 7); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 8); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 9); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_THREAD_EXIT); + // Two ranges: 2-4 and 6-7. + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 2); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 3); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_MARKER); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 4); + // Window id marker in between. 
+ check_next(stream0, TID_BASE + 1, TRACE_TYPE_MARKER); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 6); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_MARKER); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 7); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_THREAD_EXIT); + memref_t record; + assert(stream0->next_record(record) == scheduler_t::STATUS_EOF); + } +#endif +} + static void test_inactive() { @@ -4773,6 +4930,7 @@ test_main(int argc, const char *argv[]) test_replay_as_traced_i6107_workaround(); test_replay_as_traced_dup_start(); test_replay_as_traced_sort(); + test_times_of_interest(); test_inactive(); test_direct_switch(); test_unscheduled(); diff --git a/clients/drcachesim/tests/skip_to_timestamp.templatex b/clients/drcachesim/tests/skip_to_timestamp.templatex new file mode 100644 index 00000000000..f0f2f3d9b59 --- /dev/null +++ b/clients/drcachesim/tests/skip_to_timestamp.templatex @@ -0,0 +1,47 @@ +Basic counts tool results: +Total counts: + 46166 total \(fetched\) instructions + 4327 total unique \(fetched\) instructions + 46166 total userspace instructions + 0 total kernel instructions + 27090 total non-fetched instructions + 27090 total non-fetched userspace instructions + 0 total non-fetched kernel instructions + 0 total prefetches + 37530 total data loads + 38316 total data stores + 0 total icache flushes + 0 total dcache flushes + 2 total threads + 514 total timestamp \+ cpuid markers +.* +Thread 1257602 counts: + 30351 \(fetched\) instructions + 309 unique \(fetched\) instructions + 30351 userspace instructions + 0 kernel instructions + 27090 non-fetched instructions + 27090 non-fetched userspace instructions + 0 non-fetched kernel instructions + 0 prefetches + 33202 data loads + 35737 data stores + 0 icache flushes + 0 dcache flushes + 52 timestamp \+ cpuid markers +.* +Thread 1257596 counts: + 15815 \(fetched\) instructions + 4074 unique \(fetched\) instructions + 15815 userspace instructions + 0 kernel instructions + 0 non-fetched instructions + 0 non-fetched userspace instructions + 0 non-fetched kernel instructions + 0 prefetches + 4328 data loads + 2579 data stores + 0 icache flushes + 0 dcache flushes + 462 timestamp \+ cpuid markers +.* diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index b5f2e730d33..37e32fe61ef 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -3870,6 +3870,13 @@ if (BUILD_CLIENTS) "") set(tool.simulate_as_traced_rawtemp ON) # no preprocessor + torunonly_simtool(skip_to_timestamp ${ci_shared_app} + # We pick a timestamp far enough in that only 2 threads remain + # at that point, testing thread exclusion. + "-indir ${thread_trace_dir} -tool basic_counts -skip_to_timestamp 13331862029905366 -cpu_schedule_file ${cpu_sched_path}" + "") + set(tool.skip_to_timestamp_rawtemp ON) # no preprocessor + set(switch_file "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/mock_switch_sequences.x64.zip") torunonly_simtool(switch_insertion ${ci_shared_app}