diff --git a/api/docs/debug_memtrace.dox b/api/docs/debug_memtrace.dox
index 9da0ad59ac4..9c893d1d55b 100644
--- a/api/docs/debug_memtrace.dox
+++ b/api/docs/debug_memtrace.dox
@@ -1,5 +1,5 @@
 /* ******************************************************************************
- * Copyright (c) 2010-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2010-2023 Google, Inc.  All rights reserved.
  * ******************************************************************************/
 
 /*
@@ -100,7 +100,7 @@ Type cheat sheet (from trace_type_t enum):
 - 0x19 header
 - 0x16 thread
 - 0x18 pid
-- 0x1c marker: 2=timestamp; 3=cpuid
+- 0x1c marker: 2=timestamp; 3=cpuid; 0xc=version; 9=filetype
 - 0x0a instr (non-cti)
 - 0x0e direct call
 - 0x00 load
diff --git a/api/docs/release.dox b/api/docs/release.dox
index f9f7d5582fe..bbfce0afafc 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -149,6 +149,9 @@ changes:
  - Removed the drcachesim external iterator analyzer interface.  Users should instead
    use the new #dynamorio::drmemtrace::scheduler_tmpl_t interface for direct control
    over iteration.  See \ref sec_drcachesim_sched for example code.
+ - Refactored the drmemtrace reader and file reader classes to better fit the
+   new scheduler model: now each reader owns just one single stream of records
+   with all multi-stream interleaving owned by the scheduler.
 
 Further non-compatibility-affecting changes include:
  - Added AArchXX support for attaching to a running process.
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt
index d1b7fe27dfa..7a50fef6511 100644
--- a/clients/drcachesim/CMakeLists.txt
+++ b/clients/drcachesim/CMakeLists.txt
@@ -715,7 +715,8 @@ if (BUILD_TESTS)
     add_executable(tool.drcacheoff.view_test tests/view_test.cpp reader/file_reader.cpp)
     configure_DynamoRIO_standalone(tool.drcacheoff.view_test)
     add_win32_flags(tool.drcacheoff.view_test)
-    target_link_libraries(tool.drcacheoff.view_test drmemtrace_view drmemtrace_raw2trace)
+    target_link_libraries(tool.drcacheoff.view_test drmemtrace_view drmemtrace_raw2trace
+      drmemtrace_analyzer)
     use_DynamoRIO_extension(tool.drcacheoff.view_test drreg_static)
     use_DynamoRIO_extension(tool.drcacheoff.view_test drcovlib_static)
     use_DynamoRIO_extension(tool.drcacheoff.view_test drdecode)
diff --git a/clients/drcachesim/reader/compressed_file_reader.cpp b/clients/drcachesim/reader/compressed_file_reader.cpp
index bfbaa78cdd8..6113c266476 100644
--- a/clients/drcachesim/reader/compressed_file_reader.cpp
+++ b/clients/drcachesim/reader/compressed_file_reader.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2017-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2017-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -46,25 +46,24 @@ open_single_file_common(const std::string &path, gzFile &out)
     return out != nullptr;
 }
 
-bool
-read_next_thread_entry_common(gzip_reader_t *gzip, OUT trace_entry_t *entry,
-                              OUT bool *eof)
+trace_entry_t *
+read_next_entry_common(gzip_reader_t *gzip, bool *eof)
 {
     if (gzip->cur_buf >= gzip->max_buf) {
         int len = gzread(gzip->file, gzip->buf, sizeof(gzip->buf));
         // Returns less than asked-for for end of file, or –1 for error.
         // We should always get a multiple of the record size.
-        if (len < static_cast<int>(sizeof(*entry)) ||
-            len % static_cast<int>(sizeof(*entry)) != 0) {
+        if (len < static_cast<int>(sizeof(trace_entry_t)) ||
+            len % static_cast<int>(sizeof(trace_entry_t)) != 0) {
             *eof = (len >= 0);
-            return false;
+            return nullptr;
         }
         gzip->cur_buf = gzip->buf;
         gzip->max_buf = gzip->buf + (len / sizeof(*gzip->max_buf));
     }
-    *entry = *gzip->cur_buf;
+    trace_entry_t *res = gzip->cur_buf;
     ++gzip->cur_buf;
-    return true;
+    return res;
 }
 
 } // namespace
@@ -78,9 +77,8 @@ template <>
 /* clang-format on */
 file_reader_t<gzip_reader_t>::~file_reader_t<gzip_reader_t>()
 {
-    for (auto &gzip : input_files_)
-        gzclose(gzip.file);
-    delete[] thread_eof_;
+    if (input_file_.file != nullptr)
+        gzclose(input_file_.file);
 }
 
 template <>
@@ -91,34 +89,24 @@ file_reader_t<gzip_reader_t>::open_single_file(const std::string &path)
     if (!open_single_file_common(path, file))
         return false;
     VPRINT(this, 1, "Opened input file %s\n", path.c_str());
-    input_files_.emplace_back(file);
+    input_file_ = gzip_reader_t(file);
     return true;
 }
 
 template <>
-bool
-file_reader_t<gzip_reader_t>::read_next_thread_entry(size_t thread_index,
-                                                     OUT trace_entry_t *entry,
-                                                     OUT bool *eof)
+trace_entry_t *
+file_reader_t<gzip_reader_t>::read_next_entry()
 {
-    if (!read_next_thread_entry_common(&input_files_[thread_index], entry, eof))
-        return false;
-    VPRINT(this, 4, "Read from thread #%zd file: type=%s (%d), size=%d, addr=%zu\n",
-           thread_index, trace_type_names[entry->type], entry->type, entry->size,
-           entry->addr);
-    return true;
-}
-
-template <>
-bool
-file_reader_t<gzip_reader_t>::is_complete()
-{
-    // The gzip reading interface does not support seeking to SEEK_END so there
-    // is no efficient way to read the footer.
-    // We could have the trace file writer seek back and set a bit at the start.
-    // Currently we are forced to not use this function.
-    // XXX: Should we just remove this interface, then?
-    return false;
+    trace_entry_t *entry = read_queue();
+    if (entry != nullptr)
+        return entry;
+    entry = read_next_entry_common(&input_file_, &at_eof_);
+    if (entry == nullptr)
+        return entry;
+    VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
+           trace_type_names[entry->type], entry->type, entry->size, entry->addr);
+    entry_copy_ = *entry;
+    return &entry_copy_;
 }
 
 namespace dynamorio {
@@ -154,8 +142,10 @@ template <>
 bool
 record_file_reader_t<gzip_reader_t>::read_next_entry()
 {
-    if (!read_next_thread_entry_common(input_file_.get(), &cur_entry_, &eof_))
+    trace_entry_t *entry = read_next_entry_common(input_file_.get(), &eof_);
+    if (entry == nullptr)
         return false;
+    cur_entry_ = *entry;
     VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
            trace_type_names[cur_entry_.type], cur_entry_.type, cur_entry_.size,
            cur_entry_.addr);
diff --git a/clients/drcachesim/reader/compressed_file_reader.h b/clients/drcachesim/reader/compressed_file_reader.h
index b54984371ab..19095bb4a66 100644
--- a/clients/drcachesim/reader/compressed_file_reader.h
+++ b/clients/drcachesim/reader/compressed_file_reader.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2017-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2017-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -40,6 +40,8 @@
 #include "record_file_reader.h"
 
 struct gzip_reader_t {
+    gzip_reader_t()
+        : file(nullptr) {};
     explicit gzip_reader_t(gzFile file)
         : file(file)
     {
diff --git a/clients/drcachesim/reader/file_reader.cpp b/clients/drcachesim/reader/file_reader.cpp
index 940de8206d7..a305780b085 100644
--- a/clients/drcachesim/reader/file_reader.cpp
+++ b/clients/drcachesim/reader/file_reader.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2016-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2016-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -38,75 +38,33 @@ template <>
 /* clang-format on */
 file_reader_t<std::ifstream *>::~file_reader_t()
 {
-    for (auto fstream : input_files_) {
-        delete fstream;
-    }
-    delete[] thread_eof_;
+    delete input_file_;
 }
 
 template <>
 bool
 file_reader_t<std::ifstream *>::open_single_file(const std::string &path)
 {
-    auto fstream = new std::ifstream(path, std::ifstream::binary);
-    if (!*fstream)
+    input_file_ = new std::ifstream(path, std::ifstream::binary);
+    if (!*input_file_)
         return false;
     VPRINT(this, 1, "Opened input file %s\n", path.c_str());
-    input_files_.push_back(fstream);
-    return true;
-}
-
-template <>
-bool
-file_reader_t<std::ifstream *>::read_next_thread_entry(size_t thread_index,
-                                                       OUT trace_entry_t *entry,
-                                                       OUT bool *eof)
-{
-    if (!input_files_[thread_index]->read((char *)entry, sizeof(*entry))) {
-        *eof = input_files_[thread_index]->eof();
-        return false;
-    }
-    VPRINT(this, 4, "Read from thread #%zd file: type=%s (%d), size=%d, addr=%zu\n",
-           thread_index, trace_type_names[entry->type], entry->type, entry->size,
-           entry->addr);
     return true;
 }
 
 template <>
-bool
-file_reader_t<std::ifstream *>::is_complete()
+trace_entry_t *
+file_reader_t<std::ifstream *>::read_next_entry()
 {
-    // FIXME i#3230: this code is in the middle of refactoring for split thread files.
-    // We support the is_complete() call from analyzer_multi for a single file for now,
-    // but may have to abandon this function altogether since gzFile doesn't support it.
-    bool opened_temporarily = false;
-    if (input_files_.empty()) {
-        // Supporting analyzer_multi calling before init() for a single legacy file.
-        opened_temporarily = true;
-        if (!input_path_list_.empty() || input_path_.empty() ||
-            directory_iterator_t::is_directory(input_path_))
-            return false; // Not supported.
-        if (!open_single_file(input_path_))
-            return false;
-    }
-    bool res = false;
-    for (auto fstream : input_files_) {
-        res = false;
-        std::streampos pos = fstream->tellg();
-        fstream->seekg(-(int)sizeof(trace_entry_t), fstream->end);
-        // Avoid reaching eof b/c we can't seek away from it.
-        if (fstream->read((char *)&entry_copy_.type, sizeof(entry_copy_.type)) &&
-            entry_copy_.type == TRACE_TYPE_FOOTER)
-            res = true;
-        fstream->seekg(pos);
-        if (!res)
-            break;
-    }
-    if (opened_temporarily) {
-        // Put things back for init().
-        for (auto fstream : input_files_)
-            delete fstream;
-        input_files_.clear();
+    trace_entry_t *from_queue = read_queue();
+    if (from_queue != nullptr)
+        return from_queue;
+    if (!input_file_->read((char *)&entry_copy_, sizeof(entry_copy_))) {
+        at_eof_ = input_file_->eof();
+        return nullptr;
     }
-    return res;
+    VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
+           trace_type_names[entry_copy_.type], entry_copy_.type, entry_copy_.size,
+           entry_copy_.addr);
+    return &entry_copy_;
 }
diff --git a/clients/drcachesim/reader/file_reader.h b/clients/drcachesim/reader/file_reader.h
index 65325092310..e1c7d8ba729 100644
--- a/clients/drcachesim/reader/file_reader.h
+++ b/clients/drcachesim/reader/file_reader.h
@@ -78,26 +78,17 @@ template <typename T> class file_reader_t : public reader_t {
     {
         online_ = false;
     }
-    explicit file_reader_t(const std::vector<std::string> &path_list, int verbosity = 0)
-        : reader_t(verbosity, "[file_reader]")
-        , input_path_list_(path_list)
-    {
-        online_ = false;
-    }
     virtual ~file_reader_t();
     bool
     init() override
     {
         at_eof_ = false;
-        if (!open_input_files())
+        if (!open_input_file())
             return false;
         ++*this;
         return true;
     }
 
-    virtual bool
-    is_complete();
-
     std::string
     get_stream_name() const override
     {
@@ -107,287 +98,83 @@ template <typename T> class file_reader_t : public reader_t {
         return input_path_.substr(ind + 1);
     }
 
-    reader_t &
-    skip_instructions(uint64_t instruction_count) override
-    {
-        if (instruction_count == 0)
-            return *this;
-        if (input_files_.size() > 1) {
-            // TODO i#5538: For fast thread-interleaved (whether serial here or the
-            // forthcoming per-cpu iteration) we need to read in the schedule file(s)
-            // that raw2trace writes out so that we can compute how far to separately
-            // fast-skip in each interleaved thread by calling the per-thread version.
-            // We'll also need to update the memref pid+tid state since we're not
-            // repeating top headers in every thread after a skip.  For now this is a
-            // slow linear walk.
-            return reader_t::skip_instructions(instruction_count);
-        }
-        // If the user asks to skip from the very start, we still need to find the chunk
-        // count marker and drain the header queue and populate the stream header values.
-        // XXX: We assume the page size is the final header; it is complex to wait for
-        // the timestamp as we don't want to read it yet.
-        while (page_size_ == 0) {
-            input_entry_ = read_next_entry();
-            process_input_entry();
-        }
-        if (!queues_[0].empty())
-            ERRMSG("Failed to drain header queue\n");
-        bool eof = false;
-        if (!skip_thread_instructions(0, instruction_count, &eof) || eof)
-            at_eof_ = true;
-        return *this;
-    }
-
 protected:
-    bool
-    read_next_thread_entry(size_t thread_index, OUT trace_entry_t *entry,
-                           OUT bool *eof) override;
+    trace_entry_t *
+    read_next_entry() override;
 
     virtual bool
     open_single_file(const std::string &path);
 
     virtual bool
-    open_input_files()
+    open_input_file()
     {
-        if (!input_path_list_.empty()) {
-            for (const std::string &path : input_path_list_) {
-                if (!open_single_file(path)) {
-                    ERRMSG("Failed to open %s\n", path.c_str());
-                    return false;
-                }
-            }
-        } else if (directory_iterator_t::is_directory(input_path_)) {
-            VPRINT(this, 1, "Iterating directory %s\n", input_path_.c_str());
-            directory_iterator_t end;
-            directory_iterator_t iter(input_path_);
-            if (!iter) {
-                ERRMSG("Failed to list directory %s: %s", input_path_.c_str(),
-                       iter.error_string().c_str());
-                return false;
-            }
-            for (; iter != end; ++iter) {
-                std::string fname = *iter;
-                if (fname == "." || fname == ".." ||
-                    starts_with(fname, DRMEMTRACE_SERIAL_SCHEDULE_FILENAME) ||
-                    fname == DRMEMTRACE_CPU_SCHEDULE_FILENAME)
-                    continue;
-                // Skip the auxiliary files.
-                if (fname == DRMEMTRACE_MODULE_LIST_FILENAME ||
-                    fname == DRMEMTRACE_FUNCTION_LIST_FILENAME ||
-                    fname == DRMEMTRACE_ENCODING_FILENAME)
-                    continue;
-                VPRINT(this, 2, "Found file %s\n", fname.c_str());
-                if (!open_single_file(input_path_ + DIRSEP + fname)) {
-                    ERRMSG("Failed to open %s\n", fname.c_str());
-                    return false;
-                }
-            }
-        } else {
-            if (!open_single_file(input_path_)) {
-                ERRMSG("Failed to open %s\n", input_path_.c_str());
-                return false;
-            }
-        }
-        if (input_files_.empty()) {
-            ERRMSG("No thread files found.");
+        if (!open_single_file(input_path_)) {
+            ERRMSG("Failed to open %s\n", input_path_.c_str());
             return false;
         }
 
-        thread_count_ = input_files_.size();
-        queues_.resize(input_files_.size());
-        tids_.resize(input_files_.size());
-        timestamps_.resize(input_files_.size());
-        times_.resize(input_files_.size(), 0);
-        // We can't take the address of a vector<bool> element so we use a raw array.
-        thread_eof_ = new bool[input_files_.size()];
-        memset(thread_eof_, 0, input_files_.size() * sizeof(*thread_eof_));
-
-        // First read the tid and pid entries which precede any timestamps_.
+        // First read the tid and pid entries which precede any timestamps.
         // We hand out the tid to the output on every thread switch, and the pid
         // the very first time for the thread.
-        trace_entry_t header, next, pid = {};
-        for (index_ = 0; index_ < input_files_.size(); ++index_) {
-            if (!read_next_thread_entry(index_, &header, &thread_eof_[index_]) ||
-                header.type != TRACE_TYPE_HEADER) {
-                ERRMSG("Invalid header for input file #%zu\n", index_);
-                return false;
-            }
-            // We can handle the older version 1 as well which simply omits the
-            // early marker with the arch tag, and version 2 which only differs wrt
-            // TRACE_MARKER_TYPE_KERNEL_EVENT..
-            if (header.addr > TRACE_ENTRY_VERSION) {
-                ERRMSG(
-                    "Cannot handle version #%zu (expect version <= #%u) for input file "
-                    "#%zu\n",
-                    header.addr, TRACE_ENTRY_VERSION, index_);
+        trace_entry_t *entry;
+        trace_entry_t header = {}, pid = {}, tid = {};
+        entry = read_next_entry();
+        if (entry == nullptr || entry->type != TRACE_TYPE_HEADER) {
+            ERRMSG("Invalid header\n");
+            return false;
+        }
+        header = *entry;
+        // We can handle the older version 1 as well which simply omits the
+        // early marker with the arch tag, and version 2 which only differs wrt
+        // TRACE_MARKER_TYPE_KERNEL_EVENT..
+        if (entry->addr > TRACE_ENTRY_VERSION) {
+            ERRMSG("Cannot handle version #%zu (expect version <= #%u)\n", entry->addr,
+                   TRACE_ENTRY_VERSION);
+            return false;
+        }
+        // Read the meta entries until we hit the pid.
+        // We want to pass the tid+pid to the reader *before* any markers,
+        // even though 2 markers preced the tid+pid in the file.
+        std::queue<trace_entry_t> marker_queue;
+        while ((entry = read_next_entry()) != nullptr) {
+            if (entry->type == TRACE_TYPE_PID) {
+                // We assume the pid entry is the last, right before the timestamp.
+                pid = *entry;
+                break;
+            } else if (entry->type == TRACE_TYPE_THREAD)
+                tid = *entry;
+            else if (entry->type == TRACE_TYPE_MARKER)
+                marker_queue.push(*entry);
+            else {
+                ERRMSG("Unexpected trace sequence\n");
                 return false;
             }
-            // Read the meta entries until we hit the pid.
-            // We want to pass the tid+pid to the reader *before* any markers,
-            // even though 2 markers preced the tid+pid in the file.
-            std::queue<trace_entry_t> marker_queue;
-            while (read_next_thread_entry(index_, &next, &thread_eof_[index_])) {
-                if (next.type == TRACE_TYPE_PID) {
-                    // We assume the pid entry is the last, right before the timestamp.
-                    pid = next;
-                    break;
-                } else if (next.type == TRACE_TYPE_THREAD)
-                    tids_[index_] = next;
-                else if (next.type == TRACE_TYPE_MARKER)
-                    marker_queue.push(next);
-                else {
-                    ERRMSG("Unexpected trace sequence for input file #%zu\n", index_);
-                    return false;
-                }
-            }
-            VPRINT(this, 2, "Read thread #%zd header: ver=%zu, pid=%zu, tid=%zu\n",
-                   index_, header.addr, pid.addr, tids_[index_].addr);
-            // The reader expects us to own the header and pass the tid as
-            // the first entry.
-            queues_[index_].push(tids_[index_]);
-            queues_[index_].push(pid);
-            while (!marker_queue.empty()) {
-                queues_[index_].push(marker_queue.front());
-                marker_queue.pop();
-            }
         }
-        index_ = input_files_.size();
-
-        return true;
-    }
-
-    trace_entry_t *
-    read_next_entry() override
-    {
-        // We read the thread files simultaneously in lockstep and merge them into
-        // a single interleaved stream in timestamp order.
-        // When a thread file runs out we leave its times_[] entry as 0 and its file at
-        // eof.
-        while (thread_count_ > 0) {
-            if (index_ >= input_files_.size()) {
-                // Pick the next thread by looking for the smallest timestamp.
-                uint64_t min_time = 0xffffffffffffffff;
-                size_t next_index = 0;
-                for (size_t i = 0; i < times_.size(); ++i) {
-                    if (times_[i] == 0 && !thread_eof_[i]) {
-                        if (!read_next_thread_entry(i, &timestamps_[i],
-                                                    &thread_eof_[i])) {
-                            ERRMSG("Failed to read from input file #%zu\n", i);
-                            return nullptr;
-                        }
-                        if (timestamps_[i].type != TRACE_TYPE_MARKER &&
-                            timestamps_[i].size != TRACE_MARKER_TYPE_TIMESTAMP) {
-                            ERRMSG("Missing timestamp entry in input file #%zu\n", i);
-                            return nullptr;
-                        }
-                        times_[i] = timestamps_[i].addr;
-                        VPRINT(this, 3,
-                               "Thread #%zu timestamp is @0x" ZHEX64_FORMAT_STRING "\n",
-                               i, times_[i]);
-                    }
-                    if (times_[i] != 0 && times_[i] < min_time) {
-                        min_time = times_[i];
-                        next_index = i;
-                    }
-                }
-                VPRINT(this, 2,
-                       "Next thread in timestamp order is #%zu @0x" ZHEX64_FORMAT_STRING
-                       "\n",
-                       next_index, times_[next_index]);
-                index_ = next_index;
-                times_[index_] = 0; // Read from file for this thread's next timestamp.
-                // If the queue is not empty, it should contain the initial tid;pid.
-                if ((queues_[index_].empty() ||
-                     queues_[index_].front().type != TRACE_TYPE_THREAD) &&
-                    // For a single thread (or already-interleaved file) we do not need
-                    // thread entries before each timestamp.
-                    input_files_.size() > 1)
-                    queues_[index_].push(tids_[index_]);
-                queues_[index_].push(timestamps_[index_]);
-            }
-            if (!queues_[index_].empty()) {
-                entry_copy_ = queues_[index_].front();
-                queues_[index_].pop();
-                return &entry_copy_;
-            }
-            VPRINT(this, 4, "About to read thread #%zu\n", index_);
-            if (!read_next_thread_entry(index_, &entry_copy_, &thread_eof_[index_])) {
-                if (thread_eof_[index_]) {
-                    VPRINT(this, 2, "Thread #%zu at eof\n", index_);
-                    --thread_count_;
-                    if (thread_count_ == 0) {
-                        VPRINT(this, 2, "All threads at eof\n");
-                        at_eof_ = true;
-                        break;
-                    }
-                    times_[index_] = 0;
-                    index_ = input_files_.size(); // Request thread scan.
-                    continue;
-                } else {
-                    ERRMSG("Failed to read from input file #%zu\n", index_);
-                    return nullptr;
-                }
-            }
-            if (entry_copy_.type == TRACE_TYPE_MARKER &&
-                entry_copy_.size == TRACE_MARKER_TYPE_TIMESTAMP) {
-                VPRINT(this, 3, "Thread #%zu timestamp 0x" ZHEX64_FORMAT_STRING "\n",
-                       index_, (uint64_t)entry_copy_.addr);
-                times_[index_] = entry_copy_.addr;
-                timestamps_[index_] = entry_copy_;
-                index_ = input_files_.size(); // Request thread scan.
-                continue;
-            }
-            return &entry_copy_;
+        VPRINT(this, 2, "Read header: ver=%zu, pid=%zu, tid=%zu\n", header.addr, pid.addr,
+               tid.addr);
+        // The reader expects us to own the header and pass the tid as
+        // the first entry.
+        queue_.push(tid);
+        queue_.push(pid);
+        while (!marker_queue.empty()) {
+            queue_.push(marker_queue.front());
+            marker_queue.pop();
         }
-        return nullptr;
+        return true;
     }
 
-    virtual bool
-    skip_thread_instructions(size_t thread_index, uint64_t instruction_count,
-                             OUT bool *eof)
+    // Provided so that instantiations can specialize.
+    reader_t &
+    skip_instructions(uint64_t instruction_count) override
     {
-        // TODO i#5538,i#5843: Once we move interleaving code from file_reader_t
-        // into scheduler_t, remove these _thread variants: read_next_thread_entry()
-        // and skip_thread_instructions().  Use reader_t::skip_instructions() here
-        // (don't override).
-
-        // Default implementation for file types that have no fast seeking and must do a
-        // linear walk.
-        // FIXME i#5538,i#5843: This is broken as it goes too far: see how
-        // reader_t::skip_instructions() does it.  We'll fix when we refactor (see
-        // comment at top of function).
-        uint64_t stop_count_ = cur_instr_count_ + instruction_count + 1;
-        while (cur_instr_count_ < stop_count_) {
-            if (!read_next_thread_entry(thread_index, &entry_copy_, eof))
-                return false;
-            // Update core state.
-            input_entry_ = &entry_copy_;
-            process_input_entry();
-            // TODO i#5538: Remember the last timestamp+cpu and insert it; share
-            // code with the zipfile reader.
-        }
-        return true;
+        return reader_t::skip_instructions(instruction_count);
     }
 
     // Protected for access by mock_file_reader_t.
-    std::vector<T> input_files_;
+    T input_file_;
 
 private:
     std::string input_path_;
-    std::vector<std::string> input_path_list_;
-    trace_entry_t entry_copy_;
-    // The current thread we're processing is "index".  If it's set to input_files_.size()
-    // that means we need to pick a new thread.
-    size_t index_;
-    size_t thread_count_;
-    // TODO i#5843: Once we move interleaving code from file_reader_t into scheduler_t
-    // these will all become singleton as one class instance here will have just one
-    // thread.  We can merge queues_ here into the new reader_t::queue_.
-    std::vector<std::queue<trace_entry_t>> queues_;
-    std::vector<trace_entry_t> tids_;
-    std::vector<trace_entry_t> timestamps_;
-    std::vector<uint64_t> times_;
-    bool *thread_eof_ = nullptr;
 };
 
 #endif /* _FILE_READER_H_ */
diff --git a/clients/drcachesim/reader/ipc_reader.cpp b/clients/drcachesim/reader/ipc_reader.cpp
index 1635c5b2572..1d7b3a1dd5a 100644
--- a/clients/drcachesim/reader/ipc_reader.cpp
+++ b/clients/drcachesim/reader/ipc_reader.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2015-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2015-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -92,6 +92,9 @@ ipc_reader_t::~ipc_reader_t()
 trace_entry_t *
 ipc_reader_t::read_next_entry()
 {
+    trace_entry_t *from_queue = read_queue();
+    if (from_queue != nullptr)
+        return from_queue;
     ++cur_buf_;
     if (cur_buf_ >= end_buf_) {
         ssize_t sz = pipe_.read(buf_, sizeof(buf_)); // blocking read
diff --git a/clients/drcachesim/reader/ipc_reader.h b/clients/drcachesim/reader/ipc_reader.h
index 8ac19feaaa5..447708cde7b 100644
--- a/clients/drcachesim/reader/ipc_reader.h
+++ b/clients/drcachesim/reader/ipc_reader.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2015-2020 Google, Inc.  All rights reserved.
+ * Copyright (c) 2015-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -60,13 +60,6 @@ class ipc_reader_t : public reader_t {
     trace_entry_t *
     read_next_entry() override;
 
-    bool
-    read_next_thread_entry(size_t, trace_entry_t *, bool *) override
-    {
-        // Only an interleaved stream is supported.
-        return false;
-    }
-
 private:
     named_pipe_t pipe_;
     bool creation_success_;
diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
index b5ea179014f..1fc5d3051d3 100644
--- a/clients/drcachesim/reader/reader.cpp
+++ b/clients/drcachesim/reader/reader.cpp
@@ -47,14 +47,13 @@ reader_t::operator*()
 }
 
 trace_entry_t *
-reader_t::read_next_entry_or_queue()
+reader_t::read_queue()
 {
-    if (!queue_.empty()) {
-        local_entry_ = queue_.front();
-        queue_.pop();
-        return &local_entry_;
-    }
-    return read_next_entry();
+    if (queue_.empty())
+        return nullptr;
+    entry_copy_ = queue_.front();
+    queue_.pop();
+    return &entry_copy_;
 }
 
 reader_t &
@@ -63,7 +62,7 @@ reader_t::operator++()
     // We bail if we get a partial read, or EOF, or any error.
     while (true) {
         if (bundle_idx_ == 0 /*not in instr bundle*/)
-            input_entry_ = read_next_entry_or_queue();
+            input_entry_ = read_next_entry();
         if (input_entry_ == NULL) {
             if (!at_eof_) {
                 ERRMSG("Trace is truncated\n");
@@ -337,6 +336,25 @@ reader_t::use_prev(trace_entry_t *prev)
     input_entry_ = prev;
 }
 
+void
+reader_t::pre_skip_instructions()
+{
+    // If the user asks to skip from the very start, we still need to find the chunk
+    // count marker and drain the header queue and populate the stream header values.
+    // XXX: We assume the page size is the final header; it is complex to wait for
+    // the timestamp as we don't want to read it yet.
+    while (page_size_ == 0) {
+        input_entry_ = read_next_entry();
+        if (input_entry_->type != TRACE_TYPE_MARKER ||
+            input_entry_->size == TRACE_MARKER_TYPE_TIMESTAMP) {
+            // Likely some mock in a test with no page size header: just move on.
+            queue_.push(*input_entry_);
+            break;
+        }
+        process_input_entry();
+    }
+}
+
 reader_t &
 reader_t::skip_instructions(uint64_t instruction_count)
 {
@@ -349,37 +367,98 @@ reader_t::skip_instructions(uint64_t instruction_count)
         at_eof_ = true;
         return *this;
     }
+    pre_skip_instructions();
+    return skip_instructions_with_timestamp(cur_instr_count_ + instruction_count);
+}
+
+reader_t &
+reader_t::skip_instructions_with_timestamp(uint64_t stop_instruction_count)
+{
     // This base class has no fast seeking and must do a linear walk.
     // We have +1 because we need to skip the memrefs of the final skipped
     // instr, so we look for the 1st unskipped instr: but we do not want to
     // process it so we do not use the ++ operator function.
-    uint64_t stop_count = cur_instr_count_ + instruction_count + 1;
+    uint64_t stop_count = stop_instruction_count + 1;
+    trace_entry_t timestamp = {};
+    trace_entry_t cpu = {};
+    trace_entry_t next_instr = {};
+    bool prev_was_record_ord = false;
+    bool found_real_timestamp = false;
     while (cur_instr_count_ < stop_count) { // End condition is never reached.
-        // TODO i#5538: Remember the last timestamp+cpu and insert it; share
-        // code with the zipfile reader.
-        // TODO i#5538: If skipping from the start, Record all of the header values
-        // until the first timestamp and present them as new memtrace_stream_t
-        // interfaces.
         // Remember the prior entry to use as the cur entry when we hit the
-        // too-far instr.
+        // too-far instr if we didn't find a timestamp.
         if (input_entry_ != nullptr) // Only at start: and we checked for skipping 0.
-            local_entry_ = *input_entry_;
-        input_entry_ = read_next_entry_or_queue();
-        if (input_entry_ == nullptr) {
-            if (!at_eof_) {
-                ERRMSG("Trace is truncated\n");
-                assert(false);
-                at_eof_ = true;
-            }
+            entry_copy_ = *input_entry_;
+        trace_entry_t *next = read_next_entry();
+        if (next == nullptr) {
+            VPRINT(this, 1, "Failed to read next entry\n");
+            at_eof_ = true;
             return *this;
         }
+        // We need to pass up memrefs for the final skipped instr, but we don't
+        // want to process_input_entry() on the first unskipped instr so we can
+        // insert the timestamp+cpu first.
         if (cur_instr_count_ + 1 == stop_count &&
-            type_is_instr(static_cast<trace_type_t>(input_entry_->type))) {
-            queue_.push(*input_entry_);
-            use_prev(&local_entry_);
+            type_is_instr(static_cast<trace_type_t>(next->type))) {
+            next_instr = *next;
             break;
         }
+        // To examine the produced memrefs we'd have to have the base reader
+        // expose these hidden entries.  It is simpler for us to read the
+        // trace_entry_t directly prior to processing by the base class.
+        if (next->type == TRACE_TYPE_MARKER) {
+            if (next->size == TRACE_MARKER_TYPE_RECORD_ORDINAL) {
+                cur_ref_count_ = next->addr;
+                prev_was_record_ord = true;
+                VPRINT(this, 4, "Found record ordinal marker: new ord %" PRIu64 "\n",
+                       cur_ref_count_);
+            } else if (next->size == TRACE_MARKER_TYPE_TIMESTAMP) {
+                timestamp = *next;
+                if (prev_was_record_ord)
+                    --cur_ref_count_; // Invisible to ordinals.
+                else
+                    found_real_timestamp = true;
+            } else if (next->size == TRACE_MARKER_TYPE_CPU_ID) {
+                cpu = *next;
+                if (prev_was_record_ord)
+                    --cur_ref_count_; // Invisible to ordinals.
+            } else
+                prev_was_record_ord = false;
+        } else
+            prev_was_record_ord = false;
+        // Update core state.
+        input_entry_ = next;
+        process_input_entry();
+    }
+    if (timestamp.type == TRACE_TYPE_MARKER && cpu.type == TRACE_TYPE_MARKER) {
+        // Insert the two markers.
+        if (!found_real_timestamp) {
+            VPRINT(this, 4, "Using duplicate timestamp\n");
+            // These synthetic entries are not real records in the unskipped trace, so we
+            // do not associate record counts with them.
+            suppress_ref_count_ = 1;
+            if (cpu.type == TRACE_TYPE_MARKER)
+                ++suppress_ref_count_;
+        } else {
+            // These are not invisible but we already counted them in the loop above
+            // so we need to avoid a double-count.
+            VPRINT(this, 4, "Found real timestamp: walking back ord from %" PRIu64 "\n",
+                   cur_ref_count_);
+            --cur_ref_count_;
+            if (cpu.type == TRACE_TYPE_MARKER)
+                --cur_ref_count_;
+        }
+        entry_copy_ = timestamp;
+        input_entry_ = &entry_copy_;
         process_input_entry();
+        if (cpu.type == TRACE_TYPE_MARKER)
+            queue_.push(cpu);
+        queue_.push(next_instr);
+    } else {
+        // We missed the markers somehow.
+        VPRINT(this, 1, "Skip failed to find both timestamp and cpu\n");
+        queue_.push(next_instr);
+        use_prev(&entry_copy_);
     }
     return *this;
 }
diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h
index 9eb0ce6b510..25092627ff1 100644
--- a/clients/drcachesim/reader/reader.h
+++ b/clients/drcachesim/reader/reader.h
@@ -110,18 +110,25 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     virtual reader_t &
     operator++();
 
-    // Skips records until "count" instruction records have been passed.
-    // This will skip top-level headers for a thread; it is up to the caller
-    // to first observe those before skipping, if needed.  For interleaved-thread
-    // iteration, top-level headers in other threads will be skipped as well
-    // (but generally speaking these are identical to the initial thread).
-    // TODO i#5538: Add access to these header values from #memtrace_stream_t
-    // and document it here.
-    // TODO i#5538: Skipping from the middle will not always duplicate the
-    // last timestamp,cpu.
+    // Skips records until "count" instruction records have been passed.  If any
+    // timestamp (plus cpuid) is skipped, the most recent skipped timestamp will be
+    // duplicated prior to the target instruction.  Top-level headers will still be
+    // observed.  This generally should call pre_skip_instructions() to observe the
+    // headers, perform any fast skipping, and then should call
+    // skip_instructions_with_timestamp() to properly duplicate the prior timestamp.
     virtual reader_t &
     skip_instructions(uint64_t instruction_count);
 
+    virtual void
+    pre_skip_instructions();
+
+    // Performs a simple walk until it sees the instruction whose ordinal is
+    // "stop_instruction_count" along with all of its associated records such as
+    // memrefs.  If any timestamp (plus cpuid) is skipped, the most recent skipped
+    // timestamp will be duplicated prior to the target instruction.
+    virtual reader_t &
+    skip_instructions_with_timestamp(uint64_t stop_instruction_count);
+
     // Supplied for subclasses that may fail in their constructors.
     virtual bool
     operator!()
@@ -182,24 +189,21 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     }
 
 protected:
-    // This reads the next entry from the stream of entries from all threads interleaved
-    // in timestamp order.
+    // This reads the next entry from the single stream of entries (or from the
+    // local queue if non-empty).  If it returns false it will set at_eof_ to distinguish
+    // end-of-file from an error.  It should call read_queue() first before reading
+    // a new entry from the input stream.
     virtual trace_entry_t *
     read_next_entry() = 0;
-    // This first checks the local queue before calling read_next_entry().
-    // in timestamp order.
+    // Returns and removes the entry (nullptr if none) from the local queue.
+    // This should be called by read_next_entry() prior to reading a new record
+    // from the input stream.
     virtual trace_entry_t *
-    read_next_entry_or_queue();
+    read_queue();
     // Replaces the just-read record with the prior record, supplied here.
     // Separated into a virtual method for overriding in test mock readers.
     virtual void
     use_prev(trace_entry_t *prev);
-    // This reads the next entry from the single stream of entries
-    // from the specified thread.  If it returns false it will set *eof to distinguish
-    // end-of-file from an error.
-    virtual bool
-    read_next_thread_entry(size_t thread_index, OUT trace_entry_t *entry,
-                           OUT bool *eof) = 0;
     // This updates internal state for the just-read input_entry_.
     // Returns whether a new memref record is now available.
     virtual bool
@@ -230,7 +234,7 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     // We store into this queue records already read from the input but not
     // yet returned to the iterator.
     std::queue<trace_entry_t> queue_;
-    trace_entry_t local_entry_; // For use in returning a queue entry.
+    trace_entry_t entry_copy_; // For use in returning a queue entry.
 
 private:
     struct encoding_info_t {
diff --git a/clients/drcachesim/reader/snappy_file_reader.cpp b/clients/drcachesim/reader/snappy_file_reader.cpp
index 58a85b7495f..180f10b137a 100644
--- a/clients/drcachesim/reader/snappy_file_reader.cpp
+++ b/clients/drcachesim/reader/snappy_file_reader.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2019-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2019-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -218,32 +218,25 @@ file_reader_t<snappy_reader_t>::open_single_file(const std::string &path)
     if (!*file)
         return false;
     VPRINT(this, 1, "Opened snappy input file %s\n", path.c_str());
-    input_files_.emplace_back(file);
+    input_file_ = snappy_reader_t(file);
     return true;
 }
 
 template <>
-bool
-file_reader_t<snappy_reader_t>::read_next_thread_entry(size_t thread_index,
-                                                       OUT trace_entry_t *entry,
-                                                       OUT bool *eof)
+trace_entry_t *
+file_reader_t<snappy_reader_t>::read_next_entry()
 {
-    int len = input_files_[thread_index].read(sizeof(*entry), entry);
+    trace_entry_t *from_queue = read_queue();
+    if (from_queue != nullptr)
+        return from_queue;
+    int len = input_file_.read(sizeof(entry_copy_), &entry_copy_);
     // Returns less than asked-for for end of file, or –1 for error.
-    if (len < (int)sizeof(*entry)) {
-        *eof = input_files_[thread_index].eof();
-        return false;
+    if (len < (int)sizeof(entry_copy_)) {
+        at_eof_ = input_file_.eof();
+        return nullptr;
     }
-    VPRINT(this, 4, "Read from thread #%zd file: type=%s (%d), size=%d, addr=%zu\n",
-           thread_index, trace_type_names[entry->type], entry->type, entry->size,
-           entry->addr);
-    return true;
-}
-
-template <>
-bool
-file_reader_t<snappy_reader_t>::is_complete()
-{
-    // Not supported, similar to gzip reader.
-    return false;
+    VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
+           trace_type_names[entry_copy_.type], entry_copy_.type, entry_copy_.size,
+           entry_copy_.addr);
+    return &entry_copy_;
 }
diff --git a/clients/drcachesim/reader/snappy_file_reader.h b/clients/drcachesim/reader/snappy_file_reader.h
index 5bcdacc1bae..63ae65670f7 100644
--- a/clients/drcachesim/reader/snappy_file_reader.h
+++ b/clients/drcachesim/reader/snappy_file_reader.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2019-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2019-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -51,6 +51,7 @@
 
 class snappy_reader_t : snappy_consts_t {
 public:
+    snappy_reader_t() = default;
     snappy_reader_t(std::ifstream *stream);
 
     // Read 'size' bytes into the 'to'.
diff --git a/clients/drcachesim/reader/zipfile_file_reader.cpp b/clients/drcachesim/reader/zipfile_file_reader.cpp
index c67106850e4..4c6f07d1663 100644
--- a/clients/drcachesim/reader/zipfile_file_reader.cpp
+++ b/clients/drcachesim/reader/zipfile_file_reader.cpp
@@ -42,9 +42,8 @@ template <>
 /* clang-format on */
 file_reader_t<zipfile_reader_t>::~file_reader_t<zipfile_reader_t>()
 {
-    for (auto &zipfile : input_files_)
-        unzClose(zipfile.file);
-    delete[] thread_eof_;
+    if (input_file_.file != nullptr)
+        unzClose(input_file_.file);
 }
 
 template <>
@@ -54,7 +53,7 @@ file_reader_t<zipfile_reader_t>::open_single_file(const std::string &path)
     unzFile file = unzOpen(path.c_str());
     if (file == nullptr)
         return false;
-    input_files_.emplace_back(file);
+    input_file_ = zipfile_reader_t(file);
     if (unzGoToFirstFile(file) != UNZ_OK || unzOpenCurrentFile(file) != UNZ_OK)
         return false;
     VPRINT(this, 1, "Opened input file %s\n", path.c_str());
@@ -62,13 +61,13 @@ file_reader_t<zipfile_reader_t>::open_single_file(const std::string &path)
 }
 
 template <>
-bool
-file_reader_t<zipfile_reader_t>::read_next_thread_entry(size_t thread_index,
-                                                        OUT trace_entry_t *entry,
-                                                        OUT bool *eof)
+trace_entry_t *
+file_reader_t<zipfile_reader_t>::read_next_entry()
 {
-    *eof = false;
-    zipfile_reader_t *zipfile = &input_files_[thread_index];
+    trace_entry_t *from_queue = read_queue();
+    if (from_queue != nullptr)
+        return from_queue;
+    zipfile_reader_t *zipfile = &input_file_;
     if (zipfile->cur_buf >= zipfile->max_buf) {
         int num_read =
             unzReadCurrentFile(zipfile->file, zipfile->buf, sizeof(zipfile->buf));
@@ -80,180 +79,107 @@ file_reader_t<zipfile_reader_t>::read_next_thread_entry(size_t thread_index,
                 // This call is expensive if we do it every time.
                 unzGetCurrentFileInfo64(zipfile->file, nullptr, name, sizeof(name),
                                         nullptr, 0, nullptr, 0);
-                VPRINT(this, 3,
-                       "Thread #%zd hit end of component %s; opening next component\n",
-                       thread_index, name);
+                VPRINT(this, 3, "Hit end of component %s; opening next component\n",
+                       name);
             }
 #endif
-            // read_next_entry() stores the last entry into entry_copy_.
+            // read_next_entry() stored the last-read entry into entry_copy_.
             if ((entry_copy_.type != TRACE_TYPE_MARKER ||
                  entry_copy_.size != TRACE_MARKER_TYPE_CHUNK_FOOTER) &&
                 entry_copy_.type != TRACE_TYPE_FOOTER) {
                 VPRINT(this, 1, "Chunk is missing footer: truncation detected\n");
-                return false;
+                return nullptr;
             }
             if (unzCloseCurrentFile(zipfile->file) != UNZ_OK)
-                return false;
+                return nullptr;
             int res = unzGoToNextFile(zipfile->file);
             if (res != UNZ_OK) {
                 if (res == UNZ_END_OF_LIST_OF_FILE) {
-                    VPRINT(this, 2, "Thread #%zd hit EOF\n", thread_index);
-                    *eof = true;
+                    VPRINT(this, 2, "Hit EOF\n");
+                    at_eof_ = true;
                 }
-                return false;
+                return nullptr;
             }
             if (unzOpenCurrentFile(zipfile->file) != UNZ_OK)
-                return false;
+                return nullptr;
             num_read =
                 unzReadCurrentFile(zipfile->file, zipfile->buf, sizeof(zipfile->buf));
         }
-        if (num_read < static_cast<int>(sizeof(*entry))) {
-            VPRINT(this, 1, "Thread #%zd failed to read: returned %d\n", thread_index,
-                   num_read);
-            return false;
+        if (num_read < static_cast<int>(sizeof(entry_copy_))) {
+            VPRINT(this, 1, "Failed to read: returned %d\n", num_read);
+            return nullptr;
         }
         zipfile->cur_buf = zipfile->buf;
         zipfile->max_buf = zipfile->buf + (num_read / sizeof(*zipfile->max_buf));
     }
-    *entry = *zipfile->cur_buf;
+    entry_copy_ = *zipfile->cur_buf;
     ++zipfile->cur_buf;
-    VPRINT(this, 4, "Read from thread #%zd: type=%s (%d), size=%d, addr=%zu\n",
-           thread_index, trace_type_names[entry->type], entry->type, entry->size,
-           entry->addr);
-    return true;
-}
-
-template <>
-bool
-file_reader_t<zipfile_reader_t>::is_complete()
-{
-    // We could call unzeof() but we need the thread index.
-    // XXX: We should remove or change this interface.  It is not used now.
-    return false;
+    VPRINT(this, 4, "Read: type=%s (%d), size=%d, addr=%zu\n",
+           trace_type_names[entry_copy_.type], entry_copy_.type, entry_copy_.size,
+           entry_copy_.addr);
+    return &entry_copy_;
 }
 
 template <>
-bool
-file_reader_t<zipfile_reader_t>::skip_thread_instructions(size_t thread_index,
-                                                          uint64_t instruction_count,
-                                                          OUT bool *eof)
+reader_t &
+file_reader_t<zipfile_reader_t>::skip_instructions(uint64_t instruction_count)
 {
     if (instruction_count == 0)
-        return true;
-    VPRINT(this, 2, "Thread #%zd skipping %" PRIi64 " instrs\n", thread_index,
-           instruction_count);
-    trace_entry_t timestamp = {};
-    trace_entry_t cpu = {};
-    const memref_t &memref = **this;
-    if (memref.marker.type == TRACE_TYPE_MARKER &&
-        memref.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP) {
-        timestamp = entry_copy_;
+        return *this;
+    VPRINT(this, 2, "Skipping %" PRIi64 " instrs\n", instruction_count);
+    pre_skip_instructions();
+    if (chunk_instr_count_ == 0) {
+        VPRINT(this, 1, "Failed to record chunk instr count\n");
+        at_eof_ = true;
+        return *this;
     }
-    zipfile_reader_t *zipfile = &input_files_[thread_index];
+    zipfile_reader_t *zipfile = &input_file_;
     // We assume our unzGoToNextFile loop is plenty performant and we don't need to
     // know the chunk names to use with a single unzLocateFile.
-    uint64_t stop_count_ = cur_instr_count_ + instruction_count + 1;
+    uint64_t stop_count = cur_instr_count_ + instruction_count + 1;
     VPRINT(this, 2,
            "stop=%" PRIi64 " cur=%" PRIi64 " chunk=%" PRIi64 " est=%" PRIi64 "\n",
-           stop_count_, cur_instr_count_, chunk_instr_count_,
+           stop_count, cur_instr_count_, chunk_instr_count_,
            cur_instr_count_ +
                (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_)));
     // First, quickly skip over chunks to reach the chunk containing the target.
     while (cur_instr_count_ +
                (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_)) <
-           stop_count_) {
-        if (unzCloseCurrentFile(zipfile->file) != UNZ_OK)
-            return false;
+           stop_count) {
+        if (unzCloseCurrentFile(zipfile->file) != UNZ_OK) {
+            VPRINT(this, 1, "Failed to close zip subfile\n");
+            at_eof_ = true;
+            return *this;
+        }
         int res = unzGoToNextFile(zipfile->file);
         if (res != UNZ_OK) {
-            if (res == UNZ_END_OF_LIST_OF_FILE) {
-                VPRINT(this, 2, "Thread #%zd hit EOF\n", thread_index);
-                *eof = true;
-            }
-            return false;
+            if (res == UNZ_END_OF_LIST_OF_FILE)
+                VPRINT(this, 2, "Hit EOF\n");
+            else
+                VPRINT(this, 2, "Failed to go to next zip subfile\n");
+            at_eof_ = true;
+            return *this;
+        }
+        if (unzOpenCurrentFile(zipfile->file) != UNZ_OK) {
+            VPRINT(this, 1, "Failed to open zip subfile\n");
+            at_eof_ = true;
+            return *this;
         }
-        if (unzOpenCurrentFile(zipfile->file) != UNZ_OK)
-            return false;
         cur_instr_count_ += chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_);
-        VPRINT(this, 2, "Thread #%zd at %" PRIi64 " instrs at start of new chunk\n",
-               thread_index, cur_instr_count_);
+        VPRINT(this, 2, "At %" PRIi64 " instrs at start of new chunk\n",
+               cur_instr_count_);
         VPRINT(this, 2,
                "zip chunk stop=%" PRIi64 " cur=%" PRIi64 " chunk=%" PRIi64
                " end-of-chunk=%" PRIi64 "\n",
-               stop_count_, cur_instr_count_, chunk_instr_count_,
+               stop_count, cur_instr_count_, chunk_instr_count_,
                cur_instr_count_ +
                    (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_)));
         // Clear cached data from the prior chunk.
         zipfile->cur_buf = zipfile->max_buf;
     }
-    // We have to linearly walk the last mile.
-    bool prev_was_record_ord = false;
-    bool found_real_timestamp = false;
-    while (cur_instr_count_ < stop_count_) { // End condition is never reached.
-        if (!read_next_thread_entry(thread_index, &entry_copy_, eof))
-            return false;
-        // We need to pass up memrefs for the final skipped instr, but we don't
-        // want to process_input_entry() on the first unskipped instr so we can
-        // insert the timestamp+cpu first.
-        if (cur_instr_count_ + 1 == stop_count_ &&
-            type_is_instr(static_cast<trace_type_t>(entry_copy_.type)))
-            break;
-        // To examine the produced memrefs we'd have to have the base reader
-        // expose these hidden entries.  It is simpler for us to read the
-        // trace_entry_t directly prior to processing by the base class.
-        if (entry_copy_.type == TRACE_TYPE_MARKER) {
-            if (entry_copy_.size == TRACE_MARKER_TYPE_RECORD_ORDINAL) {
-                cur_ref_count_ = entry_copy_.addr;
-                prev_was_record_ord = true;
-                VPRINT(this, 4, "Found record ordinal marker: new ord %" PRIu64 "\n",
-                       cur_ref_count_);
-            } else if (entry_copy_.size == TRACE_MARKER_TYPE_TIMESTAMP) {
-                timestamp = entry_copy_;
-                if (prev_was_record_ord)
-                    --cur_ref_count_; // Invisible to ordinals.
-                else
-                    found_real_timestamp = true;
-            } else if (entry_copy_.size == TRACE_MARKER_TYPE_CPU_ID) {
-                cpu = entry_copy_;
-                if (prev_was_record_ord)
-                    --cur_ref_count_; // Invisible to ordinals.
-            } else
-                prev_was_record_ord = false;
-        } else
-            prev_was_record_ord = false;
-        // Update core state.
-        input_entry_ = &entry_copy_;
-        process_input_entry();
-    }
-    if (timestamp.type == TRACE_TYPE_MARKER && cpu.type == TRACE_TYPE_MARKER) {
-        // Insert the two markers.
-        trace_entry_t instr = entry_copy_;
-        entry_copy_ = timestamp;
-        if (!found_real_timestamp) {
-            // These synthetic entries are not real records in the unskipped trace, so we
-            // do not associate record counts with them.
-            suppress_ref_count_ = 1;
-            if (cpu.type == TRACE_TYPE_MARKER)
-                ++suppress_ref_count_;
-        } else {
-            // These are not invisible but we already counted them in the loop above
-            // so we need to avoid a double-count.
-            VPRINT(this, 4, "Found real timestamp: walking back ord from %" PRIu64 "\n",
-                   cur_ref_count_);
-            --cur_ref_count_;
-            if (cpu.type == TRACE_TYPE_MARKER)
-                --cur_ref_count_;
-        }
-        process_input_entry();
-        if (cpu.type == TRACE_TYPE_MARKER)
-            queues_[thread_index].push(cpu);
-        queues_[thread_index].push(instr);
-    } else {
-        // We missed the markers somehow; fall back to just process the instr.
-        // TODO i#5538: For skipping from the middle we need to have the
-        // base reader cache the last timestamp,cpu.
-        VPRINT(this, 1, "Skip failed to find both timestamp and cpu\n");
-        process_input_entry();
-    }
-    return true;
+    // Now do a linear walk the rest of the way, remembering timestamps (we have
+    // duplicated timestamps at the start of the chunk to cover any skipped in
+    // the fast chunk jumps we just did).
+    // Subtract 1 to pass the target instr itself.
+    return skip_instructions_with_timestamp(stop_count - 1);
 }
diff --git a/clients/drcachesim/reader/zipfile_file_reader.h b/clients/drcachesim/reader/zipfile_file_reader.h
index 3caea97082f..8f318af09f5 100644
--- a/clients/drcachesim/reader/zipfile_file_reader.h
+++ b/clients/drcachesim/reader/zipfile_file_reader.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2017-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2017-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -40,6 +40,10 @@
 #include "file_reader.h"
 
 struct zipfile_reader_t {
+    zipfile_reader_t()
+        : file(nullptr)
+    {
+    }
     explicit zipfile_reader_t(unzFile file)
         : file(file)
     {
@@ -59,9 +63,7 @@ typedef file_reader_t<zipfile_reader_t> zipfile_file_reader_t;
  * class declaration.
  */
 template <>
-bool
-file_reader_t<zipfile_reader_t>::skip_thread_instructions(size_t thread_index,
-                                                          uint64_t instruction_count,
-                                                          OUT bool *eof);
+reader_t &
+file_reader_t<zipfile_reader_t>::skip_instructions(uint64_t instruction_count);
 
 #endif /* _ZIPFILE_FILE_READER_H_ */
diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp
index 58368903594..c8c785b7e9d 100644
--- a/clients/drcachesim/tests/scheduler_unit_tests.cpp
+++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp
@@ -90,6 +90,9 @@ class mock_reader_t : public reader_t {
     trace_entry_t *
     read_next_entry() override
     {
+        trace_entry_t *entry = read_queue();
+        if (entry != nullptr)
+            return entry;
         // We need this to work just well enough for reader_t::skip_instructions
         // to identify instr entries.
         ++index_;
@@ -106,12 +109,6 @@ class mock_reader_t : public reader_t {
         --index_;
         queue_.pop();
     }
-    bool
-    read_next_thread_entry(size_t thread_index, OUT trace_entry_t *entry,
-                           OUT bool *eof) override
-    {
-        return true;
-    }
     std::string
     get_stream_name() const override
     {
diff --git a/clients/drcachesim/tests/view_test.cpp b/clients/drcachesim/tests/view_test.cpp
index 86e5bf95b60..49bf64e7857 100644
--- a/clients/drcachesim/tests/view_test.cpp
+++ b/clients/drcachesim/tests/view_test.cpp
@@ -42,6 +42,7 @@
 #include "../common/memref.h"
 #include "../tracer/raw2trace.h"
 #include "../reader/file_reader.h"
+#include "../scheduler/scheduler.h"
 #include "memref_gen.h"
 
 #undef ASSERT
@@ -61,19 +62,14 @@
         }                             \
     } while (0)
 
+using namespace dynamorio::drmemtrace;
+
 // These are for our mock serial reader and must be in the same namespace
 // as file_reader_t's declaration.
 template <> file_reader_t<std::vector<trace_entry_t>>::~file_reader_t()
 {
 }
 
-template <>
-bool
-file_reader_t<std::vector<trace_entry_t>>::is_complete()
-{
-    return false;
-}
-
 template <>
 bool
 file_reader_t<std::vector<trace_entry_t>>::open_single_file(const std::string &path)
@@ -82,11 +78,10 @@ file_reader_t<std::vector<trace_entry_t>>::open_single_file(const std::string &p
 }
 
 template <>
-bool
-file_reader_t<std::vector<trace_entry_t>>::read_next_thread_entry(
-    size_t thread_index, OUT trace_entry_t *entry, OUT bool *eof)
+trace_entry_t *
+file_reader_t<std::vector<trace_entry_t>>::read_next_entry()
 {
-    return false;
+    return nullptr;
 }
 
 namespace {
@@ -415,48 +410,55 @@ run_limit_tests(void *drcontext)
 }
 
 /***************************************************************************
- * Serial reader mock.
+ * File reader mock.
  */
 
 class mock_file_reader_t : public file_reader_t<std::vector<trace_entry_t>> {
 public:
     mock_file_reader_t()
     {
+        at_eof_ = true;
     }
-    mock_file_reader_t(const std::vector<std::vector<trace_entry_t>> &entries)
-        : file_reader_t(std::vector<std::string>(1, "non-empty"))
+    mock_file_reader_t(const std::vector<trace_entry_t> &entries)
     {
-        input_files_ = entries;
-        pos_.resize(input_files_.size(), 0);
-        init();
+        input_file_ = entries;
+        pos_ = 0;
     }
-    bool
-    read_next_thread_entry(size_t thread_index, OUT trace_entry_t *entry,
-                           OUT bool *eof) override
+    trace_entry_t *
+    read_next_entry() override
     {
-        if (pos_[thread_index] >= input_files_[thread_index].size()) {
-            *eof = true;
-            return false;
+        if (at_eof_)
+            return nullptr;
+        trace_entry_t *entry = read_queue();
+        if (entry != nullptr)
+            return entry;
+        if (pos_ >= input_file_.size()) {
+            at_eof_ = true;
+            return nullptr;
         }
-        *entry = input_files_[thread_index][pos_[thread_index]];
-        ++pos_[thread_index];
-        return true;
+        entry = &input_file_[pos_];
+        ++pos_;
+        return entry;
     }
 
 private:
-    std::vector<size_t> pos_;
+    size_t pos_;
 };
 
 std::string
 run_serial_test_helper(view_t &view,
-                       const std::vector<std::vector<trace_entry_t>> &entries)
+                       const std::vector<std::vector<trace_entry_t>> &entries,
+                       const std::vector<memref_tid_t> &tids)
+
 {
     class local_stream_t : public default_memtrace_stream_t {
     public:
         local_stream_t(view_t &view,
-                       const std::vector<std::vector<trace_entry_t>> &entries)
+                       const std::vector<std::vector<trace_entry_t>> &entries,
+                       const std::vector<memref_tid_t> &tids)
             : view_(view)
             , entries_(entries)
+            , tids_(tids)
         {
         }
 
@@ -468,10 +470,27 @@ run_serial_test_helper(view_t &view,
             std::stringstream capture;
             std::streambuf *prior = std::cerr.rdbuf(capture.rdbuf());
             // Run the tool.
-            mock_file_reader_t serial(entries_);
-            mock_file_reader_t end;
-            for (; serial != end; ++serial) {
-                const memref_t memref = *serial;
+            std::vector<scheduler_t::input_reader_t> readers;
+            for (size_t i = 0; i < entries_.size(); i++) {
+                readers.emplace_back(
+                    std::unique_ptr<mock_file_reader_t>(
+                        new mock_file_reader_t(entries_[i])),
+                    std::unique_ptr<mock_file_reader_t>(new mock_file_reader_t()),
+                    tids_[i]);
+            }
+            scheduler_t scheduler;
+            std::vector<scheduler_t::input_workload_t> sched_inputs;
+            sched_inputs.emplace_back(std::move(readers));
+            if (scheduler.init(sched_inputs, 1,
+                               scheduler_t::make_scheduler_serial_options()) !=
+                scheduler_t::STATUS_SUCCESS)
+                assert(false);
+            auto *stream = scheduler.get_stream(0);
+            memref_t memref;
+            for (scheduler_t::stream_status_t status = stream->next_record(memref);
+                 status != scheduler_t::STATUS_EOF;
+                 status = stream->next_record(memref)) {
+                assert(status == scheduler_t::STATUS_OK);
                 ++ref_count_;
                 if (type_is_instr(memref.instr.type))
                     ++instr_count_;
@@ -498,11 +517,12 @@ run_serial_test_helper(view_t &view,
     private:
         view_t &view_;
         const std::vector<std::vector<trace_entry_t>> &entries_;
+        const std::vector<memref_tid_t> &tids_;
         uint64_t ref_count_ = 0;
         uint64_t instr_count_ = 0;
     };
 
-    local_stream_t stream(view, entries);
+    local_stream_t stream(view, entries, tids);
     return stream.run();
 }
 
@@ -512,6 +532,7 @@ bool
 run_single_thread_chunk_test(void *drcontext)
 {
     const memref_tid_t t1 = 3;
+    std::vector<memref_tid_t> tids = { t1 };
     std::vector<std::vector<trace_entry_t>> entries = { {
         { TRACE_TYPE_HEADER, 0, { 0x1 } },
         { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 3 } },
@@ -544,7 +565,7 @@ run_single_thread_chunk_test(void *drcontext)
 )DELIM";
     instrlist_t *ilist_unused = nullptr;
     view_nomod_test_t view(drcontext, *ilist_unused, 0, 0, 0);
-    std::string res = run_serial_test_helper(view, entries);
+    std::string res = run_serial_test_helper(view, entries, tids);
     // Make 64-bit match our 32-bit expect string.
     res = std::regex_replace(res, std::regex("0x000000000000002a"), "0x0000002a");
     if (res != expect) {
@@ -559,9 +580,10 @@ run_serial_chunk_test(void *drcontext)
 {
     // We ensure headers are not omitted incorrectly, which they were
     // in the first implementation of the reader skipping dup headers:
-    // i#/5538#issuecomment-1407235283
+    // i#5538#issuecomment-1407235283
     const memref_tid_t t1 = 3;
     const memref_tid_t t2 = 7;
+    std::vector<memref_tid_t> tids = { t1, t2 };
     std::vector<std::vector<trace_entry_t>> entries = {
         {
             { TRACE_TYPE_HEADER, 0, { 0x1 } },
@@ -601,17 +623,15 @@ run_serial_chunk_test(void *drcontext)
            2           0:           3 <marker: filetype 0x0>
            3           0:           3 <marker: cache line size 64>
            4           0:           3 <marker: chunk instruction count 20>
+           5           0:           3 <marker: timestamp 1001>
+           6           0:           3 <marker: tid 3 on core 2>
+           7           1:           3 ifetch       4 byte(s) @ 0x0000002a non-branch
+           8           2:           3 ifetch       4 byte(s) @ 0x0000002a non-branch
 ------------------------------------------------------------
-           5           0:           7 <marker: version 3>
-           6           0:           7 <marker: filetype 0x0>
-           7           0:           7 <marker: cache line size 64>
-           8           0:           7 <marker: chunk instruction count 2>
-------------------------------------------------------------
-           9           0:           3 <marker: timestamp 1001>
-          10           0:           3 <marker: tid 3 on core 2>
-          11           1:           3 ifetch       4 byte(s) @ 0x0000002a non-branch
-          12           2:           3 ifetch       4 byte(s) @ 0x0000002a non-branch
-------------------------------------------------------------
+           9           2:           7 <marker: version 3>
+          10           2:           7 <marker: filetype 0x0>
+          11           2:           7 <marker: cache line size 64>
+          12           2:           7 <marker: chunk instruction count 2>
           13           2:           7 <marker: timestamp 1002>
           14           2:           7 <marker: tid 7 on core 2>
           15           3:           7 ifetch       4 byte(s) @ 0x0000002a non-branch
@@ -627,7 +647,7 @@ run_serial_chunk_test(void *drcontext)
 )DELIM";
     instrlist_t *ilist_unused = nullptr;
     view_nomod_test_t view(drcontext, *ilist_unused, 0, 0, 0);
-    std::string res = run_serial_test_helper(view, entries);
+    std::string res = run_serial_test_helper(view, entries, tids);
     // Make 64-bit match our 32-bit expect string.
     res = std::regex_replace(res, std::regex("0x000000000000002a"), "0x0000002a");
     if (res != expect) {
diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h
index 01f2163a3a3..06d2d761bd5 100644
--- a/clients/drcachesim/tracer/raw2trace.h
+++ b/clients/drcachesim/tracer/raw2trace.h
@@ -1743,12 +1743,6 @@ class memref_counter_t : public reader_t {
     {
         return nullptr;
     };
-    bool
-    read_next_thread_entry(size_t thread_index, OUT trace_entry_t *entry,
-                           OUT bool *eof) override
-    {
-        return false;
-    }
     std::string
     get_stream_name() const override
     {
@@ -1757,7 +1751,7 @@ class memref_counter_t : public reader_t {
     int
     entry_memref_count(const trace_entry_t *entry)
     {
-        // Mirror file_reader_t::open_input_files().
+        // Mirror file_reader_t::open_input_file().
         // In particular, we need to skip TRACE_TYPE_HEADER and to pass the
         // tid and pid to the reader before the 2 markers in front of them.
         if (!saw_pid_) {