diff --git a/api/docs/release.dox b/api/docs/release.dox
index 4d990f48ad8..d84f3b5d0e6 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -186,6 +186,9 @@ Further non-compatibility-affecting changes include:
  - Deprecated the drmemtrace analysis tool functions initialize() and
    parallel_shard_init(), replacing them with initialize_stream() and
    parallel_shard_init_stream().  The old versions will continue to work.
+   The stream interface passed to analysis tools provides tools with the
+   record and instruction ordinals along with the values of top-level
+   headers.
  - Added #record_analyzer_t and #record_analysis_tool_t for analyzing the
    sequence of #trace_entry_t exactly as present in a stored offline trace.
 
diff --git a/clients/drcachesim/common/memtrace_stream.h b/clients/drcachesim/common/memtrace_stream.h
index 3101d352d19..9962b3aef9b 100644
--- a/clients/drcachesim/common/memtrace_stream.h
+++ b/clients/drcachesim/common/memtrace_stream.h
@@ -81,6 +81,42 @@ class memtrace_stream_t {
      */
     virtual std::string
     get_stream_name() const = 0;
+
+    /**
+     * Returns the #trace_version_t value from the #TRACE_MARKER_TYPE_VERSION record
+     * in the trace header.
+     */
+    virtual uint64_t
+    get_version() const = 0;
+
+    /**
+     * Returns the OFFLINE_FILE_TYPE_* bitfields of type #offline_file_type_t
+     * identifying the architecture and other key high-level attributes of the trace
+     * from the #TRACE_MARKER_TYPE_FILETYPE record in the trace header.
+     */
+    virtual uint64_t
+    get_filetype() const = 0;
+
+    /**
+     * Returns the cache line size from the #TRACE_MARKER_TYPE_CACHE_LINE_SIZE record in
+     * the trace header.
+     */
+    virtual uint64_t
+    get_cache_line_size() const = 0;
+
+    /**
+     * Returns the chunk instruction count from the #TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT
+     * record in the trace header.
+     */
+    virtual uint64_t
+    get_chunk_instr_count() const = 0;
+
+    /**
+     * Returns the page size from the #TRACE_MARKER_TYPE_PAGE_SIZE record in
+     * the trace header.
+     */
+    virtual uint64_t
+    get_page_size() const = 0;
 };
 
 #endif /* _MEMTRACE_STREAM_H_ */
diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp
index 7e2acad766f..0745c3c7e5c 100644
--- a/clients/drcachesim/common/options.cpp
+++ b/clients/drcachesim/common/options.cpp
@@ -105,7 +105,7 @@ droption_t<bytesize_t> op_chunk_instr_count(
     // We do not support tiny chunks.  We do not support disabling chunks with a 0
     // value, to simplify testing: although we're still having to support generating
     // non-zip files for !HAS_ZLIB/!HAS_ZIP!
-    bytesize_t(10),
+    bytesize_t(1000),
 #ifdef X64
     bytesize_t(1ULL << 63),
 #else
diff --git a/clients/drcachesim/reader/file_reader.h b/clients/drcachesim/reader/file_reader.h
index 002e9613f6b..076a0f8d2ff 100644
--- a/clients/drcachesim/reader/file_reader.h
+++ b/clients/drcachesim/reader/file_reader.h
@@ -110,6 +110,8 @@ template <typename T> class file_reader_t : public reader_t {
     reader_t &
     skip_instructions(uint64_t instruction_count) override
     {
+        if (instruction_count == 0)
+            return *this;
         if (input_files_.size() > 1) {
             // TODO i#5538: For fast thread-interleaved (whether serial here or the
             // forthcoming per-cpu iteration) we need to read in the schedule file(s)
@@ -121,10 +123,10 @@ template <typename T> class file_reader_t : public reader_t {
             return reader_t::skip_instructions(instruction_count);
         }
         // If the user asks to skip from the very start, we still need to find the chunk
-        // count marker and drain the header queue.
-        // TODO i#5538: Record all of the header values until the first timestamp
-        // and present them as new memtrace_stream_t interfaces.
-        while (chunk_instr_count_ == 0) {
+        // count marker and drain the header queue and populate the stream header values.
+        // XXX: We assume the page size is the final header; it is complex to wait for
+        // the timestamp as we don't want to read it yet.
+        while (page_size_ == 0) {
             input_entry_ = read_next_entry();
             process_input_entry();
         }
@@ -221,6 +223,9 @@ template <typename T> class file_reader_t : public reader_t {
                 return false;
             }
             // Read the meta entries until we hit the pid.
+            // We want to pass the tid+pid to the reader *before* any markers,
+            // even though 2 markers preced the tid+pid in the file.
+            std::queue<trace_entry_t> marker_queue;
             while (read_next_thread_entry(index_, &next, &thread_eof_[index_])) {
                 if (next.type == TRACE_TYPE_PID) {
                     // We assume the pid entry is the last, right before the timestamp.
@@ -229,7 +234,7 @@ template <typename T> class file_reader_t : public reader_t {
                 } else if (next.type == TRACE_TYPE_THREAD)
                     tids_[index_] = next;
                 else if (next.type == TRACE_TYPE_MARKER)
-                    queues_[index_].push(next);
+                    marker_queue.push(next);
                 else {
                     ERRMSG("Unexpected trace sequence for input file #%zu\n", index_);
                     return false;
@@ -241,6 +246,10 @@ template <typename T> class file_reader_t : public reader_t {
             // the first entry.
             queues_[index_].push(tids_[index_]);
             queues_[index_].push(pid);
+            while (!marker_queue.empty()) {
+                queues_[index_].push(marker_queue.front());
+                marker_queue.pop();
+            }
         }
         index_ = input_files_.size();
 
diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
index 9e5c6cb22d0..b837a5c6a19 100644
--- a/clients/drcachesim/reader/reader.cpp
+++ b/clients/drcachesim/reader/reader.cpp
@@ -204,6 +204,7 @@ reader_t::process_input_entry()
         cur_pc_ = next_pc_;
         cur_ref_.instr.addr = cur_pc_;
         next_pc_ = cur_pc_ + cur_ref_.instr.size;
+        ++cur_instr_count_;
         // input_entry_->size stores the number of instrs in this bundle
         assert(input_entry_->size <= sizeof(input_entry_->length));
         if (bundle_idx_ == input_entry_->size)
@@ -249,17 +250,7 @@ reader_t::process_input_entry()
         break;
     case TRACE_TYPE_MARKER:
         cur_ref_.marker.type = (trace_type_t)input_entry_->type;
-        if (!online_ &&
-            (input_entry_->size == TRACE_MARKER_TYPE_VERSION ||
-             input_entry_->size == TRACE_MARKER_TYPE_FILETYPE)) {
-            // Do not carry over a prior thread on a thread switch to a
-            // first-time-seen new thread, whose tid entry is *after* these
-            // markers for offline traces.
-            cur_pid_ = 0;
-            cur_tid_ = 0;
-        } else {
-            assert(cur_tid_ != 0 && cur_pid_ != 0);
-        }
+        assert(cur_tid_ != 0 && cur_pid_ != 0);
         cur_ref_.marker.pid = cur_pid_;
         cur_ref_.marker.tid = cur_tid_;
         cur_ref_.marker.marker_type = (trace_marker_type_t)input_entry_->size;
@@ -284,11 +275,18 @@ reader_t::process_input_entry()
         }
         if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP)
             last_timestamp_instr_count_ = cur_instr_count_;
+        else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_VERSION)
+            version_ = cur_ref_.marker.marker_value;
+        else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
+            filetype_ = cur_ref_.marker.marker_value;
+            if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_))
+                expect_no_encodings_ = false;
+        } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE)
+            cache_line_size_ = cur_ref_.marker.marker_value;
+        else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_PAGE_SIZE)
+            page_size_ = cur_ref_.marker.marker_value;
         else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT)
             chunk_instr_count_ = cur_ref_.marker.marker_value;
-        else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE &&
-                 TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, cur_ref_.marker.marker_value))
-            expect_no_encodings_ = false;
         break;
     default:
         ERRMSG("Unknown trace entry type %d\n", input_entry_->type);
diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h
index 81610928b57..7a6ed92fe3d 100644
--- a/clients/drcachesim/reader/reader.h
+++ b/clients/drcachesim/reader/reader.h
@@ -145,6 +145,31 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     {
         return cur_instr_count_;
     }
+    uint64_t
+    get_version() const override
+    {
+        return version_;
+    }
+    uint64_t
+    get_filetype() const override
+    {
+        return filetype_;
+    }
+    uint64_t
+    get_cache_line_size() const override
+    {
+        return cache_line_size_;
+    }
+    uint64_t
+    get_chunk_instr_count() const override
+    {
+        return chunk_instr_count_;
+    }
+    uint64_t
+    get_page_size() const override
+    {
+        return page_size_;
+    }
 
 protected:
     // This reads the next entry from the stream of entries from all threads interleaved
@@ -174,9 +199,14 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     uint64_t cur_ref_count_ = 0;
     int64_t suppress_ref_count_ = -1;
     uint64_t cur_instr_count_ = 0;
-    uint64_t chunk_instr_count_ = 0; // Unchanging once set to non-zero.
     uint64_t last_timestamp_instr_count_ = 0;
     trace_entry_t *input_entry_ = nullptr;
+    // Remember top-level headers for the memtrace_stream_t interface.
+    uint64_t version_ = 0;
+    uint64_t filetype_ = 0;
+    uint64_t cache_line_size_ = 0;
+    uint64_t chunk_instr_count_ = 0;
+    uint64_t page_size_ = 0;
 
 private:
     struct encoding_info_t {
diff --git a/clients/drcachesim/reader/record_file_reader.h b/clients/drcachesim/reader/record_file_reader.h
index b9d9467f01b..318bbc62ea7 100644
--- a/clients/drcachesim/reader/record_file_reader.h
+++ b/clients/drcachesim/reader/record_file_reader.h
@@ -145,6 +145,19 @@ class record_reader_t : public std::iterator<std::input_iterator_tag, trace_entr
             ++cur_ref_count_;
             if (type_is_instr(static_cast<trace_type_t>(cur_entry_.type)))
                 ++cur_instr_count_;
+            if (cur_entry_.type == TRACE_TYPE_MARKER) {
+                switch (cur_entry_.size) {
+                case TRACE_MARKER_TYPE_VERSION: version_ = cur_entry_.addr; break;
+                case TRACE_MARKER_TYPE_FILETYPE: filetype_ = cur_entry_.addr; break;
+                case TRACE_MARKER_TYPE_CACHE_LINE_SIZE:
+                    cache_line_size_ = cur_entry_.addr;
+                    break;
+                case TRACE_MARKER_TYPE_PAGE_SIZE: page_size_ = cur_entry_.addr; break;
+                case TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT:
+                    chunk_instr_count_ = cur_entry_.addr;
+                    break;
+                }
+            }
         }
         return *this;
     }
@@ -159,6 +172,31 @@ class record_reader_t : public std::iterator<std::input_iterator_tag, trace_entr
     {
         return cur_instr_count_;
     }
+    uint64_t
+    get_version() const override
+    {
+        return version_;
+    }
+    uint64_t
+    get_filetype() const override
+    {
+        return filetype_;
+    }
+    uint64_t
+    get_cache_line_size() const override
+    {
+        return cache_line_size_;
+    }
+    uint64_t
+    get_chunk_instr_count() const override
+    {
+        return chunk_instr_count_;
+    }
+    uint64_t
+    get_page_size() const override
+    {
+        return page_size_;
+    }
 
     virtual bool
     operator==(const record_reader_t &rhs) const
@@ -199,6 +237,12 @@ class record_reader_t : public std::iterator<std::input_iterator_tag, trace_entr
 private:
     uint64_t cur_ref_count_ = 0;
     uint64_t cur_instr_count_ = 0;
+    // Remember top-level headers for the memtrace_stream_t interface.
+    uint64_t version_ = 0;
+    uint64_t filetype_ = 0;
+    uint64_t cache_line_size_ = 0;
+    uint64_t chunk_instr_count_ = 0;
+    uint64_t page_size_ = 0;
 };
 
 /**
diff --git a/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip b/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip
index 3cd75e1b86e..1e7facfe94e 100644
Binary files a/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip and b/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip differ
diff --git a/clients/drcachesim/tests/offline-skip.expect b/clients/drcachesim/tests/offline-skip.expect
new file mode 100644
index 00000000000..cd13d9371b5
--- /dev/null
+++ b/clients/drcachesim/tests/offline-skip.expect
@@ -0,0 +1,19 @@
+Output format:
+<record#> <instr#>: T<tid> <record details>
+------------------------------------------------------------
+        0       63: T1384871 <marker: timestamp 13313186810097539>
+        0       63: T1384871 <marker: tid 1384871 on core 9>
+       92       64: T1384871 ifetch       4 byte(s) @ 0x0000000000401028 48 83 eb 01          sub    $0x0000000000000001 %rbx -> %rbx
+       93       65: T1384871 ifetch       4 byte(s) @ 0x000000000040102c 48 83 fb 00          cmp    %rbx $0x0000000000000000
+       94       66: T1384871 ifetch       2 byte(s) @ 0x0000000000401030 75 d9                jnz    $0x000000000040100b
+       95       67: T1384871 ifetch       7 byte(s) @ 0x000000000040100b 48 c7 c7 01 00 00 00 mov    $0x0000000000000001 -> %rdi
+       96       68: T1384871 ifetch       8 byte(s) @ 0x0000000000401012 48 8d 34 25 00 20 40 lea    0x00402000 -> %rsi
+       96       68: T1384871                                             00
+       97       69: T1384871 ifetch       7 byte(s) @ 0x000000000040101a 48 c7 c2 0d 00 00 00 mov    $0x000000000000000d -> %rdx
+       98       70: T1384871 ifetch       5 byte(s) @ 0x0000000000401021 b8 01 00 00 00       mov    $0x00000001 -> %eax
+       99       71: T1384871 ifetch       2 byte(s) @ 0x0000000000401026 0f 05                syscall  -> %rcx %r11
+View tool results:
+              8 : total instructions
+
+===========================================================================
+Trace invariant checks passed
diff --git a/clients/drcachesim/tests/view_test.cpp b/clients/drcachesim/tests/view_test.cpp
index 2de3300a81a..10dd85e222e 100644
--- a/clients/drcachesim/tests/view_test.cpp
+++ b/clients/drcachesim/tests/view_test.cpp
@@ -164,6 +164,31 @@ run_test_helper(view_t &view, const std::vector<memref_t> &memrefs)
         {
             return "";
         }
+        uint64_t
+        get_version() const override
+        {
+            return 0;
+        }
+        uint64_t
+        get_filetype() const override
+        {
+            return 0;
+        }
+        uint64_t
+        get_cache_line_size() const override
+        {
+            return 0;
+        }
+        uint64_t
+        get_chunk_instr_count() const override
+        {
+            return 0;
+        }
+        uint64_t
+        get_page_size() const override
+        {
+            return 0;
+        }
 
     private:
         view_t &view_;
diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp
index 253534e475a..cb2fe201272 100644
--- a/clients/drcachesim/tools/invariant_checker.cpp
+++ b/clients/drcachesim/tools/invariant_checker.cpp
@@ -60,13 +60,21 @@ invariant_checker_t::~invariant_checker_t()
 {
 }
 
+std::string
+invariant_checker_t::initialize_stream(memtrace_stream_t *serial_stream)
+{
+    serial_stream_ = serial_stream;
+    return "";
+}
+
 void
 invariant_checker_t::report_if_false(per_shard_t *shard, bool condition,
                                      const std::string &invariant_name)
 {
     if (!condition) {
         std::cerr << "Trace invariant failure in T" << shard->tid_ << " at ref # "
-                  << shard->ref_count_ << ": " << invariant_name << "\n";
+                  << shard->stream->get_record_ordinal() << ": " << invariant_name
+                  << "\n";
         abort();
     }
 }
@@ -78,15 +86,25 @@ invariant_checker_t::parallel_shard_supported()
 }
 
 void *
-invariant_checker_t::parallel_shard_init(int shard_index, void *worker_data)
+invariant_checker_t::parallel_shard_init_stream(int shard_index, void *worker_data,
+                                                memtrace_stream_t *shard_stream)
 {
     auto per_shard = std::unique_ptr<per_shard_t>(new per_shard_t);
+    per_shard->stream = shard_stream;
     void *res = reinterpret_cast<void *>(per_shard.get());
     std::lock_guard<std::mutex> guard(shard_map_mutex_);
     shard_map_[shard_index] = std::move(per_shard);
     return res;
 }
 
+// We have no stream interface in invariant_checker_test unit tests.
+// XXX: Could we refactor the test to use a reader that takes a vector?
+void *
+invariant_checker_t::parallel_shard_init(int shard_index, void *worker_data)
+{
+    return parallel_shard_init_stream(shard_index, worker_data, nullptr);
+}
+
 bool
 invariant_checker_t::parallel_shard_exit(void *shard_data)
 {
@@ -104,12 +122,29 @@ bool
 invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
 {
     per_shard_t *shard = reinterpret_cast<per_shard_t *>(shard_data);
-    ++shard->ref_count_;
     if (shard->tid_ == -1 && memref.data.tid != 0)
         shard->tid_ = memref.data.tid;
-    // XXX i#5538: Have the infrastructure provide a common instr and record count.
+    // We check the memtrace_stream_t counts with our own, unless there was an
+    // instr skip from the start where we cannot compare, or we're in a unit
+    // test with no stream interface, or we're in serial mode (since we want
+    // per-shard counts for error reporting; XXX: we could add our own global
+    // counts to compare to the serial stream).
+    ++shard->ref_count_;
     if (type_is_instr(memref.instr.type))
         ++shard->instr_count_;
+    // XXX: We also can't verify counts with a skip invoked from the middle, but
+    // we have no simple way to detect that here.
+    if (shard->instr_count_ <= 1 && !shard->skipped_instrs_ && shard->stream != nullptr &&
+        shard->stream->get_instruction_ordinal() > 1)
+        shard->skipped_instrs_ = true;
+    if (!shard->skipped_instrs_ && shard->stream != nullptr &&
+        (shard->stream != serial_stream_ || shard_map_.size() == 1)) {
+        report_if_false(shard, shard->ref_count_ == shard->stream->get_record_ordinal(),
+                        "Stream record ordinal inaccurate");
+        report_if_false(shard,
+                        shard->instr_count_ == shard->stream->get_instruction_ordinal(),
+                        "Stream instr ordinal inaccurate");
+    }
 #ifdef UNIX
     if (has_annotations_) {
         // Check conditions specific to the signal_invariants app, where it
@@ -181,6 +216,10 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem
     if (memref.marker.type == TRACE_TYPE_MARKER &&
         memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
         shard->file_type_ = static_cast<offline_file_type_t>(memref.marker.marker_value);
+        report_if_false(shard,
+                        shard->stream == nullptr ||
+                            shard->file_type_ == shard->stream->get_filetype(),
+                        "Stream interface filetype != trace marker");
     }
     if (memref.marker.type == TRACE_TYPE_MARKER &&
         memref.marker.marker_type == TRACE_MARKER_TYPE_INSTRUCTION_COUNT) {
@@ -193,10 +232,26 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem
     if (memref.marker.type == TRACE_TYPE_MARKER &&
         memref.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE) {
         shard->found_cache_line_size_marker_ = true;
+        report_if_false(shard,
+                        shard->stream == nullptr ||
+                            memref.marker.marker_value ==
+                                shard->stream->get_cache_line_size(),
+                        "Stream interface cache line size != trace marker");
     }
     if (memref.marker.type == TRACE_TYPE_MARKER &&
         memref.marker.marker_type == TRACE_MARKER_TYPE_PAGE_SIZE) {
         shard->found_page_size_marker_ = true;
+        report_if_false(shard,
+                        shard->stream == nullptr ||
+                            memref.marker.marker_value == shard->stream->get_page_size(),
+                        "Stream interface page size != trace marker");
+    }
+    if (memref.marker.type == TRACE_TYPE_MARKER &&
+        memref.marker.marker_type == TRACE_MARKER_TYPE_VERSION) {
+        report_if_false(shard,
+                        shard->stream == nullptr ||
+                            memref.marker.marker_value == shard->stream->get_version(),
+                        "Stream interface version != trace marker");
     }
 
     // Invariant: each chunk's instruction count must be identical and equal to
@@ -204,12 +259,18 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem
     if (memref.marker.type == TRACE_TYPE_MARKER &&
         memref.marker.marker_type == TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) {
         shard->chunk_instr_count_ = memref.marker.marker_value;
+        report_if_false(shard,
+                        shard->stream == nullptr ||
+                            shard->chunk_instr_count_ ==
+                                shard->stream->get_chunk_instr_count(),
+                        "Stream interface chunk instr count != trace marker");
     }
     if (memref.marker.type == TRACE_TYPE_MARKER &&
         memref.marker.marker_type == TRACE_MARKER_TYPE_CHUNK_FOOTER) {
         report_if_false(shard,
-                        shard->chunk_instr_count_ != 0 &&
-                            shard->instr_count_ % shard->chunk_instr_count_ == 0,
+                        shard->skipped_instrs_ ||
+                            (shard->chunk_instr_count_ != 0 &&
+                             shard->instr_count_ % shard->chunk_instr_count_ == 0),
                         "Chunk instruction counts are inconsistent");
     }
 
@@ -242,9 +303,15 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem
                                  shard->file_type_) ||
                             shard->found_instr_count_marker_,
                         "Missing instr count markers");
-        report_if_false(shard, shard->found_cache_line_size_marker_,
+        report_if_false(shard,
+                        shard->found_cache_line_size_marker_ ||
+                            (shard->skipped_instrs_ && shard->stream != nullptr &&
+                             shard->stream->get_cache_line_size() > 0),
                         "Missing cache line marker");
-        report_if_false(shard, shard->found_page_size_marker_,
+        report_if_false(shard,
+                        shard->found_page_size_marker_ ||
+                            (shard->skipped_instrs_ && shard->stream != nullptr &&
+                             shard->stream->get_page_size() > 0),
                         "Missing page size marker");
         if (knob_test_name_ == "filter_asm_instr_count") {
             static constexpr int ASM_INSTR_COUNT = 133;
@@ -445,6 +512,7 @@ invariant_checker_t::process_memref(const memref_t &memref)
     if (lookup == shard_map_.end()) {
         auto per_shard_unique = std::unique_ptr<per_shard_t>(new per_shard_t);
         per_shard = per_shard_unique.get();
+        per_shard->stream = serial_stream_;
         shard_map_[memref.data.tid] = std::move(per_shard_unique);
     } else
         per_shard = lookup->second.get();
diff --git a/clients/drcachesim/tools/invariant_checker.h b/clients/drcachesim/tools/invariant_checker.h
index a3b0954d495..376d9357665 100644
--- a/clients/drcachesim/tools/invariant_checker.h
+++ b/clients/drcachesim/tools/invariant_checker.h
@@ -51,6 +51,8 @@ class invariant_checker_t : public analysis_tool_t {
                         std::istream *serial_schedule_file = nullptr,
                         std::istream *cpu_schedule_file = nullptr);
     virtual ~invariant_checker_t();
+    std::string
+    initialize_stream(memtrace_stream_t *serial_stream) override;
     bool
     process_memref(const memref_t &memref) override;
     bool
@@ -58,6 +60,9 @@ class invariant_checker_t : public analysis_tool_t {
     bool
     parallel_shard_supported() override;
     void *
+    parallel_shard_init_stream(int shard_index, void *worker_data,
+                               memtrace_stream_t *shard_stream) override;
+    void *
     parallel_shard_init(int shard_index, void *worker_data) override;
     bool
     parallel_shard_exit(void *shard_data) override;
@@ -77,6 +82,7 @@ class invariant_checker_t : public analysis_tool_t {
             prev_xfer_marker_.marker.marker_type = TRACE_MARKER_TYPE_VERSION;
             last_xfer_marker_.marker.marker_type = TRACE_MARKER_TYPE_VERSION;
         }
+        memtrace_stream_t *stream = nullptr;
         memref_t prev_entry_ = {};
         memref_t prev_instr_ = {};
         memref_t prev_xfer_marker_ = {}; // Cleared on seeing an instr.
@@ -112,6 +118,7 @@ class invariant_checker_t : public analysis_tool_t {
         uint64_t last_timestamp_ = 0;
         std::vector<schedule_entry_t> sched_;
         std::unordered_map<uint64_t, std::vector<schedule_entry_t>> cpu2sched_;
+        bool skipped_instrs_ = false;
     };
 
     // We provide this for subclasses to run these invariants with custom
@@ -135,6 +142,8 @@ class invariant_checker_t : public analysis_tool_t {
 
     std::istream *serial_schedule_file_ = nullptr;
     std::istream *cpu_schedule_file_ = nullptr;
+
+    memtrace_stream_t *serial_stream_ = nullptr;
 };
 
 #endif /* _INVARIANT_CHECKER_H_ */
diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt
index ae06eef3c2b..c4a52ba9308 100644
--- a/suite/tests/CMakeLists.txt
+++ b/suite/tests/CMakeLists.txt
@@ -3745,6 +3745,18 @@ if (BUILD_CLIENTS)
       unset(tool.drcacheoff.builtin-prefetch-basic-counts_rawtemp) # use preprocessor
     endif ()
 
+    if (X86 AND X64 AND ZLIB_FOUND)
+      # XXX i#5538: Add trace files for other arches.
+      set(zip_path
+        "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip")
+      torunonly_api(tool.drcacheoff.skip "${drcachesim_path}" "offline-skip" ""
+        # Test invariants with global headers after skipping instrs.
+        "-simulator_type;view;-infile;${zip_path};${test_mode_flag};-skip_instrs;63;-sim_refs;10"
+        OFF OFF)
+      set(tool.drcacheoff.skip_basedir "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests")
+      set(tool.drcacheoff.skip_rawtemp ON) # no preprocessor
+    endif ()
+
     # We run common.decode-bad to test markers for faults
     if (X86) # decode-bad is x86-only
       # Do not cap the trace size (done to shorten tests) b/c it will then miss