diff --git a/api/docs/release.dox b/api/docs/release.dox
index 87e9a33166b..3dd1c281755 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -183,6 +183,12 @@ Further non-compatibility-affecting changes include:
    They can be created with opnd_create_reg_element_vector(), detected with
    opnd_is_element_vector_reg() and have their element size retrieved by
    opnd_get_vector_element_size().
+ - Deprecated the drmemtrace analysis tool functions initialize() and
+   parallel_shard_init(), replacing them with initialize_stream() and
+   parallel_shard_init_stream().  The old versions will continue to work.
+
+**************************************************
+<hr>
 
 The changes between version 9.0.1 and 9.0.0 include the following compatibility
 changes:
@@ -208,6 +214,9 @@ Further non-compatibility-affecting changes include:
  - Added -tool_dir drrun/drconfig parameter to control where to look for tool
    config files.
 
+**************************************************
+<hr>
+
 The changes between version 9.0.0 and 8.0.0 include the following compatibility
 changes:
 
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt
index 4b8b75845cc..3db2a9a9fbd 100644
--- a/clients/drcachesim/CMakeLists.txt
+++ b/clients/drcachesim/CMakeLists.txt
@@ -250,6 +250,7 @@ link_with_pthread(drmemtrace_analyzer)
 install_client_nonDR_header(drmemtrace common/utils.h)
 install_client_nonDR_header(drmemtrace common/trace_entry.h)
 install_client_nonDR_header(drmemtrace common/memref.h)
+install_client_nonDR_header(drmemtrace common/memtrace_stream.h)
 install_client_nonDR_header(drmemtrace reader/reader.h)
 install_client_nonDR_header(drmemtrace analysis_tool.h)
 install_client_nonDR_header(drmemtrace analyzer.h)
diff --git a/clients/drcachesim/analysis_tool.h b/clients/drcachesim/analysis_tool.h
index 4d881060eca..aa43710d5bb 100644
--- a/clients/drcachesim/analysis_tool.h
+++ b/clients/drcachesim/analysis_tool.h
@@ -44,6 +44,7 @@
 // To support installation of headers for analysis tools into a single
 // separate directory we omit common/ here and rely on -I.
 #include "memref.h"
+#include "memtrace_stream.h"
 #include <string>
 
 /**
@@ -93,17 +94,31 @@ class analysis_tool_t {
         : success_(true) {};
     virtual ~analysis_tool_t() {}; /**< Destructor. */
     /**
-     * Tools are encouraged to perform any initialization that might fail here rather
-     * than in the constructor.  On an error, this returns an error string.  On success,
-     * it returns "".
+     * \deprecated The initialize_stream() function is called by the analyzer; this
+     * function is only called if the default implementation of initialize_stream() is
+     * left in place and it calls this version.  On an error, this returns an error
+     * string.  On success, it returns "".
      */
     virtual std::string
     initialize()
     {
         return "";
     }
+    /**
+     * Tools are encouraged to perform any initialization that might fail here rather
+     * than in the constructor.  The \p serial_stream interface allows tools to query
+     * details of the underlying trace during serial operation; it is nullptr for
+     * parallel operation (a per-shard version is passed to parallel_shard_init_stream()).
+     * On an error, this returns an error string.  On success, it returns "".
+     */
+    virtual std::string
+    initialize_stream(memtrace_stream_t *serial_stream)
+    {
+        return initialize();
+    }
     /** Returns whether the tool was created successfully. */
-    virtual bool operator!()
+    virtual bool
+    operator!()
     {
         return !success_;
     }
@@ -136,7 +151,7 @@ class analysis_tool_t {
     /**
      * Returns whether this tool supports analyzing trace shards concurrently, or
      * whether it needs to see a single thread-interleaved stream of traced
-     * events.
+     * events.  This may be called prior to initialize_stream().
      */
     virtual bool
     parallel_shard_supported()
@@ -168,20 +183,32 @@ class analysis_tool_t {
         return "";
     }
     /**
-     * Invoked once for each trace shard prior to calling parallel_shard_memref() for
-     * that shard, this allows a tool to create data local to a shard.  The \p
-     * shard_index is a unique identifier allowing shard data to be stored into a
-     * global table if desired (typically for aggregation use in print_results()).
-     * The \p worker_data is the return value of parallel_worker_init() for the
-     * worker thread who will exclusively operate on this shard.  The return value
-     * here will be passed to each invocation of parallel_shard_memref() for that
-     * same shard.
+     * \deprecated The parallel_shard_init_stream() function is called by the analyzer;
+     * this function is only called if the default implementation of
+     * parallel_shard_init_stream() is left in place and it calls this version.
      */
     virtual void *
     parallel_shard_init(int shard_index, void *worker_data)
     {
         return nullptr;
     }
+    /**
+     * Invoked once for each trace shard prior to calling parallel_shard_memref() for
+     * that shard, this allows a tool to create data local to a shard.  The \p
+     * shard_index is a unique identifier allowing shard data to be stored into a global
+     * table if desired (typically for aggregation use in print_results()).  The \p
+     * worker_data is the return value of parallel_worker_init() for the worker thread
+     * who will exclusively operate on this shard.  The \p shard_stream allows tools to
+     * query details of the underlying trace shard during parallel operation; it is
+     * valid only until parallel_shard_exit() is called.  The return value here will be
+     * passed to each invocation of parallel_shard_memref() for that same shard.
+     */
+    virtual void *
+    parallel_shard_init_stream(int shard_index, void *worker_data,
+                               memtrace_stream_t *shard_stream)
+    {
+        return parallel_shard_init(shard_index, worker_data);
+    }
     /**
      * Invoked once when all trace entries for a shard have been processed.  \p
      * shard_data is the value returned by parallel_shard_init() for this shard.
diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp
index 6ff4680db27..0a79ae7a9c4 100644
--- a/clients/drcachesim/analyzer.cpp
+++ b/clients/drcachesim/analyzer.cpp
@@ -45,6 +45,7 @@
 #    include "reader/snappy_file_reader.h"
 #endif
 #include "common/utils.h"
+#include "memtrace_stream.h"
 
 #ifdef HAS_ZLIB
 // Even if the file is uncompressed, zlib's gzip interface is faster than
@@ -182,6 +183,11 @@ analyzer_t::analyzer_t(const std::string &trace_path, analysis_tool_t **tools,
     , parallel_(true)
     , worker_count_(worker_count)
 {
+    if (!init_file_reader(trace_path)) {
+        success_ = false;
+        error_string_ = "Failed to create reader";
+        return;
+    }
     for (int i = 0; i < num_tools; ++i) {
         if (tools_[i] == NULL || !*tools_[i]) {
             success_ = false;
@@ -190,15 +196,13 @@ analyzer_t::analyzer_t(const std::string &trace_path, analysis_tool_t **tools,
                 error_string_ += ": " + tools_[i]->get_error_string();
             return;
         }
-        const std::string error = tools_[i]->initialize();
+        const std::string error = tools_[i]->initialize_stream(serial_trace_iter_.get());
         if (!error.empty()) {
             success_ = false;
             error_string_ = "Tool failed to initialize: " + error;
             return;
         }
     }
-    if (!init_file_reader(trace_path))
-        success_ = false;
 }
 
 analyzer_t::analyzer_t(const std::string &trace_path)
@@ -264,8 +268,10 @@ analyzer_t::process_tasks(std::vector<analyzer_shard_data_t *> *tasks)
             return;
         }
         std::vector<void *> shard_data(num_tools_);
-        for (int i = 0; i < num_tools_; ++i)
-            shard_data[i] = tools_[i]->parallel_shard_init(tdata->index, worker_data[i]);
+        for (int i = 0; i < num_tools_; ++i) {
+            shard_data[i] = tools_[i]->parallel_shard_init_stream(
+                tdata->index, worker_data[i], tdata->iter.get());
+        }
         VPRINT(this, 1, "shard_data[0] is %p\n", shard_data[0]);
         for (; *tdata->iter != *trace_end_; ++(*tdata->iter)) {
             for (int i = 0; i < num_tools_; ++i) {
diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp
index c7ae3692c72..2ffc82f5c56 100644
--- a/clients/drcachesim/analyzer_multi.cpp
+++ b/clients/drcachesim/analyzer_multi.cpp
@@ -151,6 +151,10 @@ analyzer_multi_t::analyzer_multi_t()
         if (!init_file_reader(op_infile.get_value(), op_verbose.get_value()))
             success_ = false;
     }
+    if (!init_analysis_tools()) {
+        success_ = false;
+        return;
+    }
     // We can't call serial_trace_iter_->init() here as it blocks for ipc_reader_t.
 }
 
@@ -170,14 +174,10 @@ analyzer_multi_t::create_analysis_tools()
     tools_[0] = drmemtrace_analysis_tool_create();
     if (tools_[0] == NULL)
         return false;
-    std::string tool_error;
     if (!*tools_[0]) {
-        tool_error = tools_[0]->get_error_string();
+        std::string tool_error = tools_[0]->get_error_string();
         if (tool_error.empty())
             tool_error = "no error message provided.";
-    } else
-        tool_error = tools_[0]->initialize();
-    if (!tool_error.empty()) {
         error_string_ = "Tool failed to initialize: " + tool_error;
         delete tools_[0];
         tools_[0] = NULL;
@@ -229,8 +229,6 @@ analyzer_multi_t::create_analysis_tools()
             serial_schedule_file_.get(), cpu_schedule_file_.get());
         if (tools_[1] == NULL)
             return false;
-        if (!!*tools_[1])
-            tools_[1]->initialize();
         if (!*tools_[1]) {
             error_string_ = tools_[1]->get_error_string();
             delete tools_[1];
@@ -242,6 +240,35 @@ analyzer_multi_t::create_analysis_tools()
     return true;
 }
 
+// Calls initialize_stream() on tools_[0] and, in test mode, on tools_[1], passing
+// the serial trace iterator as the stream interface.  On failure the reason is
+// stored in error_string_, the failing tool is deleted, and false is returned.
+bool
+analyzer_multi_t::init_analysis_tools()
+{
+    std::string tool_error = tools_[0]->initialize_stream(serial_trace_iter_.get());
+    if (!tool_error.empty()) {
+        error_string_ = "Tool failed to initialize: " + tool_error;
+        delete tools_[0];
+        tools_[0] = NULL;
+        return false;
+    }
+    if (op_test_mode.get_value()) {
+        // Keep the returned string: a non-empty value signals failure even when
+        // the tool's operator!() still reports success, so it must not be dropped.
+        tool_error = tools_[1]->initialize_stream(serial_trace_iter_.get());
+        if (!tool_error.empty() || !*tools_[1]) {
+            error_string_ = tool_error.empty()
+                ? tools_[1]->get_error_string()
+                : "Tool failed to initialize: " + tool_error;
+            delete tools_[1];
+            tools_[1] = NULL;
+            return false;
+        }
+    }
+    return true;
+}
+
 void
 analyzer_multi_t::destroy_analysis_tools()
 {
diff --git a/clients/drcachesim/analyzer_multi.h b/clients/drcachesim/analyzer_multi.h
index 2d7e4b264ae..5f688765c9e 100644
--- a/clients/drcachesim/analyzer_multi.h
+++ b/clients/drcachesim/analyzer_multi.h
@@ -49,6 +49,8 @@ class analyzer_multi_t : public analyzer_t {
 protected:
     bool
     create_analysis_tools();
+    bool
+    init_analysis_tools();
     void
     destroy_analysis_tools();
 
diff --git a/clients/drcachesim/common/memtrace_stream.h b/clients/drcachesim/common/memtrace_stream.h
new file mode 100644
index 00000000000..e25bba70d40
--- /dev/null
+++ b/clients/drcachesim/common/memtrace_stream.h
@@ -0,0 +1,78 @@
+/* **********************************************************
+ * Copyright (c) 2022 Google, Inc.  All rights reserved.
+ * **********************************************************/
+
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Google, Inc. nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without
+ *   specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/* memtrace_stream: an interface to access aspects of the full stream of memory
+ * trace records.
+ *
+ * We had considered other avenues for analysis_tool_t to obtain things like
+ * the record and instruction ordinals within the stream, in the presence of
+ * skipping: we could add fields to memref but we'd either have to append
+ * and have them at different offsets for each type or we'd have to break
+ * compatibility to prepend every time we added more; or we could add parameters
+ * to process_memref().  Passing an interface to the init routines seems
+ * the simplest and most flexible.
+ */
+
+#ifndef _MEMTRACE_STREAM_H_
+#define _MEMTRACE_STREAM_H_ 1
+
+// For uint64_t, so that this header does not depend on include order.
+#include <cstdint>
+
+/**
+ * @file drmemtrace/memtrace_stream.h
+ * @brief DrMemtrace interface for analysis tools to query the full stream of records.
+ */
+
+/**
+ * This is an interface through which analysis tools obtain information
+ * on the full stream of memory reference records.
+ */
+class memtrace_stream_t {
+public:
+    /** Destructor. */
+    virtual ~memtrace_stream_t() = default;
+    /**
+     * Returns the count of #memref_t records from the start of the trace to this point.
+     * This includes records skipped over and not presented to any tool.
+     */
+    virtual uint64_t
+    get_record_ordinal() = 0;
+    /**
+     * Returns the count of instructions from the start of the trace to this point.
+     * This includes instructions skipped over and not presented to any tool.
+     */
+    virtual uint64_t
+    get_instruction_ordinal() = 0;
+};
+
+#endif /* _MEMTRACE_STREAM_H_ */
diff --git a/clients/drcachesim/docs/drcachesim.dox.in b/clients/drcachesim/docs/drcachesim.dox.in
index d98d14dbdee..c75af89da19 100644
--- a/clients/drcachesim/docs/drcachesim.dox.in
+++ b/clients/drcachesim/docs/drcachesim.dox.in
@@ -638,87 +638,63 @@ disassembling instructions in AT&T, Intel, Arm, or DR format (to see
 disassembly for online traces, pass the `-instr_encodings` option). The
 -skip_refs and -sim_refs flags can be used to
 set a start point and end point for the disassembled view. Note that these
-flags compute the number of instructions which are skipped or displayed which
-is distinct from the number of trace entries.
+flags count trace entry records to be skipped or displayed, which is distinct
+from counting instruction records.
 
 The tool displays loads and stores, as well as metadata marker entries for
 timestamps, on which core and thread the subsequent instruction sequence was
 executed, and kernel and system call transfers (these correspond to
 signal or event handler interruptions of the regular execution flow).
 
+In its first two columns, the tool displays the trace record ordinal
+and the instruction fetch ordinal.
+
 \code
-$ bin64/drrun -t drcachesim -simulator_type view -sim_refs 20 -indir drmemtrace.*.dir
-T80431 <marker: version 3>
-T80431 <marker: filetype 0x40>
-T80431 <marker: cache line size 64>
-T80431 <marker: timestamp 13269546858099127>
-T80431 <marker: tid 80431 on core 1>
-T80431   0x00007f2ae335d090  48 89 e7             mov    %rsp, %rdi
-T80431   0x00007f2ae335d093  e8 48 0d 00 00       call   $0x00007f2ae335dde0
-T80431     write 8 byte(s) @ 0x7ffdf5770ac8
-T80431   0x00007f2ae335dde0  55                   push   %rbp
-T80431     write 8 byte(s) @ 0x7ffdf5770ac0
-T80431   0x00007f2ae335dde1  48 89 e5             mov    %rsp, %rbp
-T80431   0x00007f2ae335dde4  41 57                push   %r15
-T80431     write 8 byte(s) @ 0x7ffdf5770ab8
-T80431   0x00007f2ae335dde6  49 89 ff             mov    %rdi, %r15
-T80431   0x00007f2ae335dde9  41 56                push   %r14
-T80431     write 8 byte(s) @ 0x7ffdf5770ab0
-T80431   0x00007f2ae335ddeb  41 55                push   %r13
-T80431     write 8 byte(s) @ 0x7ffdf5770aa8
-T80431   0x00007f2ae335dded  41 54                push   %r12
-T80431     write 8 byte(s) @ 0x7ffdf5770aa0
-T80431   0x00007f2ae335ddef  53                   push   %rbx
-T80431     write 8 byte(s) @ 0x7ffdf5770a98
-T80431   0x00007f2ae335ddf0  48 83 ec 38          sub    $0x38, %rsp
-T80431   0x00007f2ae335ddf4  0f 31                rdtsc
-T80431   0x00007f2ae335ddf6  48 c1 e2 20          shl    $0x20, %rdx
-T80431   0x00007f2ae335ddfa  48 09 d0             or     %rdx, %rax
-T80431   0x00007f2ae335ddfd  48 8d 15 74 90 02 00 lea    <rel> 0x00007f2ae3386e78, %rdx
-T80431   0x00007f2ae335de04  48 89 05 75 87 02 00 mov    %rax, <rel> 0x00007f2ae3386580
-T80431     write 8 byte(s) @ 0x7f2ae3386580
-T80431   0x00007f2ae335de0b  48 8b 05 66 90 02 00 mov    <rel> 0x00007f2ae3386e78, %rax
-T80431     read  8 byte(s) @ 0x7f2ae3386e78
-T80431   0x00007f2ae335de12  49 89 d4             mov    %rdx, %r12
-T80431   0x00007f2ae335de15  4c 2b 25 e4 91 02 00 sub    <rel> 0x00007f2ae3387000, %r12
-T80431     read  8 byte(s) @ 0x7f2ae3387000
-T80431   0x00007f2ae335de1c  48 89 15 d5 9b 02 00 mov    %rdx, <rel> 0x00007f2ae33879f8
+$ bin64/drrun -t drcachesim -simulator_type view -indir drmemtrace.*.dir -sim_refs 20
+Output format:
+<record#> <instr#>: T<tid> <record details>
+------------------------------------------------------------
+        1        0: T3256418 <marker: version 4>
+        2        0: T3256418 <marker: filetype 0x240>
+        3        0: T3256418 <marker: cache line size 64>
+        4        0: T3256418 <marker: chunk instruction count 1024>
+        5        0: T3256418 <marker: page size 4096>
+        6        0: T3256418 <marker: timestamp 13312410768080478>
+        7        0: T3256418 <marker: tid 3256418 on core 7>
+        8        1: T3256418 ifetch       3 byte(s) @ 0x00007fc205a61940 48 89 e7             mov    %rsp, %rdi
+        9        2: T3256418 ifetch       5 byte(s) @ 0x00007fc205a61943 e8 b8 0c 00 00       call   $0x00007fc205a62600
+       10        2: T3256418 write        8 byte(s) @ 0x00007fff9a9e3528 by PC 0x00007fc205a61943
+       11        3: T3256418 ifetch       1 byte(s) @ 0x00007fc205a62600 55                   push   %rbp
+       12        3: T3256418 write        8 byte(s) @ 0x00007fff9a9e3520 by PC 0x00007fc205a62600
+       13        4: T3256418 ifetch       3 byte(s) @ 0x00007fc205a62601 48 89 e5             mov    %rsp, %rbp
+       14        5: T3256418 ifetch       2 byte(s) @ 0x00007fc205a62604 41 57                push   %r15
+       15        5: T3256418 write        8 byte(s) @ 0x00007fff9a9e3518 by PC 0x00007fc205a62604
+       16        6: T3256418 ifetch       2 byte(s) @ 0x00007fc205a62606 41 56                push   %r14
+       17        6: T3256418 write        8 byte(s) @ 0x00007fff9a9e3510 by PC 0x00007fc205a62606
+       18        7: T3256418 ifetch       2 byte(s) @ 0x00007fc205a62608 41 55                push   %r13
+       19        7: T3256418 write        8 byte(s) @ 0x00007fff9a9e3508 by PC 0x00007fc205a62608
+       20        8: T3256418 ifetch       2 byte(s) @ 0x00007fc205a6260a 41 54                push   %r12
 View tool results:
-             20 : total instructions
+              8 : total instructions
 \endcode
 
 An example of thread switches:
 
 \code
 ------------------------------------------------------------
-T342625 <marker: timestamp 13260900247983768>
-T342625 <marker: tid 342625 on core 3>
-T342625   0x0000000000402460  31 ed                xor    %ebp, %ebp
-T342625   0x0000000000402462  49 89 d1             mov    %rdx, %r9
-T342625   0x0000000000402465  5e                   pop    %rsi
-T342625     read  8 byte(s) @ 0x7ffe70dce480
-T342625   0x0000000000402466  48 89 e2             mov    %rsp, %rdx
+       46        0: T3264758 <marker: timestamp 13312413437398055>
+       47        0: T3264758 <marker: tid 3264758 on core 2>
+       48        1: T3264758 ifetch       3 byte(s) @ 0x00007f4ea89e4940 48 89 e7             mov    %rsp, %rdi
+       49        2: T3264758 ifetch       5 byte(s) @ 0x00007f4ea89e4943 e8 b8 0c 00 00       call   $0x00007f4ea89e5600
+       50        2: T3264758 write        8 byte(s) @ 0x00007ffd93a0cf18 by PC 0x00007f4ea89e4943
 ...
-T342625   0x0000000000467c42  4d 89 c8             mov    %r9, %r8
-T342625   0x0000000000467c45  4c 8b 54 24 08       mov    0x08(%rsp), %r10
-T342625     read  8 byte(s) @ 0x7ffe70dce100
-T342625   0x0000000000467c4a  b8 38 00 00 00       mov    $0x00000038, %eax
-T342625   0x0000000000467c4f  0f 05                syscall
+  2854543  2149665: T3264758 ifetch       5 byte(s) @ 0x00007f4ea7c87f8c b8 0e 00 00 00       mov    $0x0000000e, %eax
+  2854544  2149666: T3264758 ifetch       2 byte(s) @ 0x00007f4ea7c87f91 0f 05                syscall
 ------------------------------------------------------------
-T342626 <marker: timestamp 13260900248221723>
-T342626 <marker: tid 342626 on core 0>
-T342626   0x0000000000467c51  48 85 c0             test   %rax, %rax
-T342626   0x0000000000467c54  7c 13                jl     $0x0000000000467c69
-T342626   0x0000000000467c56  74 01                jz     $0x0000000000467c59
-T342626   0x0000000000467c59  31 ed                xor    %ebp, %ebp
-T342626   0x0000000000467c5b  58                   pop    %rax
-T342626     read  8 byte(s) @ 0x7f899f928e70
-T342626   0x0000000000467c5c  5f                   pop    %rdi
-T342626     read  8 byte(s) @ 0x7f899f928e78
-T342626   0x0000000000467c5d  ff d0                call   %rax
-T342626     write 8 byte(s) @ 0x7f899f928e78
-T342626   0x0000000000404a30  41 54                push   %r12
-T342626     write 8 byte(s) @ 0x7f899f928e70
+  2854545  2149666: T3264760 <marker: timestamp 13312413438835999>
+  2854546  2149666: T3264760 <marker: tid 3264760 on core 11>
+  2854547  2149667: T3264760 ifetch       3 byte(s) @ 0x00007f4ea7d0b099 48 85 c0             test   %rax, %rax
+  2854548  2149668: T3264760 ifetch       2 byte(s) @ 0x00007f4ea7d0b09c 7c 18                jl     $0x00007f4ea7d0b0b6
 ...
 \endcode
 
@@ -728,43 +704,40 @@ with metadata showing that the signal was delivered just after a
 non-taken conditional branch:
 
 \code
-T585061   0x00007fdb4e95128f  41 f6 44 24 08 08    test   0x08(%r12), $0x08
-T585061     read  1 byte(s) @ 0x7ffd5af76b08
-T585061   0x00007fdb4e951295  0f 85 28 04 00 00    jnz    $0x00007fdb4e9516c3
-T585061 <marker: kernel xfer from 0x7fdb4e95129b to handler>
-T585061 <marker: timestamp 13269730052517230>
-T585061 <marker: tid 585061 on core 3>
-T585061   0x00007fdb4ace9dba  55                   push   %rbp
-T585061     write 8 byte(s) @ 0x7ffd5af763d0
-T585061   0x00007fdb4ace9dbb  48 89 e5             mov    %rsp, %rbp
-T585061   0x00007fdb4ace9dbe  89 7d fc             mov    %edi, -0x04(%rbp)
-T585061     write 4 byte(s) @ 0x7ffd5af763cc
-T585061   0x00007fdb4ace9dc1  48 89 75 f0          mov    %rsi, -0x10(%rbp)
-T585061     write 8 byte(s) @ 0x7ffd5af763c0
-T585061   0x00007fdb4ace9dc5  48 89 55 e8          mov    %rdx, -0x18(%rbp)
-T585061     write 8 byte(s) @ 0x7ffd5af763b8
-T585061   0x00007fdb4ace9dc9  83 7d fc 1a          cmp    -0x04(%rbp), $0x1a
-T585061     read  4 byte(s) @ 0x7ffd5af763cc
-T585061   0x00007fdb4ace9dcd  75 0f                jnz    $0x00007fdb4ace9dde
-T585061   0x00007fdb4ace9dcf  8b 05 7f 23 20 00    mov    <rel> 0x00007fdb4aeec154, %eax
-T585061     read  4 byte(s) @ 0x7fdb4aeec154
-T585061   0x00007fdb4ace9dd5  83 c0 01             add    $0x01, %eax
-T585061   0x00007fdb4ace9dd8  89 05 76 23 20 00    mov    %eax, <rel> 0x00007fdb4aeec154
-T585061     write 4 byte(s) @ 0x7fdb4aeec154
-T585061   0x00007fdb4ace9dde  90                   nop
-T585061   0x00007fdb4ace9ddf  5d                   pop    %rbp
-T585061     read  8 byte(s) @ 0x7ffd5af763d0
-T585061   0x00007fdb4ace9de0  c3                   ret
-T585061     read  8 byte(s) @ 0x7ffd5af763d8
-T585061   0x00007fdb4e95c140  48 c7 c0 0f 00 00 00 mov    $0x0000000f, %rax
-T585061   0x00007fdb4e95c147  0f 05                syscall
-T585061 <marker: timestamp 13269730052517239>
-T585061 <marker: tid 585061 on core 3>
-T585061 <marker: syscall xfer from 0x7fdb4e95c149>
-T585061 <marker: timestamp 13269730052520271>
-T585061 <marker: tid 585061 on core 3>
-T585061   0x00007fdb4e95129b  48 8b 1d 8e 40 01 00 mov    <rel> 0x00007fdb4e965330, %rbx
-T585061     read  8 byte(s) @ 0x7fdb4e965330
+  2851502  2147588: T3264758 ifetch       2 byte(s) @ 0x00007f4ea7c87a54 eb 14                jmp    $0x00007f4ea7c87a6a
+  2851503  2147588: T3264758 <marker: kernel xfer from 0x7f4ea7c87a6a to handler>
+  2851504  2147588: T3264758 <marker: timestamp 13312413438786440>
+  2851505  2147588: T3264758 <marker: tid 3264758 on core 8>
+  2851506  2147589: T3264758 ifetch       1 byte(s) @ 0x00007f4ea47fbdba 55                   push   %rbp
+  2851507  2147589: T3264758 write        8 byte(s) @ 0x00007ffd93a0be30 by PC 0x00007f4ea47fbdba
+  2851508  2147590: T3264758 ifetch       3 byte(s) @ 0x00007f4ea47fbdbb 48 89 e5             mov    %rsp, %rbp
+  2851509  2147591: T3264758 ifetch       3 byte(s) @ 0x00007f4ea47fbdbe 89 7d fc             mov    %edi, -0x04(%rbp)
+  2851510  2147591: T3264758 write        4 byte(s) @ 0x00007ffd93a0be2c by PC 0x00007f4ea47fbdbe
+  2851511  2147592: T3264758 ifetch       4 byte(s) @ 0x00007f4ea47fbdc1 48 89 75 f0          mov    %rsi, -0x10(%rbp)
+  2851512  2147592: T3264758 write        8 byte(s) @ 0x00007ffd93a0be20 by PC 0x00007f4ea47fbdc1
+  2851513  2147593: T3264758 ifetch       4 byte(s) @ 0x00007f4ea47fbdc5 48 89 55 e8          mov    %rdx, -0x18(%rbp)
+  2851514  2147593: T3264758 write        8 byte(s) @ 0x00007ffd93a0be18 by PC 0x00007f4ea47fbdc5
+  2851515  2147594: T3264758 ifetch       4 byte(s) @ 0x00007f4ea47fbdc9 83 7d fc 1a          cmp    -0x04(%rbp), $0x1a
+  2851516  2147594: T3264758 read         4 byte(s) @ 0x00007ffd93a0be2c by PC 0x00007f4ea47fbdc9
+  2851517  2147595: T3264758 ifetch       2 byte(s) @ 0x00007f4ea47fbdcd 75 0f                jnz    $0x00007f4ea47fbdde
+  2851518  2147596: T3264758 ifetch       6 byte(s) @ 0x00007f4ea47fbdcf 8b 05 7f 23 20 00    mov    <rel> 0x00007f4ea49fe154, %eax
+  2851519  2147596: T3264758 read         4 byte(s) @ 0x00007f4ea49fe154 by PC 0x00007f4ea47fbdcf
+  2851520  2147597: T3264758 ifetch       3 byte(s) @ 0x00007f4ea47fbdd5 83 c0 01             add    $0x01, %eax
+  2851521  2147598: T3264758 ifetch       6 byte(s) @ 0x00007f4ea47fbdd8 89 05 76 23 20 00    mov    %eax, <rel> 0x00007f4ea49fe154
+  2851522  2147598: T3264758 write        4 byte(s) @ 0x00007f4ea49fe154 by PC 0x00007f4ea47fbdd8
+  2851523  2147599: T3264758 ifetch       1 byte(s) @ 0x00007f4ea47fbdde 90                   nop
+  2851524  2147600: T3264758 ifetch       1 byte(s) @ 0x00007f4ea47fbddf 5d                   pop    %rbp
+  2851525  2147600: T3264758 read         8 byte(s) @ 0x00007ffd93a0be30 by PC 0x00007f4ea47fbddf
+  2851526  2147601: T3264758 ifetch       1 byte(s) @ 0x00007f4ea47fbde0 c3                   ret
+  2851527  2147601: T3264758 read         8 byte(s) @ 0x00007ffd93a0be38 by PC 0x00007f4ea47fbde0
+  2851528  2147602: T3264758 ifetch       7 byte(s) @ 0x00007f4ea7c3daa0 48 c7 c0 0f 00 00 00 mov    $0x0000000f, %rax
+  2851529  2147603: T3264758 ifetch       2 byte(s) @ 0x00007f4ea7c3daa7 0f 05                syscall
+  2851530  2147603: T3264758 <marker: timestamp 13312413438787645>
+  2851531  2147603: T3264758 <marker: tid 3264758 on core 8>
+  2851532  2147603: T3264758 <marker: syscall xfer from 0x7f4ea7c3daa9>
+  2851533  2147603: T3264758 <marker: timestamp 13312413438787652>
+  2851534  2147603: T3264758 <marker: tid 3264758 on core 8>
+  2851535  2147604: T3264758 ifetch       5 byte(s) @ 0x00007f4ea7c87a6a e8 11 8b 07 00       call   $0x00007f4ea7d00580
 \endcode
 
 \section sec_tool_func_view View Function Calls
@@ -1417,7 +1390,7 @@ application thread, but the tool interface can support other divisions.  For too
 that support concurrent processing of shards and do not need to see a single
 time-sorted interleaved merged trace, the interface functions with the parallel_
 prefix should be overridden, and parallel_shard_supported() should return true.
-parallel_shard_init() will be invoked for each shard prior to invoking
+parallel_shard_init_stream() will be invoked for each shard prior to invoking
 parallel_shard_memref() for each entry in that shard; the data structure returned
 from parallel_shard_init() will be passed to parallel_shard_memref() for each
 trace entry for that shard.  The concurrency model used guarantees that all
@@ -1458,6 +1431,15 @@ on each thread transition.  Other built-in markers indicate
 disruptions in user mode control flow such as signal handler entry and
 exit.
 
+The absolute ordinals for trace records and instruction fetches are
+available via the #memtrace_stream_t interface passed to the
+initialize_stream() function for serial operation and
+parallel_shard_init_stream() for parallel operation.  If the iterator
+skips over some records that are not passed to the tools, these
+ordinals will include those skipped records.  If a tool wishes to
+count only those records or instructions that it sees, it can add its
+own counters.
+
 CMake support is provided for including the headers and linking the
 libraries of the \p drmemtrace framework.  A new CMake function is defined
 in the DynamoRIO package which sets the include directory for using the \p
diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
index b9217d0e275..26f9852fa70 100644
--- a/clients/drcachesim/reader/reader.cpp
+++ b/clients/drcachesim/reader/reader.cpp
@@ -287,8 +287,10 @@ reader_t::operator++()
             at_eof_ = true; // bail
             break;
         }
-        if (have_memref)
+        if (have_memref) {
+            ++cur_ref_count_;
             break;
+        }
     }
 
     return *this;
diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h
index 4696ad9ba27..490f7203f58 100644
--- a/clients/drcachesim/reader/reader.h
+++ b/clients/drcachesim/reader/reader.h
@@ -42,6 +42,7 @@
 #include <unordered_map>
 // For exporting we avoid "../common" and rely on -I.
 #include "memref.h"
+#include "memtrace_stream.h"
 #include "utils.h"
 
 #define OUT /* just a marker */
@@ -58,7 +59,8 @@
 #    define VPRINT(reader, level, ...) /* nothing */
 #endif
 
-class reader_t : public std::iterator<std::input_iterator_tag, memref_t> {
+class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
+                 public memtrace_stream_t {
 public:
     reader_t()
     {
@@ -113,6 +115,17 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t> {
     // 2) It is difficult to implement for file_reader_t as streams do not
     //    have a copy constructor.
 
+    uint64_t
+    get_record_ordinal() override
+    {
+        return cur_ref_count_;
+    }
+    uint64_t
+    get_instruction_ordinal() override
+    {
+        return cur_instr_count_;
+    }
+
 protected:
     // This reads the next entry from the stream of entries from all threads interleaved
     // in timestamp order.
@@ -150,6 +163,7 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t> {
     addr_t prev_instr_addr_ = 0;
     int bundle_idx_ = 0;
     std::unordered_map<memref_tid_t, memref_pid_t> tid2pid_;
+    uint64_t cur_ref_count_ = 0;
     uint64_t cur_instr_count_ = 0;
     uint64_t chunk_instr_count_ = 0; // Unchanging once set to non-zero.
     uint64_t last_timestamp_instr_count_ = 0;
diff --git a/clients/drcachesim/tests/offline-phys.templatex b/clients/drcachesim/tests/offline-phys.templatex
index 0450a7d94d2..f6257fe695b 100644
--- a/clients/drcachesim/tests/offline-phys.templatex
+++ b/clients/drcachesim/tests/offline-phys.templatex
@@ -9,21 +9,21 @@ Adios world!
 Adios world!
 Adios world!
 Output format:
-<record#>: T<tid> <record details>
+<record#> <instr#>: T<tid> <record details>
 ------------------------------------------------------------
-        1: T[0-9]+ <marker: version 4>
-        2: T[0-9]+ <marker: filetype 0x242>
-        3: T[0-9]+ <marker: cache line size [0-9]+>
-        4: T[0-9]+ <marker: chunk instruction count [0-9]+>
-        5: T[0-9]+ <marker: page size [0-9]+>
-        6: T[0-9]+ <marker: timestamp [0-9]+>
-        7: T[0-9]+ <marker: tid [0-9]+ on core [0-9]+>
-        8: T[0-9]+ <marker: physical address for following virtual: 0x[0-9a-f][0-9a-f]+>
-        9: T[0-9]+ <marker: virtual address for prior physical: 0x[0-9a-f][0-9a-f]+>
-       10: T[0-9]+ <marker: physical address for following virtual: 0x[0-9a-f][0-9a-f]+>
-       11: T[0-9]+ <marker: virtual address for prior physical: 0x[0-9a-f][0-9a-f]+>
-       12: T[0-9]+ <marker: physical address for following virtual: 0x[0-9a-f][0-9a-f]+>
-       13: T[0-9]+ <marker: virtual address for prior physical: 0x[0-9a-f][0-9a-f]+>
-       14: T[0-9]+ <marker: timestamp [0-9]+>
-       15: T[0-9]+ <marker: tid [0-9]+ on core [0-9]+>
-       16: T[0-9]+ ifetch .*
+        1        0: T[0-9]+ <marker: version 4>
+        2        0: T[0-9]+ <marker: filetype 0x242>
+        3        0: T[0-9]+ <marker: cache line size [0-9]+>
+        4        0: T[0-9]+ <marker: chunk instruction count [0-9]+>
+        5        0: T[0-9]+ <marker: page size [0-9]+>
+        6        0: T[0-9]+ <marker: timestamp [0-9]+>
+        7        0: T[0-9]+ <marker: tid [0-9]+ on core [0-9]+>
+        8        0: T[0-9]+ <marker: physical address for following virtual: 0x[0-9a-f][0-9a-f]+>
+        9        0: T[0-9]+ <marker: virtual address for prior physical: 0x[0-9a-f][0-9a-f]+>
+       10        0: T[0-9]+ <marker: physical address for following virtual: 0x[0-9a-f][0-9a-f]+>
+       11        0: T[0-9]+ <marker: virtual address for prior physical: 0x[0-9a-f][0-9a-f]+>
+       12        0: T[0-9]+ <marker: physical address for following virtual: 0x[0-9a-f][0-9a-f]+>
+       13        0: T[0-9]+ <marker: virtual address for prior physical: 0x[0-9a-f][0-9a-f]+>
+       14        0: T[0-9]+ <marker: timestamp [0-9]+>
+       15        0: T[0-9]+ <marker: tid [0-9]+ on core [0-9]+>
+       16        1: T[0-9]+ ifetch .*
diff --git a/clients/drcachesim/tests/offline-view.templatex b/clients/drcachesim/tests/offline-view.templatex
index cbccfde801c..c5b4e2df4d5 100644
--- a/clients/drcachesim/tests/offline-view.templatex
+++ b/clients/drcachesim/tests/offline-view.templatex
@@ -1,7 +1,15 @@
 Hello, world!
-.*
- *[0-9]*: T[0-9]* <marker: timestamp.*
- *[0-9]*: T[0-9]* <marker: tid [0-9]* on core [0-9]*>
+Output format:
+<record#> <instr#>: T<tid> <record details>
+------------------------------------------------------------
+        1        0: T[0-9]* <marker: version [0-9]>
+        2        0: T[0-9]* <marker: filetype 0x[0-9a-f]*>
+        3        0: T[0-9]* <marker: cache line size [0-9]*>
+        4        0: T[0-9]* <marker: chunk instruction count [0-9]*>
+        5        0: T[0-9]* <marker: page size [0-9]*>
+        6        0: T[0-9]* <marker: timestamp [0-9]*>
+        7        0: T[0-9]* <marker: tid [0-9]* on core [0-9]*>
+        8        1: T[0-9]* ifetch      .*
 .*
 View tool results:
     *[0-9]* : total instructions
diff --git a/clients/drcachesim/tests/view_test.cpp b/clients/drcachesim/tests/view_test.cpp
index bf2cceddc5b..aace35c58d9 100644
--- a/clients/drcachesim/tests/view_test.cpp
+++ b/clients/drcachesim/tests/view_test.cpp
@@ -120,19 +120,55 @@ class view_nomod_test_t : public view_t {
 std::string
 run_test_helper(view_t &view, const std::vector<memref_t> &memrefs)
 {
-    view.initialize();
-    // Capture cerr.
-    std::stringstream capture;
-    std::streambuf *prior = std::cerr.rdbuf(capture.rdbuf());
-    // Run the tool.
-    for (const auto &memref : memrefs) {
-        if (!view.process_memref(memref))
-            std::cout << "Hit error: " << view.get_error_string() << "\n";
-    }
-    // Return the result.
-    std::string res = capture.str();
-    std::cerr.rdbuf(prior);
-    return res;
+    class local_stream_t : public memtrace_stream_t { // Counts records as it feeds.
+    public:
+        local_stream_t(view_t &view, const std::vector<memref_t> &memrefs)
+            : view_(view)
+            , memrefs_(memrefs)
+        {
+        }
+
+        std::string
+        run()
+        {
+            view_.initialize_stream(this); // Precedes capture: its output is not kept.
+            // Redirect cerr into a local buffer.
+            std::stringstream capture;
+            std::streambuf *prior = std::cerr.rdbuf(capture.rdbuf());
+            // Run the tool, bumping the ordinals before each record (so 1-based).
+            for (const auto &memref : memrefs_) {
+                ++ref_count_;
+                if (type_is_instr(memref.instr.type))
+                    ++instr_count_;
+                if (!view_.process_memref(memref))
+                    std::cout << "Hit error: " << view_.get_error_string() << "\n";
+            }
+            // Grab the captured text, then restore cerr's original buffer.
+            std::string res = capture.str();
+            std::cerr.rdbuf(prior);
+            return res;
+        }
+
+        uint64_t
+        get_record_ordinal() override
+        {
+            return ref_count_; // Records handed to the tool so far.
+        }
+        uint64_t
+        get_instruction_ordinal() override
+        {
+            return instr_count_; // Subset of those passing type_is_instr().
+        }
+
+    private:
+        view_t &view_;
+        const std::vector<memref_t> &memrefs_;
+        uint64_t ref_count_ = 0;
+        uint64_t instr_count_ = 0;
+    };
+
+    local_stream_t stream(view, memrefs);
+    return stream.run();
 }
 
 bool
@@ -217,7 +253,8 @@ test_skip_memrefs(void *drcontext, instrlist_t &ilist,
     ss >> prefix;
     if (prefix != 1 + skip_memrefs) {
         std::cerr << "Expect to start after skip count " << skip_memrefs << " but found "
-                  << prefix << "\n";
+                  << prefix << "\n"
+                  << res << "\n";
         return false;
     }
     return true;
diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp
index b133445d4ce..88cab6657ea 100644
--- a/clients/drcachesim/tools/view.cpp
+++ b/clients/drcachesim/tools/view.cpp
@@ -70,15 +70,15 @@ view_t::view_t(const std::string &module_file_path, memref_tid_t thread,
     , num_disasm_instrs_(0)
     , prev_tid_(-1)
     , filetype_(-1)
-    , num_refs_(0)
     , timestamp_(0)
     , has_modules_(true)
 {
 }
 
 std::string
-view_t::initialize()
+view_t::initialize_stream(memtrace_stream_t *serial_stream)
 {
+    serial_stream_ = serial_stream;
     print_header();
     dcontext_.dcontext = dr_standalone_init();
     if (module_file_path_.empty()) {
@@ -124,9 +124,10 @@ view_t::parallel_shard_supported()
 }
 
 void *
-view_t::parallel_shard_init(int shard_index, void *worker_data)
+view_t::parallel_shard_init_stream(int shard_index, void *worker_data,
+                                   memtrace_stream_t *shard_stream)
 {
-    return nullptr;
+    return shard_stream; // No other shard state; index and worker data are unused.
 }
 
 bool
@@ -144,15 +145,8 @@ view_t::parallel_shard_error(void *shard_data)
 }
 
 bool
-view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
-{
-    return process_memref(memref);
-}
-
-bool
-view_t::should_skip(const memref_t &memref)
+view_t::should_skip(memtrace_stream_t *memstream, const memref_t &memref)
 {
-    num_refs_++;
     if (skip_refs_left_ > 0) {
         skip_refs_left_--;
         // I considered printing the version and filetype even when skipped but
@@ -165,7 +159,14 @@ view_t::should_skip(const memref_t &memref)
             return true;
         sim_refs_left_--;
         if (sim_refs_left_ == 0 && timestamp_ > 0) {
-            print_prefix(memref, -1); // Already incremented for timestamp.
+            // Print this timestamp right before the final record.
+            int adjust = -1; // Already incremented for timestamp.
+            if (memref.marker.type == TRACE_TYPE_MARKER &&
+                memref.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP) {
+                // This is the final record so no adjustment needed.
+                adjust = 0;
+            }
+            print_prefix(memstream, memref, adjust);
             std::cerr << "<marker: timestamp " << timestamp_ << ">\n";
             timestamp_ = 0;
         }
@@ -176,6 +177,13 @@ view_t::should_skip(const memref_t &memref)
 bool
 view_t::process_memref(const memref_t &memref)
 {
+    return parallel_shard_memref(serial_stream_, memref); // Shared with shard path.
+}
+
+bool
+view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
+{
+    memtrace_stream_t *memstream = reinterpret_cast<memtrace_stream_t *>(shard_data);
     if (knob_thread_ > 0 && memref.data.tid > 0 && memref.data.tid != knob_thread_)
         return true;
     // Even for -skip_refs we need to process the up-front version and type.
@@ -213,7 +221,7 @@ view_t::process_memref(const memref_t &memref)
             // We can't easily reorder and place window markers before timestamps
             // since memref iterators use the timestamps to order buffer units.
             timestamp_ = memref.marker.marker_value;
-            if (should_skip(memref))
+            if (should_skip(memstream, memref))
                 timestamp_ = 0;
             return true;
         default: break;
@@ -227,21 +235,21 @@ view_t::process_memref(const memref_t &memref)
         printed_header_.find(memref.marker.tid) == printed_header_.end()) {
         printed_header_.insert(memref.marker.tid);
         if (trace_version_ != -1) { // Old versions may not have a version marker.
-            if (!should_skip(memref)) {
-                print_prefix(memref);
+            if (!should_skip(memstream, memref)) {
+                print_prefix(memstream, memref, -2);
                 std::cerr << "<marker: version " << trace_version_ << ">\n";
             }
         }
         if (filetype_ != -1) { // Handle old/malformed versions.
-            if (!should_skip(memref)) {
-                print_prefix(memref);
+            if (!should_skip(memstream, memref)) {
+                print_prefix(memstream, memref, -1);
                 std::cerr << "<marker: filetype 0x" << std::hex << filetype_ << std::dec
                           << ">\n";
             }
         }
     }
 
-    if (should_skip(memref))
+    if (should_skip(memstream, memref))
         return true;
 
     if (memref.marker.type == TRACE_TYPE_MARKER) {
@@ -250,25 +258,27 @@ view_t::process_memref(const memref_t &memref)
             if (last_window_[memref.marker.tid] != memref.marker.marker_value) {
                 std::cerr
                     << "------------------------------------------------------------\n";
-                print_prefix(memref, -1); // Already incremented for timestamp above.
+                print_prefix(memstream, memref,
+                             -1); // Already incremented for timestamp above.
             }
             if (timestamp_ > 0) {
                 std::cerr << "<marker: timestamp " << timestamp_ << ">\n";
                 timestamp_ = 0;
-                print_prefix(memref);
+                print_prefix(memstream, memref);
             }
             std::cerr << "<marker: window " << memref.marker.marker_value << ">\n";
             last_window_[memref.marker.tid] = memref.marker.marker_value;
         }
         if (timestamp_ > 0) {
-            print_prefix(memref, -1); // Already incremented for timestamp above.
+            print_prefix(memstream, memref,
+                         -1); // Already incremented for timestamp above.
             std::cerr << "<marker: timestamp " << timestamp_ << ">\n";
             timestamp_ = 0;
         }
     }
 
     if (memref.instr.tid != 0) {
-        print_prefix(memref);
+        print_prefix(memstream, memref);
     }
 
     if (memref.marker.type == TRACE_TYPE_MARKER) {
@@ -485,7 +495,7 @@ view_t::process_memref(const memref_t &memref)
     auto newline = disasm.find('\n');
     if (newline != std::string::npos && newline < disasm.size() - 1) {
         std::stringstream prefix;
-        print_prefix(memref, 0, prefix);
+        print_prefix(memstream, memref, 0, prefix);
         std::string skip_name(name_width, ' ');
         disasm.insert(newline + 1,
                       prefix.str() + skip_name + "                               ");
diff --git a/clients/drcachesim/tools/view.h b/clients/drcachesim/tools/view.h
index ec60700d12e..24819e81fe6 100644
--- a/clients/drcachesim/tools/view.h
+++ b/clients/drcachesim/tools/view.h
@@ -53,11 +53,12 @@ class view_t : public analysis_tool_t {
            uint64_t sim_refs, const std::string &syntax, unsigned int verbose,
            const std::string &alt_module_dir = "");
     std::string
-    initialize() override;
+    initialize_stream(memtrace_stream_t *serial_stream) override;
     bool
     parallel_shard_supported() override;
     void *
-    parallel_shard_init(int shard_index, void *worker_data) override;
+    parallel_shard_init_stream(int shard_index, void *worker_data,
+                               memtrace_stream_t *shard_stream) override;
     bool
     parallel_shard_exit(void *shard_data) override;
     bool
@@ -81,25 +82,26 @@ class view_t : public analysis_tool_t {
     };
 
     bool
-    should_skip(const memref_t &memref);
+    should_skip(memtrace_stream_t *memstream, const memref_t &memref);
 
     inline void
     print_header()
     {
-        std::cerr << std::setw(9) << "Output format:\n<record#>"
+        std::cerr << std::setw(9) << "Output format:\n<record#> <instr#>"
                   << ": T<tid> <record details>\n"
                   << "------------------------------------------------------------\n";
     }
 
     inline void
-    print_prefix(const memref_t &memref, int ref_adjust = 0,
+    print_prefix(memtrace_stream_t *memstream, const memref_t &memref, int ref_adjust = 0,
                  std::ostream &stream = std::cerr)
     {
         if (prev_tid_ != -1 && prev_tid_ != memref.instr.tid)
             stream << "------------------------------------------------------------\n";
         prev_tid_ = memref.instr.tid;
-        stream << std::setw(9) << (num_refs_ + ref_adjust) << ": T" << memref.marker.tid
-               << " ";
+        stream << std::setw(9) << (memstream->get_record_ordinal() + ref_adjust)
+               << std::setw(9) << memstream->get_instruction_ordinal() << ": T"
+               << memref.marker.tid << " "; // ref_adjust shifts only the record#.
     }
 
     /* We make this the first field so that dr_standalone_exit() is called after
@@ -130,10 +132,10 @@ class view_t : public analysis_tool_t {
     memref_tid_t prev_tid_;
     intptr_t filetype_;
     std::unordered_set<memref_tid_t> printed_header_;
-    uint64_t num_refs_;
     std::unordered_map<memref_tid_t, uintptr_t> last_window_;
     uintptr_t timestamp_;
     bool has_modules_;
+    memtrace_stream_t *serial_stream_ = nullptr;
 };
 
 #endif /* _VIEW_H_ */