i#5843 scheduler: Refactor readers to be single-input

Removes multi-input support from file_reader_t and other readers now that the scheduler_t owns that. Specifically: + Removes read_next_thread_entry() and requires that read_next_entry() always check the queue (via a provided helper function). + Removes skip_thread_instructions() and refactors the pre-skip header reading and the post-skip walking while remembering timestamps. Places these latter two inside reader_t for use by all readers, with zipfile overriding just the fast skip in the middle and sharing all the other code. This refactoring and sharing solves the problem of missing timestamps when skipping from the middle. + Removes the arrays of data for multiple inputs from file_reader_t and all subclasses. Updates the view_test to use a scheduler for its multiple-input mock reader. While at it, removes is_complete(). Issue: #5843, #5538
DynamoRIO · Mar 9, 2023 · d1c1db0 · d1c1db0
1 parent 0a26f93
commit d1c1db0
Show file tree

Hide file tree

Showing 18 changed files with 398 additions and 645 deletions.
diff --git a/api/docs/debug_memtrace.dox b/api/docs/debug_memtrace.dox
@@ -1,5 +1,5 @@
 /* ******************************************************************************
- * Copyright (c) 2010-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2010-2023 Google, Inc.  All rights reserved.
  * ******************************************************************************/
 
 /*
@@ -100,7 +100,7 @@ Type cheat sheet (from trace_type_t enum):
 - 0x19 header
 - 0x16 thread
 - 0x18 pid
-- 0x1c marker: 2=timestamp; 3=cpuid
+- 0x1c marker: 2=timestamp; 3=cpuid; 0xc=version; 9=filetype
 - 0x0a instr (non-cti)
 - 0x0e direct call
 - 0x00 load

diff --git a/api/docs/release.dox b/api/docs/release.dox
@@ -149,6 +149,9 @@ changes:
  - Removed the drcachesim external iterator analyzer interface.  Users should instead
    use the new #dynamorio::drmemtrace::scheduler_tmpl_t interface for direct control
    over iteration.  See \ref sec_drcachesim_sched for example code.
+ - Refactored the drmemtrace reader and file reader classes to better fit the
+   new scheduler model: now each reader owns just one single stream of records
+   with all multi-stream interleaving owned by the scheduler.
 
 Further non-compatibility-affecting changes include:
  - Added AArchXX support for attaching to a running process.

diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt
@@ -715,7 +715,8 @@ if (BUILD_TESTS)
     add_executable(tool.drcacheoff.view_test tests/view_test.cpp reader/file_reader.cpp)
     configure_DynamoRIO_standalone(tool.drcacheoff.view_test)
     add_win32_flags(tool.drcacheoff.view_test)
-    target_link_libraries(tool.drcacheoff.view_test drmemtrace_view drmemtrace_raw2trace)
+    target_link_libraries(tool.drcacheoff.view_test drmemtrace_view drmemtrace_raw2trace
+      drmemtrace_analyzer)
     use_DynamoRIO_extension(tool.drcacheoff.view_test drreg_static)
     use_DynamoRIO_extension(tool.drcacheoff.view_test drcovlib_static)
     use_DynamoRIO_extension(tool.drcacheoff.view_test drdecode)

diff --git a/clients/drcachesim/reader/compressed_file_reader.cpp b/clients/drcachesim/reader/compressed_file_reader.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2017-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2017-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -46,25 +46,24 @@ open_single_file_common(const std::string &path, gzFile &out)
     return out != nullptr;
 }
 
-bool
-read_next_thread_entry_common(gzip_reader_t *gzip, OUT trace_entry_t *entry,
-                              OUT bool *eof)
+trace_entry_t *
+read_next_entry_common(gzip_reader_t *gzip, bool *eof)
 {
     if (gzip->cur_buf >= gzip->max_buf) {
         int len = gzread(gzip->file, gzip->buf, sizeof(gzip->buf));
         // Returns less than asked-for for end of file, or –1 for error.
         // We should always get a multiple of the record size.
-        if (len < static_cast<int>(sizeof(*entry)) ||
-            len % static_cast<int>(sizeof(*entry)) != 0) {
+        if (len < static_cast<int>(sizeof(trace_entry_t)) ||
+            len % static_cast<int>(sizeof(trace_entry_t)) != 0) {
             *eof = (len >= 0);
-            return false;
+            return nullptr;
         }
         gzip->cur_buf = gzip->buf;
         gzip->max_buf = gzip->buf + (len / sizeof(*gzip->max_buf));
     }
-    *entry = *gzip->cur_buf;
+    trace_entry_t *res = gzip->cur_buf;
     ++gzip->cur_buf;
-    return true;
+    return res;
 }
 
 } // namespace
@@ -78,9 +77,8 @@ template <>
 /* clang-format on */
 file_reader_t<gzip_reader_t>::~file_reader_t<gzip_reader_t>()
 {
-    for (auto &gzip : input_files_)
-        gzclose(gzip.file);
-    delete[] thread_eof_;
+    if (input_file_.file != nullptr)
+        gzclose(input_file_.file);
 }
 
 template <>
@@ -91,34 +89,24 @@ file_reader_t<gzip_reader_t>::open_single_file(const std::string &path)
     if (!open_single_file_common(path, file))
         return false;
     VPRINT(this, 1, "Opened input file %s\n", path.c_str());
-    input_files_.emplace_back(file);
+    input_file_ = gzip_reader_t(file);
     return true;
 }
 
 template <>
-bool
-file_reader_t<gzip_reader_t>::read_next_thread_entry(size_t thread_index,
-                                                     OUT trace_entry_t *entry,
-                                                     OUT bool *eof)
+trace_entry_t *
+file_reader_t<gzip_reader_t>::read_next_entry()
 {
-    if (!read_next_thread_entry_common(&input_files_[thread_index], entry, eof))
-        return false;
-    VPRINT(this, 4, "Read from thread #%zd file: type=%s (%d), size=%d, addr=%zu\n",
-           thread_index, trace_type_names[entry->type], entry->type, entry->size,
-           entry->addr);
-    return true;
-}
-
-template <>
-bool
-file_reader_t<gzip_reader_t>::is_complete()
-{
-    // The gzip reading interface does not support seeking to SEEK_END so there
-    // is no efficient way to read the footer.
-    // We could have the trace file writer seek back and set a bit at the start.
-    // Currently we are forced to not use this function.
-    // XXX: Should we just remove this interface, then?
-    return false;
+    trace_entry_t *entry = read_queue();
+    if (entry != nullptr)
+        return entry;
+    entry = read_next_entry_common(&input_file_, &at_eof_);
+    if (entry == nullptr)
+        return entry;
+    VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
+           trace_type_names[entry->type], entry->type, entry->size, entry->addr);
+    entry_copy_ = *entry;
+    return &entry_copy_;
 }
 
 namespace dynamorio {
@@ -154,8 +142,10 @@ template <>
 bool
 record_file_reader_t<gzip_reader_t>::read_next_entry()
 {
-    if (!read_next_thread_entry_common(input_file_.get(), &cur_entry_, &eof_))
+    trace_entry_t *entry = read_next_entry_common(input_file_.get(), &eof_);
+    if (entry == nullptr)
         return false;
+    cur_entry_ = *entry;
     VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
            trace_type_names[cur_entry_.type], cur_entry_.type, cur_entry_.size,
            cur_entry_.addr);

diff --git a/clients/drcachesim/reader/compressed_file_reader.h b/clients/drcachesim/reader/compressed_file_reader.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2017-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2017-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -40,6 +40,8 @@
 #include "record_file_reader.h"
 
 struct gzip_reader_t {
+    gzip_reader_t()
+        : file(nullptr) {};
     explicit gzip_reader_t(gzFile file)
         : file(file)
     {

diff --git a/clients/drcachesim/reader/file_reader.cpp b/clients/drcachesim/reader/file_reader.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2016-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2016-2023 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -38,75 +38,33 @@ template <>
 /* clang-format on */
 file_reader_t<std::ifstream *>::~file_reader_t()
 {
-    for (auto fstream : input_files_) {
-        delete fstream;
-    }
-    delete[] thread_eof_;
+    delete input_file_;
 }
 
 template <>
 bool
 file_reader_t<std::ifstream *>::open_single_file(const std::string &path)
 {
-    auto fstream = new std::ifstream(path, std::ifstream::binary);
-    if (!*fstream)
+    input_file_ = new std::ifstream(path, std::ifstream::binary);
+    if (!*input_file_)
         return false;
     VPRINT(this, 1, "Opened input file %s\n", path.c_str());
-    input_files_.push_back(fstream);
-    return true;
-}
-
-template <>
-bool
-file_reader_t<std::ifstream *>::read_next_thread_entry(size_t thread_index,
-                                                       OUT trace_entry_t *entry,
-                                                       OUT bool *eof)
-{
-    if (!input_files_[thread_index]->read((char *)entry, sizeof(*entry))) {
-        *eof = input_files_[thread_index]->eof();
-        return false;
-    }
-    VPRINT(this, 4, "Read from thread #%zd file: type=%s (%d), size=%d, addr=%zu\n",
-           thread_index, trace_type_names[entry->type], entry->type, entry->size,
-           entry->addr);
     return true;
 }
 
 template <>
-bool
-file_reader_t<std::ifstream *>::is_complete()
+trace_entry_t *
+file_reader_t<std::ifstream *>::read_next_entry()
 {
-    // FIXME i#3230: this code is in the middle of refactoring for split thread files.
-    // We support the is_complete() call from analyzer_multi for a single file for now,
-    // but may have to abandon this function altogether since gzFile doesn't support it.
-    bool opened_temporarily = false;
-    if (input_files_.empty()) {
-        // Supporting analyzer_multi calling before init() for a single legacy file.
-        opened_temporarily = true;
-        if (!input_path_list_.empty() || input_path_.empty() ||
-            directory_iterator_t::is_directory(input_path_))
-            return false; // Not supported.
-        if (!open_single_file(input_path_))
-            return false;
-    }
-    bool res = false;
-    for (auto fstream : input_files_) {
-        res = false;
-        std::streampos pos = fstream->tellg();
-        fstream->seekg(-(int)sizeof(trace_entry_t), fstream->end);
-        // Avoid reaching eof b/c we can't seek away from it.
-        if (fstream->read((char *)&entry_copy_.type, sizeof(entry_copy_.type)) &&
-            entry_copy_.type == TRACE_TYPE_FOOTER)
-            res = true;
-        fstream->seekg(pos);
-        if (!res)
-            break;
-    }
-    if (opened_temporarily) {
-        // Put things back for init().
-        for (auto fstream : input_files_)
-            delete fstream;
-        input_files_.clear();
+    trace_entry_t *from_queue = read_queue();
+    if (from_queue != nullptr)
+        return from_queue;
+    if (!input_file_->read((char *)&entry_copy_, sizeof(entry_copy_))) {
+        at_eof_ = input_file_->eof();
+        return nullptr;
     }
-    return res;
+    VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
+           trace_type_names[entry_copy_.type], entry_copy_.type, entry_copy_.size,
+           entry_copy_.addr);
+    return &entry_copy_;
 }