Skip to content

Commit

Permalink
i#5843 scheduler: Refactor readers to be single-input
Browse files Browse the repository at this point in the history
Removes multi-input support from file_reader_t and other readers now
that the scheduler_t owns that.  Specifically:

+ Removes read_next_thread_entry() and requires that read_next_entry()
  always check the queue (via a provided helper function).

+ Removes skip_thread_instructions() and refactors the pre-skip header
  reading and the post-skip walking while remembering timestamps.
  Places these latter two inside reader_t for use by all readers, with
  zipfile overriding just the fast skip in the middle and sharing all
  the other code.  This refactoring and sharing solves the problem of
  missing timestamps when skipping from the middle.

+ Removes the arrays of data for multiple inputs from file_reader_t
  and all subclasses.

Updates the view_test to use a scheduler for its multiple-input mock
reader.

While at it, removes is_complete().

Issue: #5843, #5538
  • Loading branch information
derekbruening committed Mar 9, 2023
1 parent 0a26f93 commit d1c1db0
Show file tree
Hide file tree
Showing 18 changed files with 398 additions and 645 deletions.
4 changes: 2 additions & 2 deletions api/docs/debug_memtrace.dox
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* ******************************************************************************
* Copyright (c) 2010-2022 Google, Inc. All rights reserved.
* Copyright (c) 2010-2023 Google, Inc. All rights reserved.
* ******************************************************************************/

/*
Expand Down Expand Up @@ -100,7 +100,7 @@ Type cheat sheet (from trace_type_t enum):
- 0x19 header
- 0x16 thread
- 0x18 pid
- 0x1c marker: 2=timestamp; 3=cpuid
- 0x1c marker: 2=timestamp; 3=cpuid; 0xc=version; 9=filetype
- 0x0a instr (non-cti)
- 0x0e direct call
- 0x00 load
Expand Down
3 changes: 3 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ changes:
- Removed the drcachesim external iterator analyzer interface. Users should instead
use the new #dynamorio::drmemtrace::scheduler_tmpl_t interface for direct control
over iteration. See \ref sec_drcachesim_sched for example code.
- Refactored the drmemtrace reader and file reader classes to better fit the
new scheduler model: now each reader owns just one single stream of records
with all multi-stream interleaving owned by the scheduler.

Further non-compatibility-affecting changes include:
- Added AArchXX support for attaching to a running process.
Expand Down
3 changes: 2 additions & 1 deletion clients/drcachesim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,8 @@ if (BUILD_TESTS)
add_executable(tool.drcacheoff.view_test tests/view_test.cpp reader/file_reader.cpp)
configure_DynamoRIO_standalone(tool.drcacheoff.view_test)
add_win32_flags(tool.drcacheoff.view_test)
target_link_libraries(tool.drcacheoff.view_test drmemtrace_view drmemtrace_raw2trace)
target_link_libraries(tool.drcacheoff.view_test drmemtrace_view drmemtrace_raw2trace
drmemtrace_analyzer)
use_DynamoRIO_extension(tool.drcacheoff.view_test drreg_static)
use_DynamoRIO_extension(tool.drcacheoff.view_test drcovlib_static)
use_DynamoRIO_extension(tool.drcacheoff.view_test drdecode)
Expand Down
62 changes: 26 additions & 36 deletions clients/drcachesim/reader/compressed_file_reader.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2017-2022 Google, Inc. All rights reserved.
* Copyright (c) 2017-2023 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -46,25 +46,24 @@ open_single_file_common(const std::string &path, gzFile &out)
return out != nullptr;
}

bool
read_next_thread_entry_common(gzip_reader_t *gzip, OUT trace_entry_t *entry,
OUT bool *eof)
trace_entry_t *
read_next_entry_common(gzip_reader_t *gzip, bool *eof)
{
if (gzip->cur_buf >= gzip->max_buf) {
int len = gzread(gzip->file, gzip->buf, sizeof(gzip->buf));
// Returns less than asked-for for end of file, or –1 for error.
// We should always get a multiple of the record size.
if (len < static_cast<int>(sizeof(*entry)) ||
len % static_cast<int>(sizeof(*entry)) != 0) {
if (len < static_cast<int>(sizeof(trace_entry_t)) ||
len % static_cast<int>(sizeof(trace_entry_t)) != 0) {
*eof = (len >= 0);
return false;
return nullptr;
}
gzip->cur_buf = gzip->buf;
gzip->max_buf = gzip->buf + (len / sizeof(*gzip->max_buf));
}
*entry = *gzip->cur_buf;
trace_entry_t *res = gzip->cur_buf;
++gzip->cur_buf;
return true;
return res;
}

} // namespace
Expand All @@ -78,9 +77,8 @@ template <>
/* clang-format on */
file_reader_t<gzip_reader_t>::~file_reader_t<gzip_reader_t>()
{
for (auto &gzip : input_files_)
gzclose(gzip.file);
delete[] thread_eof_;
if (input_file_.file != nullptr)
gzclose(input_file_.file);
}

template <>
Expand All @@ -91,34 +89,24 @@ file_reader_t<gzip_reader_t>::open_single_file(const std::string &path)
if (!open_single_file_common(path, file))
return false;
VPRINT(this, 1, "Opened input file %s\n", path.c_str());
input_files_.emplace_back(file);
input_file_ = gzip_reader_t(file);
return true;
}

template <>
bool
file_reader_t<gzip_reader_t>::read_next_thread_entry(size_t thread_index,
OUT trace_entry_t *entry,
OUT bool *eof)
trace_entry_t *
file_reader_t<gzip_reader_t>::read_next_entry()
{
if (!read_next_thread_entry_common(&input_files_[thread_index], entry, eof))
return false;
VPRINT(this, 4, "Read from thread #%zd file: type=%s (%d), size=%d, addr=%zu\n",
thread_index, trace_type_names[entry->type], entry->type, entry->size,
entry->addr);
return true;
}

template <>
bool
file_reader_t<gzip_reader_t>::is_complete()
{
// The gzip reading interface does not support seeking to SEEK_END so there
// is no efficient way to read the footer.
// We could have the trace file writer seek back and set a bit at the start.
// Currently we are forced to not use this function.
// XXX: Should we just remove this interface, then?
return false;
trace_entry_t *entry = read_queue();
if (entry != nullptr)
return entry;
entry = read_next_entry_common(&input_file_, &at_eof_);
if (entry == nullptr)
return entry;
VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
trace_type_names[entry->type], entry->type, entry->size, entry->addr);
entry_copy_ = *entry;
return &entry_copy_;
}

namespace dynamorio {
Expand Down Expand Up @@ -154,8 +142,10 @@ template <>
bool
record_file_reader_t<gzip_reader_t>::read_next_entry()
{
if (!read_next_thread_entry_common(input_file_.get(), &cur_entry_, &eof_))
trace_entry_t *entry = read_next_entry_common(input_file_.get(), &eof_);
if (entry == nullptr)
return false;
cur_entry_ = *entry;
VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
trace_type_names[cur_entry_.type], cur_entry_.type, cur_entry_.size,
cur_entry_.addr);
Expand Down
4 changes: 3 additions & 1 deletion clients/drcachesim/reader/compressed_file_reader.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2017-2022 Google, Inc. All rights reserved.
* Copyright (c) 2017-2023 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -40,6 +40,8 @@
#include "record_file_reader.h"

struct gzip_reader_t {
gzip_reader_t()
: file(nullptr) {};
explicit gzip_reader_t(gzFile file)
: file(file)
{
Expand Down
74 changes: 16 additions & 58 deletions clients/drcachesim/reader/file_reader.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2016-2022 Google, Inc. All rights reserved.
* Copyright (c) 2016-2023 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -38,75 +38,33 @@ template <>
/* clang-format on */
file_reader_t<std::ifstream *>::~file_reader_t()
{
for (auto fstream : input_files_) {
delete fstream;
}
delete[] thread_eof_;
delete input_file_;
}

template <>
bool
file_reader_t<std::ifstream *>::open_single_file(const std::string &path)
{
auto fstream = new std::ifstream(path, std::ifstream::binary);
if (!*fstream)
input_file_ = new std::ifstream(path, std::ifstream::binary);
if (!*input_file_)
return false;
VPRINT(this, 1, "Opened input file %s\n", path.c_str());
input_files_.push_back(fstream);
return true;
}

template <>
bool
file_reader_t<std::ifstream *>::read_next_thread_entry(size_t thread_index,
OUT trace_entry_t *entry,
OUT bool *eof)
{
if (!input_files_[thread_index]->read((char *)entry, sizeof(*entry))) {
*eof = input_files_[thread_index]->eof();
return false;
}
VPRINT(this, 4, "Read from thread #%zd file: type=%s (%d), size=%d, addr=%zu\n",
thread_index, trace_type_names[entry->type], entry->type, entry->size,
entry->addr);
return true;
}

template <>
bool
file_reader_t<std::ifstream *>::is_complete()
trace_entry_t *
file_reader_t<std::ifstream *>::read_next_entry()
{
// FIXME i#3230: this code is in the middle of refactoring for split thread files.
// We support the is_complete() call from analyzer_multi for a single file for now,
// but may have to abandon this function altogether since gzFile doesn't support it.
bool opened_temporarily = false;
if (input_files_.empty()) {
// Supporting analyzer_multi calling before init() for a single legacy file.
opened_temporarily = true;
if (!input_path_list_.empty() || input_path_.empty() ||
directory_iterator_t::is_directory(input_path_))
return false; // Not supported.
if (!open_single_file(input_path_))
return false;
}
bool res = false;
for (auto fstream : input_files_) {
res = false;
std::streampos pos = fstream->tellg();
fstream->seekg(-(int)sizeof(trace_entry_t), fstream->end);
// Avoid reaching eof b/c we can't seek away from it.
if (fstream->read((char *)&entry_copy_.type, sizeof(entry_copy_.type)) &&
entry_copy_.type == TRACE_TYPE_FOOTER)
res = true;
fstream->seekg(pos);
if (!res)
break;
}
if (opened_temporarily) {
// Put things back for init().
for (auto fstream : input_files_)
delete fstream;
input_files_.clear();
trace_entry_t *from_queue = read_queue();
if (from_queue != nullptr)
return from_queue;
if (!input_file_->read((char *)&entry_copy_, sizeof(entry_copy_))) {
at_eof_ = input_file_->eof();
return nullptr;
}
return res;
VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
trace_type_names[entry_copy_.type], entry_copy_.type, entry_copy_.size,
entry_copy_.addr);
return &entry_copy_;
}
Loading

0 comments on commit d1c1db0

Please sign in to comment.