Skip to content

Commit

Permalink
i#5538 memtrace seek, part 8: Add skip_thread_instructions() (#5731)
Browse files Browse the repository at this point in the history
Adds a new skip_instructions() reader iterator interface.  It is a
linear walk for every type of reader except a chunked zipfile walking
a single thread.

Adds a drcachesim command line option -skip_instrs which triggers the
analyzer to skip from the start before passing anything to the tool.

Refactors the reader_t++ to provide a process_input_entry to update
state while skipping.

Adds a unit test with an added trace file with a small chunk size.
The test checks the view output for every skip value from 0 to over
double the chunk size.

Leaves several pieces for future work:
+ Full support for skipping from the middle: the timestamp and cpuid will
  not always be duplicated with the current code.
+ Recording the record count in each chunk so we have an accurate
  count after skipping.
+ Presenting global headers skipped over as memtrace_stream_t values
  that tools can query.
+ Reading the schedule files for serial skipping (or the planned cpu
  iterator and skipping).
+ Repeating the timestamp+cpu for non-zipfile skipping.

Issue: #5538
  • Loading branch information
derekbruening authored Nov 11, 2022
1 parent 9fb14a7 commit 2c81eb8
Show file tree
Hide file tree
Showing 16 changed files with 647 additions and 236 deletions.
17 changes: 17 additions & 0 deletions clients/drcachesim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,15 @@ if (ZLIB_FOUND)
"until then, disabling zip output and fast seeking")
set(zip_reader "")
set(zlib_libs ${ZLIB_LIBRARIES})
set(ZIP_FOUND OFF)
else ()
file(GLOB minizip_srcs "${minizip_dir}/*.c")
if (NOT WIN32)
list(REMOVE_ITEM minizip_srcs "${minizip_dir}/iowin32.c")
endif ()
add_library(minizip STATIC ${minizip_srcs})
add_definitions(-DHAS_ZIP)
set(ZIP_FOUND ON)
# We add "minizip/" to avoid collisions with system "zip.h" on Mac.
include_directories(${minizip_dir}/..)
DR_export_target(minizip)
Expand Down Expand Up @@ -768,6 +770,21 @@ if (BUILD_TESTS)
use_DynamoRIO_extension(tool.drcacheoff.burst_traceopts drcovlib_static)

endif ()

# Builds and registers the -skip_instrs unit test.  It is limited to 64-bit x86
# builds with zipfile support because the trace file it is pointed at is an
# x86_64 .zip trace.
if (X86 AND X64 AND ZIP_FOUND)
# XXX i#5538: Add trace files for other arches.
# Path to the trace file that is passed to the test on its command line.
set(zip_path
"${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip")
# The test is a standalone executable built from a single source file.
add_executable(tool.drcacheoff.skip_unit_tests tests/skip_unit_tests.cpp)
configure_DynamoRIO_standalone(tool.drcacheoff.skip_unit_tests)
# Link the analyzer, the view tool, and raw2trace libraries into the test.
target_link_libraries(tool.drcacheoff.skip_unit_tests drmemtrace_analyzer
drmemtrace_view drmemtrace_raw2trace)
use_DynamoRIO_extension(tool.drcacheoff.skip_unit_tests drreg_static)
use_DynamoRIO_extension(tool.drcacheoff.skip_unit_tests drcovlib_static)
use_DynamoRIO_extension(tool.drcacheoff.skip_unit_tests drdecode)
# Register with CTest, handing the trace file to the test via --trace_file.
add_test(NAME tool.drcacheoff.skip_unit_tests
COMMAND tool.drcacheoff.skip_unit_tests --trace_file ${zip_path})
endif ()
endif ()

##################################################
Expand Down
19 changes: 16 additions & 3 deletions clients/drcachesim/analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,13 @@ analyzer_t::init_file_reader(const std::string &trace_path, int verbosity)
}

analyzer_t::analyzer_t(const std::string &trace_path, analysis_tool_t **tools,
int num_tools, int worker_count)
int num_tools, int worker_count, uint64_t skip_instrs)
: success_(true)
, num_tools_(num_tools)
, tools_(tools)
, parallel_(true)
, worker_count_(worker_count)
, skip_instrs_(skip_instrs)
{
if (!init_file_reader(trace_path)) {
success_ = false;
Expand Down Expand Up @@ -273,9 +274,17 @@ analyzer_t::process_tasks(std::vector<analyzer_shard_data_t *> *tasks)
tdata->index, worker_data[i], tdata->iter.get());
}
VPRINT(this, 1, "shard_data[0] is %p\n", shard_data[0]);
if (skip_instrs_ > 0) {
// We skip in each thread.
// TODO i#5538: Add top-level header data to memtrace_stream_t for
// access by tools, since we're skipping it here. We considered
// not skipping until we see the 1st timestamp but the stream access
// approach has other benefits and seems cleaner.
(*tdata->iter) = (*tdata->iter).skip_instructions(skip_instrs_);
}
for (; *tdata->iter != *trace_end_; ++(*tdata->iter)) {
const memref_t &memref = **tdata->iter;
for (int i = 0; i < num_tools_; ++i) {
const memref_t &memref = **tdata->iter;
if (!tools_[i]->parallel_shard_memref(shard_data[i], memref)) {
tdata->error = tools_[i]->parallel_shard_error(shard_data[i]);
VPRINT(this, 1,
Expand Down Expand Up @@ -314,9 +323,13 @@ analyzer_t::run()
if (!parallel_) {
if (!start_reading())
return false;
if (skip_instrs_ > 0) {
// TODO i#5538: Add top-level header data to memtrace_stream_t; see above.
(*serial_trace_iter_) = (*serial_trace_iter_).skip_instructions(skip_instrs_);
}
for (; *serial_trace_iter_ != *trace_end_; ++(*serial_trace_iter_)) {
const memref_t &memref = **serial_trace_iter_;
for (int i = 0; i < num_tools_; ++i) {
memref_t memref = **serial_trace_iter_;
// We short-circuit and exit on an error to avoid confusion over
// the results and avoid wasted continued work.
if (!tools_[i]->process_memref(memref)) {
Expand Down
8 changes: 5 additions & 3 deletions clients/drcachesim/analyzer.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2016-2020 Google, Inc. All rights reserved.
* Copyright (c) 2016-2022 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -66,7 +66,8 @@ class analyzer_t {
analyzer_t();
virtual ~analyzer_t(); /**< Destructor. */
/** Returns whether the analyzer was created successfully. */
virtual bool operator!();
virtual bool
operator!();
/** Returns further information on an error in initializing the analyzer. */
virtual std::string
get_error_string();
Expand All @@ -83,7 +84,7 @@ class analyzer_t {
* The analyzer calls the initialize() function on each tool before use.
*/
analyzer_t(const std::string &trace_path, analysis_tool_t **tools, int num_tools,
int worker_count = 0);
int worker_count = 0, uint64_t skip_instrs = 0);
/** Launches the analysis process. */
virtual bool
run();
Expand Down Expand Up @@ -164,6 +165,7 @@ class analyzer_t {
std::vector<std::vector<analyzer_shard_data_t *>> worker_tasks_;
int verbosity_ = 0;
const char *output_prefix_ = "[analyzer]";
uint64_t skip_instrs_ = 0;
};

#endif /* _ANALYZER_H_ */
1 change: 1 addition & 0 deletions clients/drcachesim/analyzer_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
analyzer_multi_t::analyzer_multi_t()
{
worker_count_ = op_jobs.get_value();
skip_instrs_ = op_skip_instrs.get_value();
// Initial measurements show it's sometimes faster to keep the parallel model
// of using single-file readers but use them sequentially, as opposed to
// the every-file interleaving reader, but the user can specify -jobs 1, so
Expand Down
2 changes: 1 addition & 1 deletion clients/drcachesim/common/memtrace_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* the record and instruction ordinals within the stream, in the presence of
* skipping: we could add fields to memref but we'd either have to append
* and have them at different offsets for each type or we'd have to break
* compatbility to prepend every time we added more; or we could add parameters
* compatibility to prepend every time we added more; or we could add parameters
* to process_memref(). Passing an interface to the init routines seems
* the simplest and most flexible.
*/
Expand Down
16 changes: 13 additions & 3 deletions clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,12 +487,22 @@ droption_t<int>
"For simulator types that support it, limits analyis to the single "
"thread with the given identifier. 0 enables all threads.");

// Frontend option -skip_instrs: the number of instructions to skip before any
// trace records are analyzed.  Defaults to 0 (no skipping).
droption_t<bytesize_t> op_skip_instrs(
    DROPTION_SCOPE_FRONTEND, "skip_instrs", 0, "Number of instructions to skip",
    "Specifies the number of instructions to skip in the beginning of the trace "
    "analysis. For serial iteration, this number is "
    "computed just once across the interleaving sequence of all threads; for parallel "
    "iteration, each thread skips this many instructions. When built with zipfile "
    "support, this skipping is optimized and large instruction counts can be quickly "
    "skipped; this is not the case for -skip_refs.");

droption_t<bytesize_t>
op_skip_refs(DROPTION_SCOPE_FRONTEND, "skip_refs", 0,
"Number of memory references to skip",
"Specifies the number of references to skip "
"in the beginning of the application execution. "
"These memory references are dropped instead of being simulated.");
"Specifies the number of references to skip in the beginning of the "
"application execution. These memory references are dropped instead "
"of being simulated. This skipping may be slow for large skip values; "
"consider -skip_instrs for a faster method of skipping.");

droption_t<bytesize_t> op_warmup_refs(
DROPTION_SCOPE_FRONTEND, "warmup_refs", 0,
Expand Down
1 change: 1 addition & 0 deletions clients/drcachesim/common/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ extern droption_t<std::string> op_tracer;
extern droption_t<std::string> op_tracer_alt;
extern droption_t<std::string> op_tracer_ops;
extern droption_t<int> op_only_thread;
extern droption_t<bytesize_t> op_skip_instrs;
extern droption_t<bytesize_t> op_skip_refs;
extern droption_t<bytesize_t> op_warmup_refs;
extern droption_t<double> op_warmup_fraction;
Expand Down
55 changes: 50 additions & 5 deletions clients/drcachesim/reader/file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#ifndef _FILE_READER_H_
#define _FILE_READER_H_ 1

#include <inttypes.h>
#include <string.h>
#include <fstream>
#include <queue>
Expand All @@ -55,11 +56,7 @@
# ifdef WINDOWS
# define ZHEX64_FORMAT_STRING "%016I64x"
# else
# if defined(__i386__) || defined(__arm__) || defined(__APPLE__)
# define ZHEX64_FORMAT_STRING "%016llx"
# else
# define ZHEX64_FORMAT_STRING "%016lx"
# endif
# define ZHEX64_FORMAT_STRING "%" PRIx64
# endif
#endif

Expand Down Expand Up @@ -100,6 +97,35 @@ template <typename T> class file_reader_t : public reader_t {
virtual bool
is_complete();

/**
 * Skips forward by instruction_count instructions and returns this reader.
 *
 * When more than one input file is open, this defers to the base class
 * reader_t::skip_instructions().  For a single input it first reads entries
 * until chunk_instr_count_ becomes non-zero and then calls
 * skip_thread_instructions() on input 0.
 *
 * at_eof_ is set if the input ends before the skip completes or if the
 * per-thread skip reports failure or end-of-file.
 */
reader_t &
skip_instructions(uint64_t instruction_count) override
{
    if (input_files_.size() > 1) {
        // TODO i#5538: For fast thread-interleaved (whether serial here or the
        // forthcoming per-cpu iteration) we need to read in the schedule file(s)
        // that raw2trace writes out so that we can compute how far to separately
        // fast-skip in each interleaved thread by calling the per-thread version.
        // We'll also need to update the memref pid+tid state since we're not
        // repeating top headers in every thread after a skip.  For now this is a
        // slow linear walk.
        return reader_t::skip_instructions(instruction_count);
    }
    // If the user asks to skip from the very start, we still need to find the chunk
    // count marker and drain the header queue.
    // TODO i#5538: Record all of the header values until the first timestamp
    // and present them as new memtrace_stream_t interfaces.
    while (chunk_instr_count_ == 0) {
        input_entry_ = read_next_entry();
        if (input_entry_ == nullptr) {
            // The input ended (or failed) before the chunk count marker was seen.
            // Without this check a truncated or empty input would either hand a
            // null entry to process_input_entry() or loop here forever.
            at_eof_ = true;
            return *this;
        }
        process_input_entry();
    }
    if (!queues_[0].empty())
        ERRMSG("Failed to drain header queue\n");
    bool eof = false;
    // Both a failed skip and hitting end-of-file leave nothing further to read.
    if (!skip_thread_instructions(0, instruction_count, &eof) || eof)
        at_eof_ = true;
    return *this;
}

protected:
bool
read_next_thread_entry(size_t thread_index, OUT trace_entry_t *entry,
Expand Down Expand Up @@ -297,6 +323,25 @@ template <typename T> class file_reader_t : public reader_t {
return nullptr;
}

/**
 * Default per-thread skip for file types that have no fast seeking: reads
 * entries from the input at thread_index one at a time, feeding each through
 * process_input_entry(), until cur_instr_count_ has advanced by more than
 * instruction_count from its value on entry.
 *
 * @param thread_index       Index of the input to read from.
 * @param instruction_count  Number of instructions to skip.
 * @param eof                Passed through to read_next_thread_entry().
 * @return false as soon as read_next_thread_entry() returns false; true once
 *         the requested number of instructions has been passed.
 */
virtual bool
skip_thread_instructions(size_t thread_index, uint64_t instruction_count,
                         OUT bool *eof)
{
    // Default implementation for file types that have no fast seeking and must do a
    // linear walk.
    // We measure progress as a difference from the starting count instead of
    // computing an absolute "start + count + 1" stop value: that sum wraps around
    // for very large instruction_count values, which would end the loop
    // immediately and silently skip nothing.
    const uint64_t start_count = cur_instr_count_;
    while (cur_instr_count_ - start_count <= instruction_count) {
        if (!read_next_thread_entry(thread_index, &entry_copy_, eof))
            return false;
        // Update core state.
        input_entry_ = &entry_copy_;
        process_input_entry();
        // TODO i#5538: Remember the last timestamp+cpu and insert it; share
        // code with the zipfile reader.
    }
    return true;
}

private:
std::string input_path_;
std::vector<std::string> input_path_list_;
Expand Down
Loading

0 comments on commit 2c81eb8

Please sign in to comment.