Skip to content

Commit

Permalink
i#6635 core filter, part 6: Add core-sharded record filter output (#6704
Browse files Browse the repository at this point in the history
)

Multiple changes to allow the record filter to operate in core-sharded
fashion:

Makes the pc2encoding table per-input, as one input can migrate across
multiple core shards and thus one core can see a later instruction
without ever having seen its encoding. C++11 provides no std:: reader-writer
lock, so we use mutexes for synchronization -- but we limit the added global
lock to once per context switch, and we assume there is no
contention for the per-input lock as only one shard operates on one
input at any one time.

Sets core_sharded_ on the memref counter reader to avoid asserts.

Appends footer records to cores whose output ends in an idle record.

Adds an error check ensuring a single workload, as multiple workloads would
require expanding the keys used in some tables.

Renames the output files to include "core.<shard_index>" and not the
tid. This is surprisingly complex, as an input filename is needed to
determine the output filename compression type, yet not all shards are
guaranteed to have an input at the start. A condition variable and mutex
are used to coordinate this among shards.

Adds support for started-idle cores by synthesizing headers in
record_filter; #6703 covers having the scheduler do this for all
analyzers. Adds the version as another field available up front from the
scheduler, and adds an idle-tid sentinel, which needs to be distinct from
INVALID_THREAD_ID.

Adds two end-to-end tests, one with a single-threaded app scheduled onto
4 cores to test start-idle cores and one to test multiple threads. Adds
a macro to share code with the existing end-to-end test.

Updates the unit test mock classes.

Issue: #6635, #6703
  • Loading branch information
derekbruening authored Mar 13, 2024
1 parent 5238a6a commit 6d7b1a4
Show file tree
Hide file tree
Showing 14 changed files with 367 additions and 46 deletions.
4 changes: 4 additions & 0 deletions clients/drcachesim/common/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ namespace drmemtrace {
#define INVALID_THREAD_ID 0
// We avoid collisions with DR's INVALID_PROCESS_ID by using our own name.
#define INVALID_PID -1
// A separate sentinel for an idle core with no software thread.
// XXX i#6703: Export this in scheduler.h as part of its API when we have
// the scheduler insert synthetic headers.
#define IDLE_THREAD_ID -1

// XXX: perhaps we should use a C++-ish stream approach instead
// This cannot be named ERROR as that conflicts with Windows headers.
Expand Down
15 changes: 11 additions & 4 deletions clients/drcachesim/reader/reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,8 @@ reader_t::process_input_entry()
// Look for encoding bits that belong to this instr.
if (last_encoding_.size > 0) {
if (last_encoding_.size != cur_ref_.instr.size) {
ERRMSG("Encoding size %zu != instr size %zu\n", last_encoding_.size,
cur_ref_.instr.size);
ERRMSG("Encoding size %zu != instr size %zu for PC 0x%zx\n",
last_encoding_.size, cur_ref_.instr.size, cur_ref_.instr.addr);
assert(false);
}
memcpy(cur_ref_.instr.encoding, last_encoding_.bits, last_encoding_.size);
Expand All @@ -216,7 +216,11 @@ reader_t::process_input_entry()
const auto &it = encodings_.find(cur_ref_.instr.addr);
if (it != encodings_.end()) {
memcpy(cur_ref_.instr.encoding, it->second.bits, it->second.size);
} else if (!expect_no_encodings_) {
} else if (!expect_no_encodings_ &&
// A thread can migrate after encoding records are seen.
// It is up to the user to properly handle encodings
// in this mode.
!core_sharded_) {
ERRMSG("Missing encoding for 0x%zx\n", cur_ref_.instr.addr);
assert(false);
}
Expand Down Expand Up @@ -288,7 +292,9 @@ reader_t::process_input_entry()
break;
case TRACE_TYPE_MARKER:
cur_ref_.marker.type = (trace_type_t)input_entry_->type;
assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_);
assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_ ||
// We have to wait for the filetype to see whether we're core-sharded.
!found_filetype_);
cur_ref_.marker.pid = cur_pid_;
cur_ref_.marker.tid = cur_tid_;
cur_ref_.marker.marker_type = (trace_marker_type_t)input_entry_->size;
Expand Down Expand Up @@ -327,6 +333,7 @@ reader_t::process_input_entry()
version_ = cur_ref_.marker.marker_value;
else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
filetype_ = cur_ref_.marker.marker_value;
found_filetype_ = true;
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_)) {
expect_no_encodings_ = false;
}
Expand Down
3 changes: 3 additions & 0 deletions clients/drcachesim/reader/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,10 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
};

std::unordered_map<addr_t, encoding_info_t> encodings_;
// Whether this reader's input stream interleaves software threads and thus
// some thread-based checks may not apply.
bool core_sharded_ = false;
bool found_filetype_ = false;

private:
memref_t cur_ref_;
Expand Down
11 changes: 9 additions & 2 deletions clients/drcachesim/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1827,7 +1827,12 @@ scheduler_tmpl_t<RecordType, ReaderType>::skip_instructions(output_ordinal_t out

// If we skipped from the start we may not have seen the initial headers:
// use the input's cached copies.
if (stream->version_ == 0) {
// We set the version and filetype up front for outputs with
// an initial input, so we check a different field to detect a
// skip.
if (stream->cache_line_size_ == 0 ||
// Check the version too as a fallback for inputs with no cache size.
stream->version_ == 0) {
stream->version_ = input.reader->get_version();
stream->last_timestamp_ = input.reader->get_last_timestamp();
stream->first_timestamp_ = input.reader->get_first_timestamp();
Expand Down Expand Up @@ -2127,7 +2132,9 @@ scheduler_tmpl_t<RecordType, ReaderType>::set_cur_input(output_ordinal_t output,
std::lock_guard<std::mutex> lock(*inputs_[input].lock);

if (prev_input < 0 && outputs_[output].stream->filetype_ == 0) {
// Set the filetype up front, to let the user query at init time as documented.
// Set the version and filetype up front, to let the user query at init time
// as documented.
outputs_[output].stream->version_ = inputs_[input].reader->get_version();
outputs_[output].stream->filetype_ = inputs_[input].reader->get_filetype();
}

Expand Down
4 changes: 3 additions & 1 deletion clients/drcachesim/scheduler/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
/**
* Returns the #trace_version_t value from the
* #TRACE_MARKER_TYPE_VERSION record in the trace header.
* This can be queried prior to explicitly retrieving any records from
* output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t::
* scheduler_options_t.read_inputs_in_init is false.
*/
uint64_t
get_version() const override
Expand All @@ -881,7 +884,6 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* This can be queried prior to explicitly retrieving any records from
* output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t::
* scheduler_options_t.read_inputs_in_init is false.
*/
uint64_t
get_filetype() const override
Expand Down
8 changes: 8 additions & 0 deletions clients/drcachesim/tests/record_filter_bycore_multi.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Estimation of pi is 3.14.*
Trace invariant checks passed
Output .* entries from .* entries.
Schedule stats tool results:
.*
Core #0 schedule: .*
Core #1 schedule: .*
Core #2 schedule: .*
14 changes: 14 additions & 0 deletions clients/drcachesim/tests/record_filter_bycore_uni.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifdef WINDOWS
Hit delay threshold: enabling tracing.
Exiting process after .* references.
#else
Hello, world!
#endif
Trace invariant checks passed
Output .* entries from .* entries.
Schedule stats tool results:
.*
Core #0 schedule: .*
Core #1 schedule: .*
Core #2 schedule: .*
Core #3 schedule: .*
7 changes: 7 additions & 0 deletions clients/drcachesim/tests/record_filter_unit_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class test_record_filter_t : public dynamorio::drmemtrace::record_filter_t {
{
output_.push_back(entry);
shard->cur_refs += shard->memref_counter.entry_memref_count(&entry);
shard->last_written_record = entry;
return true;
}
std::string
Expand Down Expand Up @@ -146,6 +147,12 @@ class local_stream_t : public default_memtrace_stream_t {
{
last_timestamp_ = last_timestamp;
}
// Overrides the stream's input-id query for the unit tests.
// Always returns the constant 0 regardless of stream state.
int64_t
get_input_id() const override
{
// Just one input for our tests, so every record is attributed to
// input 0.
return 0;
}

private:
uint64_t last_timestamp_;
Expand Down
Loading

0 comments on commit 6d7b1a4

Please sign in to comment.