i#6635 core filter, part 6: Add core-sharded record filter output (#6704)

Multiple changes to allow the record filter to operate in core-sharded fashion: Makes the pc2encoding table per-input, as one input can migrate across multiple core shards and thus one core can see a later instruction without ever having seen its encoding. To handle synchronization, there is no C++11 std:: rwlock, so we use mutexes -- but we limit their use to per-context-switch for the added global lock, and we assume there is no contention for the per-input lock as only one shard operates on one input at any one time. Sets the memref counter reader to core_sharded_ to avoid asserts. Appends footer records to ending-in-idle-record cores. Adds an error check ensuring a single workload, as multiple will require expanding the keys used in some tables. Renames the output files to include "core.<shard_index>" and not the tid. This is surprisingly complex, as an input filename is needed to determine the output filename compression type: yet not all shards are guaranteed to have an input at the start. A condition variable and mutex are used to coordinate this among shards. Adds support for started-idle cores by synthesizing headers in record_filter; #6703 covers having the scheduler do this for all analyzers. Adds the version as another field available up front from the scheduler, and adds an idle-tid sentinel needed to be distinct from INVALID_THREAD_ID. Adds two end-to-end tests, one with a single-threaded app scheduled onto 4 cores to test start-idle cores and one to test multiple threads. Adds a macro to share code with the existing end-to-end test. Updates the unit test mock classes. Issue: #6635, #6703
DynamoRIO · Mar 13, 2024 · 6d7b1a4 · 6d7b1a4
1 parent 5238a6a
commit 6d7b1a4
Show file tree

Hide file tree

Showing 14 changed files with 367 additions and 46 deletions.
diff --git a/clients/drcachesim/common/utils.h b/clients/drcachesim/common/utils.h
@@ -59,6 +59,10 @@ namespace drmemtrace {
 #define INVALID_THREAD_ID 0
 // We avoid collisions with DR's INVALID_PROCESS_ID by using our own name.
 #define INVALID_PID -1
+// A separate sentinel for an idle core with no software thread.
+// XXX i#6703: Export this in scheduler.h as part of its API when we have
+// the scheduler insert synthetic headers.
+#define IDLE_THREAD_ID -1
 
 // XXX: perhaps we should use a C++-ish stream approach instead
 // This cannot be named ERROR as that conflicts with Windows headers.

diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
@@ -204,8 +204,8 @@ reader_t::process_input_entry()
             // Look for encoding bits that belong to this instr.
             if (last_encoding_.size > 0) {
                 if (last_encoding_.size != cur_ref_.instr.size) {
-                    ERRMSG("Encoding size %zu != instr size %zu\n", last_encoding_.size,
-                           cur_ref_.instr.size);
+                    ERRMSG("Encoding size %zu != instr size %zu for PC 0x%zx\n",
+                           last_encoding_.size, cur_ref_.instr.size, cur_ref_.instr.addr);
                     assert(false);
                 }
                 memcpy(cur_ref_.instr.encoding, last_encoding_.bits, last_encoding_.size);
@@ -216,7 +216,11 @@ reader_t::process_input_entry()
                 const auto &it = encodings_.find(cur_ref_.instr.addr);
                 if (it != encodings_.end()) {
                     memcpy(cur_ref_.instr.encoding, it->second.bits, it->second.size);
-                } else if (!expect_no_encodings_) {
+                } else if (!expect_no_encodings_ &&
+                           // A thread can migrate after encoding records are seen.
+                           // It is up to the user to properly handle encodings
+                           // in this mode.
+                           !core_sharded_) {
                     ERRMSG("Missing encoding for 0x%zx\n", cur_ref_.instr.addr);
                     assert(false);
                 }
@@ -288,7 +292,9 @@ reader_t::process_input_entry()
         break;
     case TRACE_TYPE_MARKER:
         cur_ref_.marker.type = (trace_type_t)input_entry_->type;
-        assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_);
+        assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_ ||
+               // We have to wait for the filetype to see whether we're core-sharded.
+               !found_filetype_);
         cur_ref_.marker.pid = cur_pid_;
         cur_ref_.marker.tid = cur_tid_;
         cur_ref_.marker.marker_type = (trace_marker_type_t)input_entry_->size;
@@ -327,6 +333,7 @@ reader_t::process_input_entry()
             version_ = cur_ref_.marker.marker_value;
         else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
             filetype_ = cur_ref_.marker.marker_value;
+            found_filetype_ = true;
             if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_)) {
                 expect_no_encodings_ = false;
             }

diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h
@@ -272,7 +272,10 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     };
 
     std::unordered_map<addr_t, encoding_info_t> encodings_;
+    // Whether this reader's input stream interleaves software threads and thus
+    // some thread-based checks may not apply.
     bool core_sharded_ = false;
+    bool found_filetype_ = false;
 
 private:
     memref_t cur_ref_;

diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp
@@ -1827,7 +1827,12 @@ scheduler_tmpl_t<RecordType, ReaderType>::skip_instructions(output_ordinal_t out
 
     // If we skipped from the start we may not have seen the initial headers:
     // use the input's cached copies.
-    if (stream->version_ == 0) {
+    // We set the version and filetype up front for outputs with
+    // an initial input, so we check a different field to detect a
+    // skip.
+    if (stream->cache_line_size_ == 0 ||
+        // Check the version too as a fallback for inputs with no cache size.
+        stream->version_ == 0) {
         stream->version_ = input.reader->get_version();
         stream->last_timestamp_ = input.reader->get_last_timestamp();
         stream->first_timestamp_ = input.reader->get_first_timestamp();
@@ -2127,7 +2132,9 @@ scheduler_tmpl_t<RecordType, ReaderType>::set_cur_input(output_ordinal_t output,
     std::lock_guard<std::mutex> lock(*inputs_[input].lock);
 
     if (prev_input < 0 && outputs_[output].stream->filetype_ == 0) {
-        // Set the filetype up front, to let the user query at init time as documented.
+        // Set the version and filetype up front, to let the user query at init time
+        // as documented.
+        outputs_[output].stream->version_ = inputs_[input].reader->get_version();
         outputs_[output].stream->filetype_ = inputs_[input].reader->get_filetype();
     }
 

diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h
@@ -867,6 +867,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
         /**
          * Returns the #trace_version_t value from the
          * #TRACE_MARKER_TYPE_VERSION record in the trace header.
+         * This can be queried prior to explicitly retrieving any records from
+         * output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t::
+         * scheduler_options_t.read_inputs_in_init is false.
          */
         uint64_t
         get_version() const override
@@ -881,7 +884,6 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
          * This can be queried prior to explicitly retrieving any records from
          * output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t::
          * scheduler_options_t.read_inputs_in_init is false.
-
          */
         uint64_t
         get_filetype() const override

diff --git a/clients/drcachesim/tests/record_filter_bycore_multi.templatex b/clients/drcachesim/tests/record_filter_bycore_multi.templatex
@@ -0,0 +1,8 @@
+Estimation of pi is 3.14.*
+Trace invariant checks passed
+Output .* entries from .* entries.
+Schedule stats tool results:
+.*
+Core #0 schedule: .*
+Core #1 schedule: .*
+Core #2 schedule: .*
diff --git a/clients/drcachesim/tests/record_filter_bycore_uni.templatex b/clients/drcachesim/tests/record_filter_bycore_uni.templatex
@@ -0,0 +1,14 @@
+#ifdef WINDOWS
+Hit delay threshold: enabling tracing.
+Exiting process after .* references.
+#else
+Hello, world!
+#endif
+Trace invariant checks passed
+Output .* entries from .* entries.
+Schedule stats tool results:
+.*
+Core #0 schedule: .*
+Core #1 schedule: .*
+Core #2 schedule: .*
+Core #3 schedule: .*
diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp
@@ -106,6 +106,7 @@ class test_record_filter_t : public dynamorio::drmemtrace::record_filter_t {
     {
         output_.push_back(entry);
         shard->cur_refs += shard->memref_counter.entry_memref_count(&entry);
+        shard->last_written_record = entry;
         return true;
     }
     std::string
@@ -146,6 +147,12 @@ class local_stream_t : public default_memtrace_stream_t {
     {
         last_timestamp_ = last_timestamp;
     }
+    int64_t
+    get_input_id() const override
+    {
+        // Just one input for our tests.
+        return 0;
+    }
 
 private:
     uint64_t last_timestamp_;