i#6712 record bounds: Add record filter sanity checks (#6749)

Adds two sanity checks developed to identify what was at first believed to be a new bug but turned out to be #6712: + Ensure that last_encoding is empty on an input switch, to avoid recording the wrong encoding in pc2encoding. + Ensure the encoding size matches the instr size when inserting chunk-new encodings. + Augment the reader check for mismatching encoding vs instr sizes, and abort in release build too. Although #6712 is fixed, these checks are still useful to prevent regressions. Plus, the reader_t check detects other bugs as well and we know at least one other is out there in #6303. Making the reader check abort in release build should help avoid wasted work as already happened here where we didn't notice the printed warnings in release build. Tested by running record_filter on large proprietary inputs: no checks failed. Issue: #6712
DynamoRIO · Apr 2, 2024 · 0838ea7 · 0838ea7
1 parent 60eb0b1
commit 0838ea7
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 6 deletions.
diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
@@ -49,6 +49,13 @@
 namespace dynamorio {
 namespace drmemtrace {
 
+// We want to abort in release build for some cases.
+#define assert_release_too(cond) \
+    do {                         \
+        if (!(cond))             \
+            abort();             \
+    } while (0)
+
 // Work around clang-format bug: no newline after return type for single-char operator.
 // clang-format off
 const memref_t &
@@ -204,9 +211,14 @@ reader_t::process_input_entry()
             // Look for encoding bits that belong to this instr.
             if (last_encoding_.size > 0) {
                 if (last_encoding_.size != cur_ref_.instr.size) {
-                    ERRMSG("Encoding size %zu != instr size %zu for PC 0x%zx\n",
-                           last_encoding_.size, cur_ref_.instr.size, cur_ref_.instr.addr);
-                    assert(false);
+                    ERRMSG(
+                        "Encoding size %zu != instr size %zu for PC 0x%zx at ord %" PRIu64
+                        " instr %" PRIu64 " last_timestamp=0x%" PRIx64 "\n",
+                        last_encoding_.size, cur_ref_.instr.size, cur_ref_.instr.addr,
+                        get_record_ordinal(), get_instruction_ordinal(),
+                        get_last_timestamp());
+                    // Encoding errors indicate serious problems so we always abort.
+                    assert_release_too(false);
                 }
                 memcpy(cur_ref_.instr.encoding, last_encoding_.bits, last_encoding_.size);
                 cur_ref_.instr.encoding_is_new = true;
@@ -222,7 +234,8 @@ reader_t::process_input_entry()
                            // in this mode.
                            !core_sharded_) {
                     ERRMSG("Missing encoding for 0x%zx\n", cur_ref_.instr.addr);
-                    assert(false);
+                    // Encoding errors indicate serious problems so we always abort.
+                    assert_release_too(false);
                 }
             }
             last_encoding_.size = 0;

diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp
@@ -603,8 +603,18 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t &
             VPRINT(this, 3,
                    "output new-chunk encoding chunk=%" PRIu64 " ref=%" PRIu64 "\n",
                    per_shard->chunk_ordinal, per_shard->cur_refs);
-            if (!write_trace_entries(per_shard,
-                                     per_shard->per_input->pc2encoding[entry.addr])) {
+            // Sanity check that the encoding size is correct.
+            const auto &enc = per_shard->per_input->pc2encoding[entry.addr];
+            size_t enc_sz = 0;
+            // Since all but the last entry are fixed-size we could avoid a loop
+            // but the loop is easier to read and we have just 1 or 2 iters.
+            for (const auto &record : enc)
+                enc_sz += record.size;
+            if (enc_sz != entry.size) {
+                return "New-chunk encoding size " + std::to_string(enc_sz) +
+                    " != instr size " + std::to_string(entry.size);
+            }
+            if (!write_trace_entries(per_shard, enc)) {
                 return "Failed to write";
             }
             // Avoid emitting the encoding twice.
@@ -708,6 +718,12 @@ record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &in
         // It would be nice to assert that this pointer is not in use in other shards
         // but that is too expensive.
         per_shard->per_input = it->second.get();
+        // Not supposed to see a switch that splits an encoding from its instr.
+        // That would cause recording an incorrect encoding into pc2encoding.
+        if (!per_shard->last_encoding.empty()) {
+            per_shard->error = "Input switch immediately after encoding not supported";
+            return false;
+        }
     }
     if (per_shard->enabled && stop_timestamp_ != 0 &&
         per_shard->shard_stream->get_last_timestamp() >= stop_timestamp_) {