diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 9771714fc4d..490162eaabb 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -560,6 +560,14 @@ typedef enum { */ TRACE_MARKER_TYPE_SYSCALL_FAILED, + /** + * This marker is emitted prior to a system call that causes an immediate switch to + * another thread on the same core (with the current thread entering an unscheduled + * state), bypassing the kernel scheduler's normal dynamic switch code based on run + * queues. The marker value holds the thread id of the target thread. + */ + TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH, + // ... // These values are reserved for future built-in marker types. // ... diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index eb6eca236dc..a8a0c8b5337 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -410,6 +410,10 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) case TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL: std::cerr << "\n"; break; + case TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH: + std::cerr << "\n"; + break; case TRACE_MARKER_TYPE_WINDOW_ID: // Handled above. break; diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index a00d7dbcd7c..362c0bc505a 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -585,62 +585,12 @@ raw2trace_t::process_offline_entry(raw2trace_thread_data_t *tdata, uintptr_t marker_val = 0; if (!get_marker_value(tdata, &in_entry, &marker_val)) return false; - buf += trace_metadata_writer_t::write_marker( - buf, (trace_marker_type_t)in_entry->extended.valueB, marker_val); - if (in_entry->extended.valueB == TRACE_MARKER_TYPE_KERNEL_EVENT) { - log(4, "Signal/exception between bbs\n"); - // An rseq side exit may next hit a signal which is then the - // boundary of the rseq region. 
- if (tdata->rseq_past_end_) { - if (!adjust_and_emit_rseq_buffer(tdata, marker_val)) - return false; - } - } else if (in_entry->extended.valueB == TRACE_MARKER_TYPE_RSEQ_ABORT) { - log(4, "Rseq abort %d\n", tdata->rseq_past_end_); - if (!adjust_and_emit_rseq_buffer(tdata, marker_val, marker_val)) - return false; - } else if (in_entry->extended.valueB == TRACE_MARKER_TYPE_RSEQ_ENTRY) { - if (tdata->rseq_want_rollback_) { - if (tdata->rseq_buffering_enabled_) { - // Our rollback schemes do the minimal rollback: for a side - // exit, taking the last branch. This means we don't need the - // prior iterations in the buffer. - log(4, "Rseq was already buffered: assuming loop; emitting\n"); - if (!adjust_and_emit_rseq_buffer(tdata, marker_val)) - return false; - } - log(4, - "--- Reached rseq entry (end=0x%zx): buffering all output ---\n", - marker_val); - if (!tdata->rseq_ever_saw_entry_) - tdata->rseq_ever_saw_entry_ = true; - tdata->rseq_buffering_enabled_ = true; - tdata->rseq_end_pc_ = marker_val; - } - } else if (in_entry->extended.valueB == TRACE_MARKER_TYPE_FILTER_ENDPOINT) { - log(2, "Reached filter endpoint\n"); - - // The file type needs to be updated during the switch to correctly - // process the entries that follow after. This does not affect the - // written-out type. - int file_type = get_file_type(tdata); - // We do not remove OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP here - // because that still stands true for this trace. - file_type &= ~(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_IFILTERED | - OFFLINE_FILE_TYPE_DFILTERED); - set_file_type(tdata, (offline_file_type_t)file_type); - - // For the full trace, the cache contains block-level info unlike the - // filtered trace which contains instr-level info. Since we cannot use - // the decode cache entries after the transition, we need to flush the - // cache here. 
- *flush_decode_cache = true; - } else if (in_entry->extended.valueB == TRACE_MARKER_TYPE_SYSCALL && - is_maybe_blocking_syscall(marker_val)) { - log(2, "Maybe-blocking syscall %zu\n", marker_val); - buf += trace_metadata_writer_t::write_marker( - buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0); - } + trace_marker_type_t marker_type = + static_cast<trace_marker_type_t>(in_entry->extended.valueB); + buf += trace_metadata_writer_t::write_marker(buf, marker_type, marker_val); + if (!process_marker_additionally(tdata, marker_type, marker_val, buf, + flush_decode_cache)) + return false; // If there is currently a delayed branch that has not been emitted yet, // delay most markers since intra-block markers can cause issues with // tools that do not expect markers amid records for a single instruction @@ -649,14 +599,13 @@ raw2trace_t::process_offline_entry(raw2trace_thread_data_t *tdata, // OFFLINE_TYPE_TIMESTAMP which is handled at a higher level in // process_next_thread_buffer() so there is no need to have a separate // check for it here. - if (in_entry->extended.valueB != TRACE_MARKER_TYPE_CPU_ID) { + if (marker_type != TRACE_MARKER_TYPE_CPU_ID) { if (delayed_branches_exist(tdata)) { return write_delayed_branches(tdata, buf_base, reinterpret_cast(buf)); } } - log(3, "Appended marker type %u value " PIFX "\n", - (trace_marker_type_t)in_entry->extended.valueB, + log(3, "Appended marker type %u value " PIFX "\n", marker_type, (uintptr_t)in_entry->extended.valueA); } else { std::stringstream ss; @@ -718,6 +667,68 @@ raw2trace_t::process_offline_entry(raw2trace_thread_data_t *tdata, return true; } +bool +raw2trace_t::process_marker_additionally(raw2trace_thread_data_t *tdata, + trace_marker_type_t marker_type, + uintptr_t marker_val, byte *&buf, + OUT bool *flush_decode_cache) +{ + if (marker_type == TRACE_MARKER_TYPE_KERNEL_EVENT) { + log(4, "Signal/exception between bbs\n"); + // An rseq side exit may next hit a signal which is then the + // boundary of the rseq region. 
+ if (tdata->rseq_past_end_) { + if (!adjust_and_emit_rseq_buffer(tdata, marker_val)) + return false; + } + } else if (marker_type == TRACE_MARKER_TYPE_RSEQ_ABORT) { + log(4, "Rseq abort %d\n", tdata->rseq_past_end_); + if (!adjust_and_emit_rseq_buffer(tdata, marker_val, marker_val)) + return false; + } else if (marker_type == TRACE_MARKER_TYPE_RSEQ_ENTRY) { + if (tdata->rseq_want_rollback_) { + if (tdata->rseq_buffering_enabled_) { + // Our rollback schemes do the minimal rollback: for a side + // exit, taking the last branch. This means we don't need the + // prior iterations in the buffer. + log(4, "Rseq was already buffered: assuming loop; emitting\n"); + if (!adjust_and_emit_rseq_buffer(tdata, marker_val)) + return false; + } + log(4, "--- Reached rseq entry (end=0x%zx): buffering all output ---\n", + marker_val); + if (!tdata->rseq_ever_saw_entry_) + tdata->rseq_ever_saw_entry_ = true; + tdata->rseq_buffering_enabled_ = true; + tdata->rseq_end_pc_ = marker_val; + } + } else if (marker_type == TRACE_MARKER_TYPE_FILTER_ENDPOINT) { + log(2, "Reached filter endpoint\n"); + + // The file type needs to be updated during the switch to correctly + // process the entries that follow after. This does not affect the + // written-out type. + int file_type = get_file_type(tdata); + // We do not remove OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP here + // because that still stands true for this trace. + file_type &= ~(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_IFILTERED | + OFFLINE_FILE_TYPE_DFILTERED); + set_file_type(tdata, (offline_file_type_t)file_type); + + // For the full trace, the cache contains block-level info unlike the + // filtered trace which contains instr-level info. Since we cannot use + // the decode cache entries after the transition, we need to flush the + // cache here. 
+ *flush_decode_cache = true; + } else if (marker_type == TRACE_MARKER_TYPE_SYSCALL && + is_maybe_blocking_syscall(marker_val)) { + log(2, "Maybe-blocking syscall %zu\n", marker_val); + buf += trace_metadata_writer_t::write_marker( + buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0); + } + return true; +} + bool raw2trace_t::read_header(raw2trace_thread_data_t *tdata, OUT trace_header_t *header) { diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index ac5df453cf7..78580fe56a8 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -1132,6 +1132,15 @@ class raw2trace_t { thread_id_t tid, OUT bool *end_of_record, OUT bool *last_bb_handled, OUT bool *flush_decode_cache); + /** + * Performs any additional actions for the marker "marker_type" with value + * "marker_val", beyond writing out a marker record. New records can be written to + * "buf". Returns whether successful. + */ + virtual bool + process_marker_additionally(raw2trace_thread_data_t *tdata, + trace_marker_type_t marker_type, uintptr_t marker_val, + byte *&buf, OUT bool *flush_decode_cache); /** * Read the header of a thread, by calling get_next_entry() successively to * populate the header values. The timestamp field is populated only @@ -1266,6 +1275,10 @@ class raw2trace_t { modmap_ptr_ = modmap; } + /** Returns whether this system number *might* block. */ + virtual bool + is_maybe_blocking_syscall(uintptr_t number); + const module_mapper_t *modmap_ptr_ = nullptr; uint64 count_elided_ = 0; @@ -1492,9 +1505,6 @@ class raw2trace_t { bool should_omit_syscall(raw2trace_thread_data_t *tdata); - bool - is_maybe_blocking_syscall(uintptr_t number); - int worker_count_; std::vector> worker_tasks_;