i#5199 warmup trace: Add a mode for L0_filter
Add a separate L0_filter mode to enable switching from warmup/L0_filter
mode to the unfiltered mode.

Issue: #5199
prasun3 committed Nov 8, 2022
1 parent e6694ac commit a62aa9e
Showing 4 changed files with 47 additions and 18 deletions.
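For orientation, here is the mode flow this commit sets up, as a minimal compilable sketch. The enum values come from tracer.h below (BBDUP_MODE_TRACE = 0 is inferred; the hunk only shows modes 1-4), the helper function and its name are illustrative only, and the eventual L0_FILTER-to-TRACE switch is not part of this diff:

#include <cstdint>

// Values from clients/drcachesim/tracer/tracer.h.
enum {
    BBDUP_MODE_TRACE = 0,
    BBDUP_MODE_COUNT = 1,
    BBDUP_MODE_L0_FILTER = 4,
};

// Illustrative helper, not in the commit: with the new flag set, the counting
// phase (BBDUP_MODE_COUNT) hands off to L0-filtered warmup tracing instead of
// jumping straight to unfiltered tracing.
std::intptr_t
next_mode_after_count(bool need_l0_filter_mode)
{
    return need_l0_filter_mode ? BBDUP_MODE_L0_FILTER : BBDUP_MODE_TRACE;
}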
11 changes: 9 additions & 2 deletions clients/drcachesim/tracer/instr_counter.cpp
@@ -97,6 +97,7 @@ instr_count_threshold()
static void
hit_instr_count_threshold(app_pc next_pc)
{
+    uintptr_t mode;
if (!has_instr_count_threshold_to_enable_tracing())
return;
#ifdef DELAYED_CHECK_INLINED
@@ -116,7 +117,8 @@ hit_instr_count_threshold(app_pc next_pc)
}
#endif
dr_mutex_lock(mutex);
-    if (tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE) {
+    if (tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE ||
+        tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_L0_FILTER) {
// Another thread already changed the mode.
dr_mutex_unlock(mutex);
return;
@@ -142,7 +144,12 @@ hit_instr_count_threshold(app_pc next_pc)
instr_count = 0;
#endif
DR_ASSERT(tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_COUNT);
-    tracing_mode.store(BBDUP_MODE_TRACE, std::memory_order_release);
+
+    if (need_l0_filter_mode)
+        mode = BBDUP_MODE_L0_FILTER;
+    else
+        mode = BBDUP_MODE_TRACE;
+    tracing_mode.store(mode, std::memory_order_release);
dr_mutex_unlock(mutex);
}
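The two-sided mode tests added in this function (and repeated throughout the commit) read the atomic twice; under the mutex a single snapshot is equivalent and reads more directly. A sketch of that idiom using the same names as the diff, not code from the commit:

// Sketch only: snapshot the mode once, then compare.
ptr_int_t cur_mode = tracing_mode.load(std::memory_order_acquire);
if (cur_mode == BBDUP_MODE_TRACE || cur_mode == BBDUP_MODE_L0_FILTER) {
    // Another thread already changed the mode.
    dr_mutex_unlock(mutex);
    return;
}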

7 changes: 5 additions & 2 deletions clients/drcachesim/tracer/output.cpp
@@ -138,7 +138,8 @@ reached_traced_instrs_threshold(void *drcontext)
tracing_window.fetch_add(1, std::memory_order_release);
    // We delay creating a new output dir until tracing is enabled again, to avoid
// an empty final dir.
-    DR_ASSERT(tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE);
+    DR_ASSERT(tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE ||
+              tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_L0_FILTER);
tracing_mode.store(BBDUP_MODE_COUNT, std::memory_order_release);
cur_window_instr_count.store(0, std::memory_order_release);
dr_mutex_unlock(mutex);
@@ -1112,7 +1113,8 @@ init_thread_io(void *drcontext)
set_local_window(drcontext, tracing_window.load(std::memory_order_acquire));

if (op_offline.get_value()) {
-        if (tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE) {
+        if (tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE ||
+            tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_L0_FILTER) {
open_new_thread_file(drcontext, get_local_window(data));
}
if (!has_tracing_windows()) {
@@ -1146,6 +1148,7 @@ exit_thread_io(void *drcontext)
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);

if (tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE ||
+        tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_L0_FILTER ||
(has_tracing_windows() && !op_split_windows.get_value()) ||
// For attach we switch to BBDUP_MODE_NOP but still need to finalize
// each thread. However, we omit threads that did nothing the entire time
45 changes: 31 additions & 14 deletions clients/drcachesim/tracer/tracer.cpp
@@ -210,6 +210,8 @@ instru_notify(uint level, const char *fmt, ...)
*/
std::atomic<ptr_int_t> tracing_mode;

+bool need_l0_filter_mode;
+
static dr_emit_flags_t
event_bb_analysis(void *drcontext, void *tag, instrlist_t *bb, bool for_trace,
bool translating, void **user_data);
@@ -219,7 +221,7 @@ event_bb_analysis_cleanup(void *drcontext, void *user_data);

static dr_emit_flags_t
event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
-                      instr_t *where, bool for_trace, bool translating,
+                      instr_t *where, bool for_trace, bool translating, uintptr_t mode,
void *orig_analysis_data, void *user_data);

static dr_emit_flags_t
@@ -245,6 +247,7 @@ event_bb_setup(void *drbbdup_ctx, void *drcontext, void *tag, instrlist_t *bb,
DR_ASSERT(enable_dups != NULL && enable_dynamic_handling != NULL);
if (bbdup_duplication_enabled()) {
*enable_dups = true;
+        /* Make sure to update opts.non_default_case_limit if adding an encoding here. */
drbbdup_status_t res;
if (align_attach_detach_endpoints()) {
res = drbbdup_register_case_encoding(drbbdup_ctx, BBDUP_MODE_NOP);
@@ -254,6 +257,10 @@
res = drbbdup_register_case_encoding(drbbdup_ctx, BBDUP_MODE_COUNT);
DR_ASSERT(res == DRBBDUP_SUCCESS);
}
+        if (need_l0_filter_mode) {
+            res = drbbdup_register_case_encoding(drbbdup_ctx, BBDUP_MODE_L0_FILTER);
+            DR_ASSERT(res == DRBBDUP_SUCCESS);
+        }
// XXX i#2039: We have possible future use cases for BBDUP_MODE_FUNC_ONLY
// to track functions during no-tracing periods, possibly replacing the
// NOP mode for some of those. For now it is not enabled.
@@ -289,7 +296,7 @@ event_bb_analyze_case(void *drcontext, void *tag, instrlist_t *bb, bool for_trace,
bool translating, uintptr_t mode, void *user_data,
void *orig_analysis_data, void **analysis_data)
{
-    if (mode == BBDUP_MODE_TRACE) {
+    if (mode == BBDUP_MODE_TRACE || mode == BBDUP_MODE_L0_FILTER) {
return event_bb_analysis(drcontext, tag, bb, for_trace, translating,
analysis_data);
} else if (mode == BBDUP_MODE_COUNT) {
@@ -308,7 +315,7 @@ static void
event_bb_analyze_case_cleanup(void *drcontext, uintptr_t mode, void *user_data,
void *orig_analysis_data, void *analysis_data)
{
-    if (mode == BBDUP_MODE_TRACE)
+    if (mode == BBDUP_MODE_TRACE || mode == BBDUP_MODE_L0_FILTER)
event_bb_analysis_cleanup(drcontext, analysis_data);
else if (mode == BBDUP_MODE_COUNT)
; /* no cleanup needed */
@@ -326,9 +333,10 @@ event_app_instruction_case(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
uintptr_t mode, void *user_data, void *orig_analysis_data,
void *analysis_data)
{
-    if (mode == BBDUP_MODE_TRACE) {
+    if (mode == BBDUP_MODE_TRACE || mode == BBDUP_MODE_L0_FILTER) {
return event_app_instruction(drcontext, tag, bb, instr, where, for_trace,
-                                     translating, orig_analysis_data, analysis_data);
+                                     translating, mode, orig_analysis_data,
+                                     analysis_data);
} else if (mode == BBDUP_MODE_COUNT) {
// This includes func_trace_disabled_instrument_event() for drwrap cleanup.
return event_inscount_app_instruction(drcontext, tag, bb, instr, where, for_trace,
@@ -381,6 +389,8 @@ instrumentation_drbbdup_init()
++opts.non_default_case_limit; // BBDUP_MODE_NOP.
if (bbdup_instr_counting_enabled())
++opts.non_default_case_limit; // BBDUP_MODE_COUNT.
+    if (need_l0_filter_mode)
+        ++opts.non_default_case_limit; // BBDUP_MODE_L0_FILTER.
// Save per-thread heap for a feature we do not need.
opts.never_enable_dynamic_handling = true;
drbbdup_status_t res = drbbdup_init(&opts);
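The increment above must stay in sync with the drbbdup_register_case_encoding() calls in event_bb_setup(), which is exactly what the new comment there warns about; if the limit is too small, registration will not return DRBBDUP_SUCCESS. A sketch of the pairing, assuming a trimmed-down options setup rather than the tracer's full init code:

/* Sketch only: every case registered later must be announced up front. */
drbbdup_options_t opts = { sizeof(opts) };
opts.non_default_case_limit = 1;   /* BBDUP_MODE_COUNT. */
if (need_l0_filter_mode)
    ++opts.non_default_case_limit; /* BBDUP_MODE_L0_FILTER. */
/* ... set the required drbbdup callbacks (omitted) ... */
DR_ASSERT(drbbdup_init(&opts) == DRBBDUP_SUCCESS);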
@@ -404,6 +414,8 @@ instrumentation_init()
tracing_mode.store(BBDUP_MODE_NOP, std::memory_order_release);
else if (op_trace_after_instrs.get_value() != 0)
tracing_mode.store(BBDUP_MODE_COUNT, std::memory_order_release);
+    else if (need_l0_filter_mode)
+        tracing_mode.store(BBDUP_MODE_L0_FILTER, std::memory_order_release);

#ifdef DELAYED_CHECK_INLINED
drx_init();
@@ -1021,14 +1033,17 @@ is_last_instr(void *drcontext, instr_t *instr)
*/
static dr_emit_flags_t
event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
-                      instr_t *where, bool for_trace, bool translating,
+                      instr_t *where, bool for_trace, bool translating, uintptr_t mode,
void *orig_analysis_data, void *user_data)
{
int i, adjust = 0;
user_data_t *ud = (user_data_t *)user_data;
reg_id_t reg_ptr;
drvector_t rvec;
dr_emit_flags_t flags = DR_EMIT_DEFAULT;
+    bool is_l0_filter = (op_L0I_filter.get_value() || op_L0D_filter.get_value());
+    if (need_l0_filter_mode)
+        is_l0_filter = (mode == BBDUP_MODE_L0_FILTER && is_l0_filter);

// We need drwrap's instrumentation to go first so that function trace
// entries will not be appended to the middle of a BB's PC and Memory Access
@@ -1040,8 +1055,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,

drmgr_disable_auto_predication(drcontext, bb);

-    if ((op_L0I_filter.get_value() || op_L0D_filter.get_value()) && ud->repstr &&
-        is_first_nonlabel(drcontext, instr)) {
+    if (is_l0_filter && ud->repstr && is_first_nonlabel(drcontext, instr)) {
// XXX: the control flow added for repstr ends up jumping over the
// aflags spill for the memref, yet it hits the lazily-delayed aflags
// restore. We don't have a great solution (repstr violates drreg's
@@ -1118,7 +1132,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
// flow and other complications that could cause us to skip an instruction.
!drmgr_in_emulation_region(drcontext, NULL) &&
// We can't bundle with a filter.
-        !(op_L0I_filter.get_value() || op_L0D_filter.get_value()) &&
+        !is_l0_filter &&
// The delay instr buffer is not full.
ud->num_delay_instrs < MAX_NUM_DELAY_INSTRS) {
ud->delay_instrs[ud->num_delay_instrs++] = instr_fetch;
@@ -1149,7 +1163,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
instr_t *skip_instru = INSTR_CREATE_label(drcontext);
reg_id_t reg_skip = DR_REG_NULL;
reg_id_set_t app_regs_at_skip;
-    if (!(op_L0I_filter.get_value() || op_L0D_filter.get_value())) {
+    if (!is_l0_filter) {
insert_load_buf_ptr(drcontext, bb, where, reg_ptr);
if (thread_filtering_enabled) {
bool short_reaches = false;
@@ -1225,7 +1239,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
* assuming the clean call does not need the two register values.
*/
if (is_last_instr(drcontext, instr)) {
-        if (op_L0I_filter.get_value() || op_L0D_filter.get_value())
+        if (is_l0_filter)
insert_load_buf_ptr(drcontext, bb, where, reg_ptr);
instrument_clean_call(drcontext, bb, where, reg_ptr);
}
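The net effect of the is_l0_filter plumbing in this function can be summarized as a small predicate (illustrative only; the tracer computes it inline as shown above):

// mode is the drbbdup case being instrumented; filter_opts_on reflects
// -L0I_filter/-L0D_filter.
static bool
compute_is_l0_filter(bool filter_opts_on, bool need_l0_filter_mode, uintptr_t mode)
{
    if (!need_l0_filter_mode)
        return filter_opts_on; // Single-mode run: old behavior everywhere.
    // Warmup run: filter only in the dedicated case, so the BBDUP_MODE_TRACE
    // copy of each block emits unfiltered instrumentation.
    return filter_opts_on && mode == BBDUP_MODE_L0_FILTER;
}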
@@ -1302,7 +1316,8 @@ static bool
event_pre_syscall(void *drcontext, int sysnum)
{
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
-    if (tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_TRACE)
+    if (tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_TRACE &&
+        tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_L0_FILTER)
return true;
if (BUF_PTR(data->seg_base) == NULL)
return true; /* This thread was filtered out. */
@@ -1347,7 +1362,8 @@ event_post_syscall(void *drcontext, int sysnum)
#ifdef BUILD_PT_TRACER
if (!op_offline.get_value() || !op_enable_kernel_tracing.get_value())
return;
-    if (tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_TRACE)
+    if (tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_TRACE &&
+        tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_L0_FILTER)
return;
if (!syscall_pt_trace_t::is_syscall_pt_trace_enabled(sysnum))
return;
@@ -1382,7 +1398,8 @@ event_kernel_xfer(void *drcontext, const dr_kernel_xfer_info_t *info)
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);
trace_marker_type_t marker_type;
uintptr_t marker_val = 0;
-    if (tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_TRACE)
+    if (tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_TRACE &&
+        tracing_mode.load(std::memory_order_acquire) != BBDUP_MODE_L0_FILTER)
return;
if (BUF_PTR(data->seg_base) == NULL)
return; /* This thread was filtered out. */
2 changes: 2 additions & 0 deletions clients/drcachesim/tracer/tracer.h
@@ -154,6 +154,7 @@ extern std::atomic<ptr_int_t> tracing_mode;
extern std::atomic<ptr_int_t> tracing_window;
extern bool attached_midway;
extern std::atomic<uint64> attached_timestamp;
+extern bool need_l0_filter_mode;

/* We have multiple modes. While just 2 results in a more efficient dispatch,
* the power of extra modes justifies the extra overhead.
@@ -168,6 +169,7 @@ enum {
BBDUP_MODE_COUNT = 1, /* Instr counting for delayed tracing or trace windows. */
BBDUP_MODE_FUNC_ONLY = 2, /* Function tracing during no-full-trace periods. */
BBDUP_MODE_NOP = 3, /* No tracing or counting for pre-attach or post-detach. */
+    BBDUP_MODE_L0_FILTER = 4, /* Full address tracing with L0_filter. */
};

#if defined(X86_64) || defined(AARCH64)
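Not shown in this diff is where need_l0_filter_mode gets set; presumably the tracer derives it during option parsing from the existing L0 filter options plus whatever controls the warmup length. The expression below is purely an assumption for illustration, not code from the commit (op_L0I_filter, op_L0D_filter, and op_trace_after_instrs are real option objects from the hunks above):

/* Hypothetical wiring (assumption): warmup filtering only makes sense when an
 * L0 filter is requested and there is a later unfiltered phase to switch into.
 */
need_l0_filter_mode = (op_L0I_filter.get_value() || op_L0D_filter.get_value()) &&
    op_trace_after_instrs.get_value() != 0;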
