Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#2039 trace trim, part 3: Add nop mode to drmemtrace #5700

Merged
merged 4 commits into from
Oct 25, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ Further non-compatibility-affecting changes include:
dr_register_pre_detach_event(), and dr_unregister_pre_detach_event().
- Added instruction encodings to drmemtrace offline traces.
- Added drmemtrace_replace_file_ops_ex().
- Added -align_endpoints to drmemtrace to avoid uneven attach/detach periods.

The changes between version 9.0.1 and 9.0.0 include the following compatibility
changes:
Expand Down
13 changes: 13 additions & 0 deletions clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,19 @@ droption_t<bytesize_t> op_max_global_trace_refs(
"This is similar to -exit_after_tracing but without terminating the process."
"The reference count is approximate.");

droption_t<bool> op_align_endpoints(
// XXX i#2039,i#5686: Make this true by default (and maybe remove it altogether) once
// robustness issues with drbbdup are fixed (restore state for scatter/gather and
// other libs; yet-to-be-diagnosed other state restore issues).
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
DROPTION_SCOPE_CLIENT, "align_endpoints", false,
"Nop tracing when partially attached or detached",
"When using attach/detach to trace a burst, the attach and detach processes are "
"staggered, with the set of threads producing trace data incrementally growing or "
"shrinking. This results in uneven thread activity at the start and end of the "
"burst. If this option is enabled, tracing is nop-ed until fully attached to "
"all threads and is nop-ed as soon as detach starts, eliminating the unevenness. "
"This also allows omitting threads that did nothing during the burst.");

droption_t<bytesize_t> op_trace_after_instrs(
DROPTION_SCOPE_CLIENT, "trace_after_instrs", 0,
"Do not start tracing until N instructions",
Expand Down
1 change: 1 addition & 0 deletions clients/drcachesim/common/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ extern droption_t<unsigned int> op_virt2phys_freq;
extern droption_t<bool> op_cpu_scheduling;
extern droption_t<bytesize_t> op_max_trace_size;
extern droption_t<bytesize_t> op_max_global_trace_refs;
extern droption_t<bool> op_align_endpoints;
extern droption_t<bytesize_t> op_trace_after_instrs;
extern droption_t<bytesize_t> op_trace_for_instrs;
extern droption_t<bytesize_t> op_retrace_every_instrs;
Expand Down
3 changes: 2 additions & 1 deletion clients/drcachesim/tests/burst_replace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,8 @@ post_process()
dir.modfile_bytes_, parse_cb, process_cb, MAGIC_VALUE, free_cb);
assert(module_mapper->get_last_error().empty());
// Test back-compat of deprecated APIs.
raw2trace_t raw2trace(dir.modfile_bytes_, dir.in_files_, dir.out_files_, {});
raw2trace_t raw2trace(dir.modfile_bytes_, dir.in_files_, dir.out_files_,
dir.out_archives_);
std::string error =
raw2trace.handle_custom_data(parse_cb, process_cb, MAGIC_VALUE, free_cb);
assert(error.empty());
Expand Down
79 changes: 69 additions & 10 deletions clients/drcachesim/tests/burst_threads.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2016-2018 Google, Inc. All rights reserved.
* Copyright (c) 2016-2022 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -53,9 +53,13 @@
#include "../../../suite/tests/condvar.h"

static const int num_threads = 8;
static const int num_idle_threads = 40;
static const int burst_owner = 4;
static bool finished[num_threads];
static void *burst_owner_starting;
static void *burst_owner_finished;
static void *idle_thread_started[num_idle_threads];
static void *idle_should_exit;

bool
my_setenv(const char *var, const char *value)
Expand Down Expand Up @@ -105,6 +109,11 @@ void *
assert(res == 0);
#endif

if (idx != burst_owner)
wait_cond_var(burst_owner_starting);
else
signal_cond_var(burst_owner_starting);

/* We use an outer loop to test re-attaching (i#2157). */
for (int j = 0; j < reattach_iters; ++j) {
if (idx == burst_owner) {
Expand Down Expand Up @@ -153,26 +162,64 @@ void *
return 0;
}

// Entry point for each idle thread: it reports that it has started and then
// blocks until it is told to exit.
#ifdef WINDOWS
unsigned int __stdcall
#else
void *
#endif
idle_thread_func(void *arg)
{
    // The creator passes this thread's index packed into the pointer argument.
    const uintptr_t raw_arg = reinterpret_cast<uintptr_t>(arg);
    const unsigned int slot = static_cast<unsigned int>(raw_arg);
    // Announce startup first, then park until idle_should_exit is signaled.
    signal_cond_var(idle_thread_started[slot]);
    wait_cond_var(idle_should_exit);
    return 0;
}

int
main(int argc, const char *argv[])
{
#ifdef UNIX
pthread_t thread[num_threads];
pthread_t idle_thread[num_idle_threads];
#else
uintptr_t thread[num_threads];
uintptr_t idle_thread[num_idle_threads];
#endif

/* While the start/stop thread only runs 4 iters, the other threads end up
* running more and their trace files get up to 65MB or more, with the
* merged result several GB's: too much for a test. We thus cap each thread.
*/
if (!my_setenv("DYNAMORIO_OPTIONS",
// We set -disable_traces to help stress state recreation
// in drbbdup with prefixes on every block.
"-stderr_mask 0xc -disable_traces -client_lib ';;"
"-offline -max_trace_size 256K'"))
// While the start/stop thread only runs 4 iters, the other threads end up
// running more and their trace files get up to 65MB or more, with the
// merged result several GB's: too much for a test. We thus cap each thread.
// We set -disable_traces to help stress state recreation
// in drbbdup with prefixes on every block.
std::string ops = std::string(
"-stderr_mask 0xc -disable_traces -client_lib ';;-offline -align_endpoints "
"-max_trace_size 256K ");
/* Support passing in extra tracer options. */
for (int i = 1; i < argc; ++i)
ops += std::string(argv[i]) + " ";
ops += "'";
if (!my_setenv("DYNAMORIO_OPTIONS", ops.c_str()))
std::cerr << "failed to set env var!\n";

// Create some threads that do nothing to test -align_endpoints omitting them.
// On Linux, DR's attach wakes these up and the auto-restart SYS_futex runs
// one syscall instruction which depending on end-of-attach timing may be
// emitted and so the thread will show up. But with enough threads we can be
// pretty sure very few of them will be in the trace.
idle_should_exit = create_cond_var();
for (uint i = 0; i < num_idle_threads; i++) {
idle_thread_started[i] = create_cond_var();
#ifdef UNIX
pthread_create(&idle_thread[i], NULL, idle_thread_func, (void *)(uintptr_t)i);
#else
idle_thread[i] =
_beginthreadex(NULL, 0, idle_thread_func, (void *)(uintptr_t)i, 0, NULL);
#endif
wait_cond_var(idle_thread_started[i]);
}

// Create the main thread pool.
burst_owner_starting = create_cond_var();
burst_owner_finished = create_cond_var();
for (uint i = 0; i < num_threads; i++) {
#ifdef UNIX
Expand All @@ -192,7 +239,19 @@ main(int argc, const char *argv[])
if (!finished[i])
std::cerr << "thread " << i << " failed to finish\n";
}
// Exit the idle threads.
signal_cond_var(idle_should_exit);
for (uint i = 0; i < num_idle_threads; i++) {
#ifdef UNIX
pthread_join(idle_thread[i], NULL);
#else
WaitForSingleObject((HANDLE)idle_thread[i], INFINITE);
#endif
destroy_cond_var(idle_thread_started[i]);
}
std::cerr << "all done\n";
destroy_cond_var(burst_owner_starting);
destroy_cond_var(burst_owner_finished);
destroy_cond_var(idle_should_exit);
return 0;
}
5 changes: 5 additions & 0 deletions clients/drcachesim/tests/offline-burst_replace.templatex
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@ create dir .*
create dir .*
open file .*
open file .*
open file .*
open file .*
pre-DR start
0: writing [0-9]+ bytes .*
1: writing [0-9]+ bytes .*
restore the write file function
pre-DR detach
close file .*
2: writing [0-9]+ bytes .*
3: writing [0-9]+ bytes .*
close file .*
close file .*
close file .*
all done
Cache simulation results:
Expand Down
25 changes: 25 additions & 0 deletions clients/drcachesim/tests/offline-burst_threads_counts.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
pre-DR init
pre-DR start
pre-DR detach
pre-DR init
pre-DR start
pre-DR detach
pre-DR init
pre-DR start
pre-DR detach
pre-DR init
pre-DR start
pre-DR detach
all done
Basic counts tool results:
Total counts:
.* total \(fetched\) instructions
.* total unique \(fetched\) instructions
.* total non-fetched instructions
.* total prefetches
.* total data loads
.* total data stores
.* total icache flushes
.* total dcache flushes
[ 1][0-9] total threads
.*
6 changes: 3 additions & 3 deletions clients/drcachesim/tracer/instr_counter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ hit_instr_count_threshold(app_pc next_pc)
}
#endif
dr_mutex_lock(mutex);
if (tracing_disabled.load(std::memory_order_acquire) == BBDUP_MODE_TRACE) {
if (tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE) {
// Another thread already changed the mode.
dr_mutex_unlock(mutex);
return;
Expand All @@ -141,8 +141,8 @@ hit_instr_count_threshold(app_pc next_pc)
// portably safe to take the address of std::atomic, so we rely on our mutex.
instr_count = 0;
#endif
DR_ASSERT(tracing_disabled.load(std::memory_order_acquire) == BBDUP_MODE_COUNT);
tracing_disabled.store(BBDUP_MODE_TRACE, std::memory_order_release);
DR_ASSERT(tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_COUNT);
tracing_mode.store(BBDUP_MODE_TRACE, std::memory_order_release);
dr_mutex_unlock(mutex);
}

Expand Down
20 changes: 14 additions & 6 deletions clients/drcachesim/tracer/output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ reached_traced_instrs_threshold(void *drcontext)
tracing_window.fetch_add(1, std::memory_order_release);
// We delay creating a new output dir until tracing is enabled again, to avoid
// an empty final dir.
DR_ASSERT(tracing_disabled.load(std::memory_order_acquire) == BBDUP_MODE_TRACE);
tracing_disabled.store(BBDUP_MODE_COUNT, std::memory_order_release);
DR_ASSERT(tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE);
tracing_mode.store(BBDUP_MODE_COUNT, std::memory_order_release);
cur_window_instr_count.store(0, std::memory_order_release);
dr_mutex_unlock(mutex);
}
Expand Down Expand Up @@ -911,7 +911,8 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap)

if (op_offline.get_value() && data->file == INVALID_FILE) {
// We've delayed opening a new window file to avoid an empty final file.
DR_ASSERT(has_tracing_windows() || op_trace_after_instrs.get_value() > 0);
DR_ASSERT(has_tracing_windows() || op_trace_after_instrs.get_value() > 0 ||
attached_to_process);
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
open_new_thread_file(drcontext, get_local_window(data));
}

Expand Down Expand Up @@ -1087,7 +1088,7 @@ init_thread_io(void *drcontext)
set_local_window(drcontext, tracing_window.load(std::memory_order_acquire));

if (op_offline.get_value()) {
if (tracing_disabled.load(std::memory_order_acquire) == BBDUP_MODE_TRACE) {
if (tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE) {
open_new_thread_file(drcontext, get_local_window(data));
}
if (!has_tracing_windows()) {
Expand Down Expand Up @@ -1115,8 +1116,15 @@ exit_thread_io(void *drcontext)
{
per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx);

if (tracing_disabled.load(std::memory_order_acquire) == BBDUP_MODE_TRACE ||
!op_split_windows.get_value()) {
if (tracing_mode.load(std::memory_order_acquire) == BBDUP_MODE_TRACE ||
(has_tracing_windows() && !op_split_windows.get_value()) ||
// For attach we switch to BBDUP_MODE_NOP but still need to finalize
// each thread. However, we omit threads that did nothing the entire time
// we were attached.
(align_attach_detach_endpoints() &&
(data->bytes_written > 0 ||
BUF_PTR(data->seg_base) - data->buf_base >
static_cast<ssize_t>(data->init_header_size + buf_hdr_slots_size)))) {
BUF_PTR(data->seg_base) += instru->append_thread_exit(
BUF_PTR(data->seg_base), dr_get_thread_id(drcontext));

Expand Down
Loading