Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#4462: Add new global memtrace size limit #4479

Merged
merged 4 commits into from
Oct 8, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ Further non-compatibility-affecting changes include:
CLIENT{32,64}_{ABS,REL} in tool files.
Added dr_get_client_info_ex() and dr_client_iterator_next_ex() to support
querying other-bitwidth client registration.
- Added a new drcachesim option \p -max_global_trace_refs for specifying a global
trace size limit that does not terminate the process.

**************************************************
<hr>
Expand Down
11 changes: 10 additions & 1 deletion clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,14 @@ droption_t<bytesize_t> op_max_trace_size(
"of one internal buffer. Once reached, instrumentation continues for that thread, "
"but no further data is recorded.");

/* Cap on the total number of trace references recorded, summed over the whole
 * process.  Unlike -exit_after_tracing, reaching the cap only stops recording:
 * the application keeps running.  A value of 0 disables the cap.
 */
droption_t<bytesize_t> op_max_global_trace_refs(
    DROPTION_SCOPE_CLIENT, "max_global_trace_refs", 0,
    "Cap on the total references traced",
    "If non-zero, this sets a maximum size on the amount of trace references recorded. "
    "Once reached, instrumented execution continues, but no further data is recorded. "
    /* Adjacent literals are concatenated verbatim, so each sentence needs its own
     * trailing space.
     */
    "This is similar to -exit_after_tracing but without terminating the process. "
    "The reference count is approximate.");

droption_t<bytesize_t> op_trace_after_instrs(
DROPTION_SCOPE_CLIENT, "trace_after_instrs", 0,
"Do not start tracing until N instructions",
Expand All @@ -238,7 +246,8 @@ droption_t<bytesize_t> op_exit_after_tracing(
DROPTION_SCOPE_CLIENT, "exit_after_tracing", 0,
"Exit the process after tracing N references",
"If non-zero, after tracing the specified number of references, the process is "
"exited with an exit code of 0. The reference count is approximate.");
"exited with an exit code of 0. The reference count is approximate. "
"Use -max_global_trace_refs instead to avoid terminating the process.");

droption_t<bool> op_online_instr_types(
DROPTION_SCOPE_CLIENT, "online_instr_types", false,
Expand Down
1 change: 1 addition & 0 deletions clients/drcachesim/common/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ extern droption_t<bool> op_use_physical;
extern droption_t<unsigned int> op_virt2phys_freq;
extern droption_t<bool> op_cpu_scheduling;
extern droption_t<bytesize_t> op_max_trace_size;
extern droption_t<bytesize_t> op_max_global_trace_refs;
extern droption_t<bytesize_t> op_trace_after_instrs;
extern droption_t<bytesize_t> op_exit_after_tracing;
extern droption_t<bool> op_online_instr_types;
Expand Down
17 changes: 15 additions & 2 deletions clients/drcachesim/drcachesim.dox.in
Original file line number Diff line number Diff line change
Expand Up @@ -719,8 +719,21 @@ during a desired window of execution.

The \p -trace_after_instrs option delays tracing by the specified number of
dynamic instruction executions. This can be used to skip initialization
and arrive at the desired starting point. The trace's length can also be
limited by the \p -exit_after_tracing option.
and arrive at the desired starting point. The trace's length can be
limited in several ways:

- The \p -max_global_trace_refs option causes the recording of trace
data to cease once the specified threshold is exceeded by the sum of
all trace references across all threads. One trace reference entry
equals one recorded address, but due to post-processing expansion a
final offline trace will be larger. Once recording ceases, the
application will continue to run. Threads that are newly created after
the threshold is reached will not appear in the trace.
- The \p -exit_after_tracing option similarly specifies a global trace
reference count, but once it is exceeded, the process is terminated.
- The \p -max_trace_size option sets a cap on the number of bytes written
by each thread. This is a per-thread limit, and if one thread hits the
limit it does not affect the trace recording of other threads.

If the application can be modified, it can be linked with the \p drcachesim
tracer and use DynamoRIO's start/stop API routines dr_app_setup_and_start()
Expand Down
11 changes: 11 additions & 0 deletions clients/drcachesim/tests/delay-global.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Hit delay threshold: enabling tracing.
Hit -max_global_trace_refs: disabling tracing.
.*
Total Number Of iterations : 3
...................................................................
---- <application exited with code 0> ----
Basic counts tool results:
Total counts:
.*
1 total threads
.*
10 changes: 10 additions & 0 deletions clients/drcachesim/tests/offline-max-global.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Hit delay threshold: enabling tracing.
Hit -max_global_trace_refs: disabling tracing.
.*
Total Number Of iterations : 3
...................................................................
Basic counts tool results:
Total counts:
.*
1 total threads
.*
51 changes: 44 additions & 7 deletions clients/drcachesim/tracer/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ static size_t max_buf_size;

static drvector_t scratch_reserve_vec;

/* thread private buffer and counter */
/* Thread private data. This is all set to 0 at thread init. */
typedef struct {
byte *seg_base;
byte *buf_base;
Expand All @@ -128,6 +128,8 @@ typedef struct {
/* For level 0 filters */
byte *l0_dcache;
byte *l0_icache;
/* For max output thresholds. */
bool output_disabled;
} per_thread_t;

#define MAX_NUM_DELAY_INSTRS 32
Expand Down Expand Up @@ -390,6 +392,13 @@ is_ok_to_split_before(trace_type_t type)
type == TRACE_TYPE_MARKER || type == TRACE_TYPE_THREAD_EXIT;
}

static inline bool
is_beyond_global_max(void)
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
{
return op_max_global_trace_refs.get_value() > 0 &&
num_refs_racy > op_max_global_trace_refs.get_value();
}

static void
memtrace(void *drcontext, bool skip_size_cap)
{
Expand All @@ -413,13 +422,29 @@ memtrace(void *drcontext, bool skip_size_cap)
dr_get_thread_id(drcontext));
pipe_start = data->buf_base;
pipe_end = pipe_start;
if (!skip_size_cap && op_max_trace_size.get_value() > 0 &&
data->bytes_written > op_max_trace_size.get_value()) {
if (!skip_size_cap &&
(data->output_disabled ||
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
(((op_max_trace_size.get_value() > 0 &&
data->bytes_written > op_max_trace_size.get_value()) ||
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
is_beyond_global_max())))) {
/* We don't guarantee to match the limit exactly so we allow one buffer
* beyond. We also don't put much effort into reducing overhead once
* beyond the limit: we still instrument and come here.
*/
do_write = false;
if (!data->output_disabled && is_beyond_global_max()) {
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
data->output_disabled = true;
/* std::atomic *should* be safe (we can assert std::atomic_is_lock_free())
* but to avoid any risk we use DR's atomics and add 1. This will only
* happen once per thread so the int should never overflow (even if it does
* an extra print is not disastrous).
*/
static int notify_once;
int count = dr_atomic_add32_return_sum(&notify_once, 1);
if (count == 1) {
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
NOTIFY(0, "Hit -max_global_trace_refs: disabling tracing.\n");
}
}
} else
data->bytes_written += buf_ptr - pipe_start;

Expand Down Expand Up @@ -1555,9 +1580,10 @@ event_thread_init(void *drcontext)
data->seg_base = (byte *)dr_get_dr_segment_base(tls_seg);
DR_ASSERT(data->seg_base != NULL);

if (should_trace_thread_cb != NULL &&
!(*should_trace_thread_cb)(dr_get_thread_id(drcontext),
trace_thread_cb_user_data))
if ((should_trace_thread_cb != NULL &&
!(*should_trace_thread_cb)(dr_get_thread_id(drcontext),
trace_thread_cb_user_data)) ||
is_beyond_global_max())
BUF_PTR(data->seg_base) = NULL;
else {
create_buffer(data);
Expand All @@ -1582,7 +1608,11 @@ event_thread_exit(void *drcontext)
BUF_PTR(data->seg_base) += instru->append_thread_exit(
BUF_PTR(data->seg_base), dr_get_thread_id(drcontext));

memtrace(drcontext, true);
memtrace(drcontext,
/* If this thread already wrote some data, include its exit even
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
* if we're over a size limit.
*/
data->bytes_written > 0);

if (op_offline.get_value())
file_ops_func.close_file(data->file);
Expand Down Expand Up @@ -1663,6 +1693,8 @@ event_exit(void)
should_trace_thread_cb = nullptr;
trace_thread_cb_user_data = nullptr;
thread_filtering_enabled = false;
num_refs = 0;
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
num_refs_racy = 0;

dr_mutex_destroy(mutex);
drutil_exit();
Expand Down Expand Up @@ -1921,6 +1953,11 @@ drmemtrace_client_main(client_id_t id, int argc, const char *argv[])
NOTIFY(0, "-use_physical is unsafe with statically linked clients\n");
#endif
}

if (op_max_global_trace_refs.get_value() > 0) {
/* We need the same is-buffer-zero checks in the instrumentation. */
thread_filtering_enabled = true;
}
}

/* To support statically linked multiple clients, we add drmemtrace_client_main
Expand Down
12 changes: 12 additions & 0 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3177,6 +3177,13 @@ if (CLIENT_INTERFACE)
torunonly_drcachesim(delay-simple ${ci_shared_app}
"-trace_after_instrs 20000 -exit_after_tracing 10000" "")

# We use a many-threaded test with a small max and test that we only see
# 1 thread, testing the thread ignore logic. The max should be small enough
# to not be flaky on any platform.
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
torunonly_drcachesim(delay-global client.annotation-concurrency
"-simulator_type basic_counts -trace_after_instrs 20K -max_global_trace_refs 10K"
"${annotation_test_args_shorter}")

# Test that "Warmup hits" and "Warmup misses" are printed out
torunonly_drcachesim(warmup-valid ${ci_shared_app} "-warmup_refs 1" "")

Expand Down Expand Up @@ -3399,6 +3406,11 @@ if (CLIENT_INTERFACE)
torunonly_drcacheoff(instr-only-trace ${ci_shared_app} "-instr_only_trace" "" "")
torunonly_drcacheoff(filter-and-instr-only-trace ${ci_shared_app} "-instr_only_trace -L0_filter" "" "")

# As for the online test, we check that only 1 thread is in the final trace.
torunonly_drcacheoff(max-global client.annotation-concurrency
"-trace_after_instrs 20K -max_global_trace_refs 10K"
"@-simulator_type@basic_counts" "${annotation_test_args_shorter}")

# __builtin_prefetch used in the test is not defined on MSVC.
if (NOT MSVC)
torunonly_drcacheoff(builtin-prefetch-basic-counts builtin_prefetch "" "@-simulator_type@basic_counts" "")
Expand Down