Skip to content

Commit

Permalink
i#5411 bbdup mem: Shrink per-thread heap usage (#5419)
Browse files Browse the repository at this point in the history
Moves all drbbdup per-thread memory to unreachable heap.

Changes the drbbdup dynamic case hit table from an inline per-thread array
to a dynamically allocated one, which is allocated only when the new drbbdup
option "never_enable_dynamic_handling" is not set.  This eliminates 128K per
thread for drmemtrace, which makes a big difference on large applications.

Sanity test:
Before:
  $ bin64/drrun -rstats_to_stderr -t drcachesim -offline -max_global_trace_refs 10K -- suite/tests/bin/client.annotation-concurrency libclient.annotation-concurrency.appdll.so A 4 64 3
              Peak threads under DynamoRIO control :                 5
              Peak vmm blocks for unreachable heap :               767
                Peak vmm blocks for reachable heap :               726
            Peak vmm virtual memory in use (bytes) :          10293248
After:
              Peak threads under DynamoRIO control :                 5
              Peak vmm blocks for unreachable heap :               736
                Peak vmm blocks for reachable heap :               250
            Peak vmm virtual memory in use (bytes) :           8183808

Issue: #5411
  • Loading branch information
derekbruening authored Mar 15, 2022
1 parent 38281cd commit 6e16e55
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 14 deletions.
2 changes: 2 additions & 0 deletions clients/drcachesim/tracer/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,8 @@ instrumentation_drbbdup_init()
opts.runtime_case_opnd = OPND_CREATE_ABSMEM(&tracing_disabled, OPSZ_PTR);
opts.atomic_load_encoding = true;
opts.non_default_case_limit = 1;
// Avoid allocating per-thread heap for a feature we do not need.
opts.never_enable_dynamic_handling = true;
drbbdup_status_t res = drbbdup_init(&opts);
DR_ASSERT(res == DRBBDUP_SUCCESS);
/* We just want barriers and atomic ops: no locks b/c they are not safe. */
Expand Down
61 changes: 47 additions & 14 deletions ext/drbbdup/drbbdup.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@ typedef struct {
hashtable_t manager_table; /* Maps bbs with book-keeping data (for thread-private
caches only). */
int case_index; /* Used to keep track of the current case during insertion. */
void *orig_analysis_data; /* Analysis data accessible for all cases. */
void *default_analysis_data; /* Analysis data specific to default case. */
void **case_analysis_data; /* Analysis data specific to cases. */
uint16_t hit_counts[TABLE_SIZE]; /* Keeps track of hit-counts of unhandled cases. */
void *orig_analysis_data; /* Analysis data accessible for all cases. */
void *default_analysis_data; /* Analysis data specific to default case. */
void **case_analysis_data; /* Analysis data specific to cases. */
uint16_t *hit_counts; /* Keeps track of hit-counts of unhandled cases. */
instr_t *first_instr; /* The first instr of the bb copy being considered. */
instr_t *first_nonlabel_instr; /* The first non label instr of the bb copy. */
instr_t *last_instr; /* The last instr of the bb copy being considered. */
Expand Down Expand Up @@ -324,6 +324,10 @@ drbbdup_create_manager(void *drcontext, void *tag, instrlist_t *bb)
/* XXX i#3778: To remove once we support specific fragment deletion. */
DR_ASSERT_MSG(!manager->enable_dynamic_handling,
"dynamic case generation is not yet supported");
if (opts.never_enable_dynamic_handling) {
DR_ASSERT_MSG(!manager->enable_dynamic_handling,
"dynamic case generation was disabled globally: cannot enable");
}

/* Check whether user wants copies for this particular bb. */
if (!manager->enable_dup && manager->cases != NULL) {
Expand Down Expand Up @@ -1284,6 +1288,8 @@ drbbdup_insert_dynamic_handling(void *drcontext, void *tag, instrlist_t *bb,

ASSERT(new_case_cache_pc != NULL,
"new case cache for dynamic handling must be already initialised.");
DR_ASSERT_MSG(!opts.never_enable_dynamic_handling,
"should not reach here if dynamic cases were disabled globally");

/* Check whether case limit has not been reached. */
if (drbbdup_do_dynamic_handling(manager)) {
Expand Down Expand Up @@ -1803,6 +1809,9 @@ drbbdup_handle_new_case()
drbbdup_per_thread *pt =
(drbbdup_per_thread *)drmgr_get_tls_field(drcontext, tls_idx);

DR_ASSERT_MSG(!opts.never_enable_dynamic_handling,
"should not reach here if dynamic cases were disabled globally");

/* Must use DR_MC_ALL due to dr_redirect_execution. */
dr_mcontext_t mcontext;
mcontext.size = sizeof(mcontext);
Expand Down Expand Up @@ -1867,6 +1876,9 @@ init_fp_cache(void (*clean_call_func)())
size_t size = dr_page_size();
ilist = instrlist_create(drcontext);

DR_ASSERT_MSG(!opts.never_enable_dynamic_handling,
"should not reach here if dynamic cases were disabled globally");

dr_insert_clean_call(drcontext, ilist, NULL, (void *)clean_call_func, false, 0);

/* Allocate code cache, and set Read-Write-Execute permissions using
Expand Down Expand Up @@ -1987,8 +1999,14 @@ drbbdup_get_stats(OUT drbbdup_stats_t *stats_in)
static void
drbbdup_thread_init(void *drcontext)
{
drbbdup_per_thread *pt =
(drbbdup_per_thread *)dr_thread_alloc(drcontext, sizeof(drbbdup_per_thread));
/* We use unreachable heap here too, though with the hit_counts array
* dynamically allocated the usage is now small enough to not matter for
* most non_default_case_limit values.
*/
drbbdup_per_thread *pt = (drbbdup_per_thread *)dr_custom_alloc(
drcontext, DR_ALLOC_THREAD_PRIVATE, sizeof(drbbdup_per_thread),
DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL);
memset(pt, 0, sizeof(*pt));

if (is_thread_private) {
/* Initialise hash table that keeps track of defined cases per
Expand All @@ -2001,14 +2019,23 @@ drbbdup_thread_init(void *drcontext)
pt->case_index = 0;
pt->orig_analysis_data = NULL;
ASSERT(opts.non_default_case_limit > 0, "dup limit should be greater than zero");
pt->case_analysis_data =
dr_thread_alloc(drcontext, sizeof(void *) * opts.non_default_case_limit);
pt->case_analysis_data = dr_custom_alloc(drcontext, DR_ALLOC_THREAD_PRIVATE,
sizeof(void *) * opts.non_default_case_limit,
DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL);
memset(pt->case_analysis_data, 0, sizeof(void *) * opts.non_default_case_limit);

/* Init hit table. */
for (int i = 0; i < TABLE_SIZE; i++)
pt->hit_counts[i] = opts.hit_threshold;
drbbdup_set_tls_raw_slot_val(DRBBDUP_HIT_TABLE_SLOT, (uintptr_t)pt->hit_counts);
if (!opts.never_enable_dynamic_handling) {
/* Dynamically allocated to avoid using space when not needed (128K per
* thread adds up on large apps), and with explicit unreachable heap.
*/
pt->hit_counts = (uint16_t *)dr_custom_alloc(
drcontext, DR_ALLOC_THREAD_PRIVATE, TABLE_SIZE * sizeof(pt->hit_counts[0]),
DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL);
/* Init hit table. */
for (int i = 0; i < TABLE_SIZE; i++)
pt->hit_counts[i] = opts.hit_threshold;
drbbdup_set_tls_raw_slot_val(DRBBDUP_HIT_TABLE_SLOT, (uintptr_t)pt->hit_counts);
}

drmgr_set_tls_field(drcontext, tls_idx, (void *)pt);
}
Expand All @@ -2024,9 +2051,15 @@ drbbdup_thread_exit(void *drcontext)
if (is_thread_private)
hashtable_delete(&pt->manager_table);

dr_thread_free(drcontext, pt->case_analysis_data,
dr_custom_free(drcontext, DR_ALLOC_THREAD_PRIVATE, pt->case_analysis_data,
sizeof(void *) * opts.non_default_case_limit);
dr_thread_free(drcontext, pt, sizeof(drbbdup_per_thread));
if (pt->hit_counts != NULL) {
DR_ASSERT_MSG(!opts.never_enable_dynamic_handling,
"should not reach here if dynamic cases were disabled globally");
dr_custom_free(drcontext, DR_ALLOC_THREAD_PRIVATE, pt->hit_counts,
TABLE_SIZE * sizeof(pt->hit_counts[0]));
}
dr_custom_free(drcontext, DR_ALLOC_THREAD_PRIVATE, pt, sizeof(drbbdup_per_thread));
}

/****************************************************************************
Expand Down
6 changes: 6 additions & 0 deletions ext/drbbdup/drbbdup.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,12 @@ typedef struct {
* Either this or the instrument_instr field must be set.
*/
drbbdup_instrument_instr_ex_t instrument_instr_ex;
/**
* If \p enable_dynamic_handling will *never* be set by \p set_up_bb_dups for
* *any* basic block, this field can be set to true. This reduces memory
* usage by not allocating bookkeeping data needed for dynamic handling.
*/
bool never_enable_dynamic_handling;
} drbbdup_options_t;

/**
Expand Down
5 changes: 5 additions & 0 deletions suite/tests/client-interface/drbbdup-test.dll.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,11 @@ dr_init(client_id_t id)
opts.user_data = USER_DATA_VAL;
opts.non_default_case_limit = 2;
opts.is_stat_enabled = true;
/* Test not triggering lazy allocation paths.
* We can't easily test subsequently enabling dynamic handling for a block, since
* that results in an assert rather than a failure return code.
*/
opts.never_enable_dynamic_handling = true;

drbbdup_status_t res = drbbdup_init(&opts);
CHECK(res == DRBBDUP_SUCCESS, "drbbdup init failed");
Expand Down

0 comments on commit 6e16e55

Please sign in to comment.