From d4b3e61fe8bde4036b6f8cae54ca98faf5e70c71 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Thu, 15 Sep 2016 15:41:41 -0400 Subject: [PATCH] i#1729 offline traces: generate unoptimized traces Adds two new options for offline trace generation in drcachesim: -offline and -outdir. Updates the launcher to skip the simulator launch when -offline is requested. Updates the tracer to write to a file instead of a pipe when -offline is set. The trace entry format is still the same: it will be modified in the future to include timestamps and to shrink its size, with a postprocessor component filling in statically-identifiable information. Adds packing to trace_entry_t to ensure it can be used as a cross-platform persistent representation. A test will be added once file reading is in place. Review-URL: https://codereview.appspot.com/310890043 --- clients/drcachesim/common/options.cpp | 16 ++- clients/drcachesim/common/options.h | 4 +- clients/drcachesim/common/trace_entry.h | 9 +- clients/drcachesim/common/utils.h | 21 +++- clients/drcachesim/launcher.cpp | 62 ++++++++---- clients/drcachesim/tracer/tracer.cpp | 124 ++++++++++++++++-------- 6 files changed, 172 insertions(+), 64 deletions(-) diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 826cec74e83..0b16daafb35 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015 Google, Inc. All rights reserved. + * Copyright (c) 2015-2016 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -36,12 +36,24 @@ #include "droption.h" #include "options.h" +droption_t op_offline +(DROPTION_SCOPE_ALL, "offline", false, "Store trace files for offline analysis", + "By default, traces are processed online, sent over a pipe to a simulator. 
" + "If this option is enabled, trace data is instead written to files in -outdir " + "for later offline analysis. No simulator is executed."); + droption_t op_ipc_name (DROPTION_SCOPE_ALL, "ipc_name", "drcachesimpipe", "Base name of named pipe", - "Specifies the base name of the named pipe used to communicate between the target " + "For online tracing and simulation (the default, unless -offline is requested), " + "specifies the base name of the named pipe used to communicate between the target " "application processes and the caching device simulator. A unique name must be chosen " "for each instance of the simulator being run at any one time."); +droption_t op_outdir +(DROPTION_SCOPE_ALL, "outdir", ".", "Target directory for offline trace files", + "For the offline analysis mode (when -offline is requested), specifies the path " + "to a directory where per-thread trace files will be written."); + droption_t op_num_cores (DROPTION_SCOPE_FRONTEND, "cores", 4, "Number of cores", "Specifies the number of cores to simulate."); diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 688f4197747..be7f87229e5 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015 Google, Inc. All rights reserved. + * Copyright (c) 2015-2016 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -45,7 +45,9 @@ #include #include "droption.h" +extern droption_t op_offline; extern droption_t op_ipc_name; +extern droption_t op_outdir; extern droption_t op_num_cores; extern droption_t op_line_size; extern droption_t op_L1I_size; diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 134bb91cc75..059dea3788c 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015 Google, Inc. All rights reserved. + * Copyright (c) 2015-2016 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -44,6 +44,7 @@ #define _TRACE_ENTRY_H_ 1 #include +#include "utils.h" typedef uintptr_t addr_t; @@ -116,7 +117,8 @@ extern const char * const trace_type_names[]; // - a flush request // - a prefetch request // - a thread/process -typedef struct _trace_entry_t { +START_PACKED_STRUCTURE +struct _trace_entry_t { unsigned short type; // 2 bytes: trace_type_t // 2 bytes: mem ref size, instr length, or num of instrs for instr bundle unsigned short size; @@ -125,7 +127,8 @@ typedef struct _trace_entry_t { // The length of each instr in the instr bundle unsigned char length[sizeof(addr_t)]; }; -} trace_entry_t; +} END_PACKED_STRUCTURE; +typedef struct _trace_entry_t trace_entry_t; static inline bool type_is_prefetch(unsigned short type) diff --git a/clients/drcachesim/common/utils.h b/clients/drcachesim/common/utils.h index 8f069e6ba8b..da3c4dc39d5 100644 --- a/clients/drcachesim/common/utils.h +++ b/clients/drcachesim/common/utils.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015 Google, Inc. All rights reserved. + * Copyright (c) 2015-2016 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -48,6 +48,25 @@ #define BUFFER_LAST_ELEMENT(buf) buf[BUFFER_SIZE_ELEMENTS(buf) - 1] #define NULL_TERMINATE_BUFFER(buf) BUFFER_LAST_ELEMENT(buf) = 0 +#define BOOLS_MATCH(b1, b2) (!!(b1) == !!(b2)) + +#ifdef WINDOWS +/* Use special C99 operator _Pragma to generate a pragma from a macro */ +# if _MSC_VER <= 1200 +# define ACTUAL_PRAGMA(p) _Pragma ( #p ) +# else +# define ACTUAL_PRAGMA(p) __pragma ( p ) +# endif +/* Usage: if planning to typedef, that must be done separately, as MSVC will + * not take __pragma after typedef. + */ +# define START_PACKED_STRUCTURE ACTUAL_PRAGMA( pack(push,1) ) +# define END_PACKED_STRUCTURE ACTUAL_PRAGMA( pack(pop) ) +#else +# define START_PACKED_STRUCTURE /* nothing */ +# define END_PACKED_STRUCTURE __attribute__ ((__packed__)) +#endif + static inline int compute_log2(int value) { diff --git a/clients/drcachesim/launcher.cpp b/clients/drcachesim/launcher.cpp index 05989aa523e..fbaad64e0ba 100644 --- a/clients/drcachesim/launcher.cpp +++ b/clients/drcachesim/launcher.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015 Google, Inc. All rights reserved. + * Copyright (c) 2015-2016 Google, Inc. All rights reserved.
* **********************************************************/ /* @@ -81,6 +81,13 @@ file_is_readable(const char *path) return (drfront_access(path, DRFRONT_READ, &ret) == DRFRONT_SUCCESS && ret); } +static bool +file_is_writable(const char *path) +{ + bool ret = false; + return (drfront_access(path, DRFRONT_WRITE, &ret) == DRFRONT_SUCCESS && ret); +} + static void get_full_path(const char *app, char *buf, size_t buflen/*# elements*/) { @@ -157,6 +164,9 @@ _tmain(int argc, const TCHAR *targv[]) bool is64, is32; analyzer_t *analyzer = NULL; std::string tracer_ops; +#ifdef UNIX + pid_t child; +#endif #if defined(WINDOWS) && !defined(_UNICODE) # error _UNICODE must be defined @@ -222,20 +232,29 @@ _tmain(int argc, const TCHAR *targv[]) assert(false); // won't get here } - // declare the analyzer based on its type - if (op_simulator_type.get_value() == CPU_CACHE) - analyzer = new cache_simulator_t; - else if (op_simulator_type.get_value() == TLB) - analyzer = new tlb_simulator_t; - else { - ERROR("Usage error: unsupported analyzer type. " - "Please choose " CPU_CACHE" or " TLB".\n"); - return false; - } + if (op_offline.get_value()) { + // Initial sanity check: may still be unwritable by this user, but this + // serves as at least an existence check. + if (!file_is_writable(op_outdir.get_value().c_str())) { + FATAL_ERROR("invalid -outdir %s", op_outdir.get_value().c_str()); + assert(false); // won't get here + } + } else { + // declare the analyzer based on its type + if (op_simulator_type.get_value() == CPU_CACHE) + analyzer = new cache_simulator_t; + else if (op_simulator_type.get_value() == TLB) + analyzer = new tlb_simulator_t; + else { + ERROR("Usage error: unsupported analyzer type. 
" + "Please choose " CPU_CACHE" or " TLB".\n"); + return false; + } - if (!analyzer->init()) { - FATAL_ERROR("failed to initialize analyzer"); - assert(false); // won't get here + if (!analyzer->init()) { + FATAL_ERROR("failed to initialize analyzer"); + assert(false); // won't get here + } } tracer_ops = op_tracer_ops.get_value(); @@ -245,12 +264,15 @@ _tmain(int argc, const TCHAR *targv[]) NOTIFY(1, "INFO", "DynamoRIO configuration directory is %s", buf); #ifdef UNIX - pid_t child = fork(); + if (op_offline.get_value()) + child = 0; + else + child = fork(); if (child < 0) { FATAL_ERROR("failed to fork"); assert(false); // won't get here } else if (child == 0) { - /* child */ + /* child, or offline where we exec this process */ if (!configure_application(app_name, app_argv, tracer_ops, &inject_data) || !dr_inject_process_inject(inject_data, false/*!force*/, NULL)) { FATAL_ERROR("unable to inject"); @@ -268,9 +290,11 @@ _tmain(int argc, const TCHAR *targv[]) dr_inject_process_run(inject_data); #endif - if (!analyzer->run()) { - FATAL_ERROR("failed to run analyzer"); - assert(false); // won't get here + if (!op_offline.get_value()) { + if (!analyzer->run()) { + FATAL_ERROR("failed to run analyzer"); + assert(false); // won't get here + } } #ifdef WINDOWS diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 129b707413f..85cc8e0695b 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -33,8 +33,8 @@ /* tracer.cpp: tracing client for feeding data to cache simulator. * - * Based on the memtrace_opt.c sample. - * XXX i#1703: add in more optimizations to improve performance. + * Originally built from the memtrace_opt.c sample. + * XXX i#1703, i#2001: add in more optimizations to improve performance. * XXX i#1703: perhaps refactor and split up to make it more * modular. 
*/ @@ -47,6 +47,7 @@ #include "drmgr.h" #include "drreg.h" #include "drutil.h" +#include "drx.h" #include "droption.h" #include "physaddr.h" #include "../common/trace_entry.h" @@ -57,12 +58,6 @@ # include "../../../core/unix/include/syscall_linux_arm.h" // for SYS_cacheflush #endif -// XXX: share these instead of duplicating -#define BUFFER_SIZE_BYTES(buf) sizeof(buf) -#define BUFFER_SIZE_ELEMENTS(buf) (BUFFER_SIZE_BYTES(buf) / sizeof((buf)[0])) -#define BUFFER_LAST_ELEMENT(buf) (buf)[BUFFER_SIZE_ELEMENTS(buf) - 1] -#define NULL_TERMINATE_BUFFER(buf) BUFFER_LAST_ELEMENT(buf) = 0 - #define NOTIFY(level, ...) do { \ if (op_verbose.get_value() >= (level)) \ dr_fprintf(STDERR, __VA_ARGS__); \ @@ -87,6 +82,7 @@ typedef struct { byte *seg_base; trace_entry_t *buf_base; uint64 num_refs; + file_t file; /* For offline traces */ } per_thread_t; #define MAX_NUM_DELAY_INSTRS 32 @@ -98,7 +94,7 @@ typedef struct { instr_t *delay_instrs[MAX_NUM_DELAY_INSTRS]; } user_data_t; -/* we write to a single global pipe */ +/* For online simulation, we write to a single global pipe */ static named_pipe_t ipc_pipe; static client_id_t client_id; @@ -148,6 +144,21 @@ atomic_pipe_write(void *drcontext, byte *pipe_start, byte *pipe_end) return pipe_start; } +static inline byte * +write_trace_data(void *drcontext, byte *towrite_start, byte *towrite_end) +{ + per_thread_t *data = (per_thread_t *) drmgr_get_tls_field(drcontext, tls_idx); + if (op_offline.get_value()) { + ssize_t size = towrite_end - towrite_start; + if (dr_write_file(data->file, towrite_start, size) < size) { + NOTIFY(0, "Fatal error: failed to write trace"); + dr_abort(); + } + return towrite_start; + } else + return atomic_pipe_write(drcontext, towrite_start, towrite_end); +} + static void memtrace(void *drcontext) { @@ -157,6 +168,7 @@ memtrace(void *drcontext) buf_ptr = BUF_PTR(data->seg_base); /* The initial slot is left empty for the thread entry, which we add here */ + /* FIXME i#1729: for offline, change this to a 
timestamp entry */ init_thread_entry(drcontext, data->buf_base); pipe_start = (byte *)data->buf_base; pipe_end = pipe_start; @@ -182,23 +194,29 @@ memtrace(void *drcontext) } } } - // Split up the buffer into multiple writes to ensure atomic pipe writes. - // We can only split before TRACE_TYPE_INSTR, assuming only a few data - // entries in between instr entries. - if (mem_ref->type == TRACE_TYPE_INSTR) { - if (((byte *)mem_ref - pipe_start) > ipc_pipe.get_atomic_write_size()) - pipe_start = atomic_pipe_write(drcontext, pipe_start, pipe_end); - // Advance pipe_end pointer - pipe_end = (byte *)mem_ref; + if (!op_offline.get_value()) { + // Split up the buffer into multiple writes to ensure atomic pipe writes. + // We can only split before TRACE_TYPE_INSTR, assuming only a few data + // entries in between instr entries. + if (mem_ref->type == TRACE_TYPE_INSTR) { + if (((byte *)mem_ref - pipe_start) > ipc_pipe.get_atomic_write_size()) + pipe_start = atomic_pipe_write(drcontext, pipe_start, pipe_end); + // Advance pipe_end pointer + pipe_end = (byte *)mem_ref; + } } } - // Write the rest to pipe - // The last few entries (e.g., instr + refs) may exceed the atomic write size, - // so we may need two writes. - if (((byte *)buf_ptr - pipe_start) > ipc_pipe.get_atomic_write_size()) - pipe_start = atomic_pipe_write(drcontext, pipe_start, pipe_end); - if (((byte *)buf_ptr - pipe_start) > (ssize_t)BUF_HDR_SLOTS_SIZE) - atomic_pipe_write(drcontext, pipe_start, (byte *)buf_ptr); + if (op_offline.get_value()) { + write_trace_data(drcontext, pipe_start, (byte *)buf_ptr); + } else { + // Write the rest to pipe + // The last few entries (e.g., instr + refs) may exceed the atomic write size, + // so we may need two writes. 
+ if (((byte *)buf_ptr - pipe_start) > ipc_pipe.get_atomic_write_size()) + pipe_start = atomic_pipe_write(drcontext, pipe_start, pipe_end); + if (((byte *)buf_ptr - pipe_start) > (ssize_t)BUF_HDR_SLOTS_SIZE) + atomic_pipe_write(drcontext, pipe_start, (byte *)buf_ptr); + } // Our instrumentation reads from buffer and skips the clean call if the // content is 0, so we need set zero in the trace buffer and set non-zero @@ -785,6 +803,7 @@ static void event_thread_init(void *drcontext) { trace_entry_t pid_info[2]; + char buf[MAXIMUM_PATH]; per_thread_t *data = (per_thread_t *) dr_thread_alloc(drcontext, sizeof(per_thread_t)); DR_ASSERT(data != NULL); @@ -803,15 +822,34 @@ event_thread_init(void *drcontext) memset((byte *)data->buf_base + TRACE_BUF_SIZE, -1, REDZONE_SIZE); /* put buf_base to TLS plus header slots as starting buf_ptr */ BUF_PTR(data->seg_base) = data->buf_base + BUF_HDR_SLOTS; + data->num_refs = 0; + + if (op_offline.get_value()) { + /* We do not need to call drx_init before using drx_open_unique_appid_file. + * Should we create a subdir for this process to group all of its thread files? 
+ */ + data->file = drx_open_unique_appid_file(op_outdir.get_value().c_str(), + dr_get_thread_id(drcontext), + "memtrace", "log", +#ifndef WINDOWS + DR_FILE_CLOSE_ON_FORK | +#endif + DR_FILE_ALLOW_LARGE, + buf, BUFFER_SIZE_ELEMENTS(buf)); + NULL_TERMINATE_BUFFER(buf); + if (data->file == INVALID_FILE) { + NOTIFY(0, "Fatal error: failed to create trace file %s", buf); + dr_abort(); + } + NOTIFY(1, "Created trace file %s\n", buf); + } /* pass pid and tid to the simulator to register current thread */ init_thread_entry(drcontext, &pid_info[0]); pid_info[1].type = TRACE_TYPE_PID; pid_info[1].size = sizeof(process_id_t); pid_info[1].addr = (addr_t) dr_get_process_id(); - if (ipc_pipe.write((void *)pid_info, sizeof(pid_info)) < (ssize_t)sizeof(pid_info)) - DR_ASSERT(false); - data->num_refs = 0; + write_trace_data(drcontext, (byte *)pid_info, (byte *)pid_info + sizeof(pid_info)); } static void @@ -828,6 +866,9 @@ event_thread_exit(void *drcontext) memtrace(drcontext); + if (op_offline.get_value()) + dr_close_file(data->file); + dr_mutex_lock(mutex); num_refs += data->num_refs; dr_mutex_unlock(mutex); @@ -839,7 +880,8 @@ static void event_exit(void) { dr_log(NULL, LOG_ALL, 1, "drcachesim num refs seen: " SZFMT"\n", num_refs); - ipc_pipe.close(); + if (!op_offline.get_value()) + ipc_pipe.close(); if (!dr_raw_tls_cfree(tls_offs, MEMTRACE_TLS_COUNT)) DR_ASSERT(false); @@ -875,21 +917,27 @@ dr_client_main(client_id_t id, int argc, const char *argv[]) droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str()); dr_abort(); } - if (op_ipc_name.get_value().empty()) { + if (!op_offline.get_value() && op_ipc_name.get_value().empty()) { NOTIFY(0, "Usage error: ipc name is required\nUsage:\n%s", droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str()); dr_abort(); + } else if (op_offline.get_value() && op_outdir.get_value().empty()) { + NOTIFY(0, "Usage error: outdir is required\nUsage:\n%s", + droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str()); + dr_abort(); } - if 
(!ipc_pipe.set_name(op_ipc_name.get_value().c_str())) - DR_ASSERT(false); - /* we want an isolated fd so we don't use ipc_pipe.open_for_write() */ - int fd = dr_open_file(ipc_pipe.get_pipe_path().c_str(), DR_FILE_WRITE_ONLY); - DR_ASSERT(fd != INVALID_FILE); - if (!ipc_pipe.set_fd(fd)) - DR_ASSERT(false); - if (!ipc_pipe.maximize_buffer()) - NOTIFY(1, "Failed to maximize pipe buffer: performance may suffer.\n"); + if (!op_offline.get_value()) { + if (!ipc_pipe.set_name(op_ipc_name.get_value().c_str())) + DR_ASSERT(false); + /* we want an isolated fd so we don't use ipc_pipe.open_for_write() */ + int fd = dr_open_file(ipc_pipe.get_pipe_path().c_str(), DR_FILE_WRITE_ONLY); + DR_ASSERT(fd != INVALID_FILE); + if (!ipc_pipe.set_fd(fd)) + DR_ASSERT(false); + if (!ipc_pipe.maximize_buffer()) + NOTIFY(1, "Failed to maximize pipe buffer: performance may suffer.\n"); + } if (!drmgr_init() || !drutil_init() || drreg_init(&ops) != DRREG_SUCCESS) DR_ASSERT(false);