From 43db19b99b87570726051289c7887556b93d13a0 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Thu, 5 Oct 2023 17:55:26 -0400 Subject: [PATCH] i#6344: Add -record_syscall to drmemtrace (#6348) Adds a new option -record_syscall to drmemtrace which records the parameter and success values for the given system call numbers. Just like with -record_function, the user must specify the parameter count. SYS_futex is left as traced by default, but it can be disabled. Adds documentation and a test. Further manual testing: ``` --------------------------------------------------------------------------- $ rm -rf drmemtrace.*.dir; bin64/drrun -t drcachesim -offline -record_syscall '1|-3&12|4&9|2' -record_syscall '12|2&158|4' -- suite/tests/bin/simple_app && bin64/drrun -t drcachesim -indir drmemtrace.*.dir -simulator_type view 2>&1 | egrep 'system call |function' Error: -record_syscall invalid parameter count -3 --------------------------------------------------------------------------- --------------------------------------------------------------------------- $ rm -rf drmemtrace.*.dir; bin64/drrun -t drcachesim -offline -record_syscall '1|-3&12|4&9|2' -record_syscall '12|2&158|4' -- suite/tests/bin/simple_app && bin64/drrun -t drcachesim -indir drmemtrace.*.dir -simulator_type view 2>&1 | egrep 'system call |function' --------------------------------------------------------------------------- --------------------------------------------------------------------------- $ rm -rf drmemtrace.*.dir; bin64/drrun -t drcachesim -offline -record_syscall '1|3&12|4&9|2' -record_syscall '12|2&158|4' -- suite/tests/bin/simple_app && bin64/drrun -t drcachesim -indir drmemtrace.*.dir -simulator_type view 2>&1 | egrep 'system call |function' ... Hello, world! 
32411 26808: 484049 32412 26808: 484049 32413 26808: 484049 32414 26808: 484049 32415 26808: 484049 32416 26808: 484049 50436 41126: 484049 50437 41126: 484049 50438 41126: 484049 50439 41126: 484049 50440 41126: 484049 50441 41126: 484049 50980 41485: 484049 52193 42391: 484049 52223 42409: 484049 52253 42430: 484049 52254 42430: 484049 52255 42430: 484049 52256 42430: 484049 52257 42430: 484049 52258 42430: 484049 52274 42439: 484049 54224 44056: 484049 54252 44074: 484049 54329 44130: 484049 54455 44208: 484049 54988 44591: 484049 55653 45105: 484049 55654 45105: 484049 55655 45105: 484049 55656 45105: 484049 55657 45105: 484049 55658 45105: 484049 ... --------------------------------------------------------------------------- ``` Fixes #6344 --- api/docs/release.dox | 1 + clients/drcachesim/common/options.cpp | 18 +++++ clients/drcachesim/common/options.h | 5 ++ clients/drcachesim/common/utils.h | 16 +++++ clients/drcachesim/docs/drcachesim.dox.in | 5 ++ .../offline-allasm-record-syscall.templatex | 24 +++++++ clients/drcachesim/tracer/func_trace.cpp | 18 ----- clients/drcachesim/tracer/tracer.cpp | 71 +++++++++++++++++-- suite/tests/CMakeLists.txt | 4 ++ 9 files changed, 137 insertions(+), 25 deletions(-) create mode 100644 clients/drcachesim/tests/offline-allasm-record-syscall.templatex diff --git a/api/docs/release.dox b/api/docs/release.dox index e917aa7d3d1..7ccae078cc1 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -157,6 +157,7 @@ Further non-compatibility-affecting changes include: - Added several routines to the #dynamorio::drmemtrace::memtrace_stream_t interface for drmemtrace analysis tools: get_output_cpuid(), get_workload_id(), get_input_id(), get_input_interface(). + - Added -record_syscall to drmemtrace for recording syscall parameters. **************************************************
diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 21f0a689549..40ce3cf8a18 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -749,6 +749,24 @@ droption_t op_record_replace_retaddr( "replacement, which has lower overhead, but runs the risk of breaking an " "application that examines or changes its own return addresses in the recorded " "functions."); +droption_t op_record_syscall( + DROPTION_SCOPE_CLIENT, "record_syscall", DROPTION_FLAG_ACCUMULATE, + OP_RECORD_FUNC_ITEM_SEP, "", "Record parameters for the specified syscall number(s).", + "Record the parameters and success of the specified system call number(s)." + " The option value should fit this format:" + " sycsall_number|parameter_number" + " E.g., -record_syscall \"2|2\" will record SYS_open's 2 parameters and whether" + " successful (1 for success or 0 for failure, in a function return value record)" + " for x86 Linux. SYS_futex is recorded by default on Linux and this option's value" + " adds to futex rather than replacing it (setting futex to 0 parameters disables)." + " The trace identifies which syscall owns each set of parameter and return value" + " records via a numeric ID equal to the syscall number + TRACE_FUNC_ID_SYSCALL_BASE." + " Recording multiple syscalls can be achieved by using the separator" + " \"" OP_RECORD_FUNC_ITEM_SEP + "\" (e.g., -record_syscall \"202|6" OP_RECORD_FUNC_ITEM_SEP "3|1\"), or" + " specifying multiple -record_syscall options." 
+ " It is up to the user to ensure the values are correct; a too-large parameter" + " count may cause tracing to fail with an error mid-run."); droption_t op_miss_count_threshold( DROPTION_SCOPE_FRONTEND, "miss_count_threshold", 50000, "For cache miss analysis: minimum LLC miss count for a load to be eligible for " diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index bb7fab05a4b..373dfa4f305 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -57,6 +57,10 @@ #define CACHE_TYPE_DATA "data" #define CACHE_TYPE_UNIFIED "unified" #define CACHE_PARENT_MEMORY "memory" +// The expected pattern for a single_op_value is: +// function_name|function_id|arguments_num +// where function_name can contain spaces (for instance, C++ namespace prefix) +#define PATTERN_SEPARATOR "|" #ifdef HAS_ZIP # define DEFAULT_TRACE_COMPRESSION_TYPE "zip" @@ -169,6 +173,7 @@ extern dynamorio::droption::droption_t op_record_heap; extern dynamorio::droption::droption_t op_record_heap_value; extern dynamorio::droption::droption_t op_record_dynsym_only; extern dynamorio::droption::droption_t op_record_replace_retaddr; +extern dynamorio::droption::droption_t op_record_syscall; extern dynamorio::droption::droption_t op_miss_count_threshold; extern dynamorio::droption::droption_t op_miss_frac_threshold; extern dynamorio::droption::droption_t op_confidence_threshold; diff --git a/clients/drcachesim/common/utils.h b/clients/drcachesim/common/utils.h index d24268be1bf..7bc5bec97ff 100644 --- a/clients/drcachesim/common/utils.h +++ b/clients/drcachesim/common/utils.h @@ -39,6 +39,7 @@ #include #include #include +#include namespace dynamorio { namespace drmemtrace { @@ -166,6 +167,21 @@ starts_with(const std::string &str, const std::string &with) return pos == 0; } +static inline std::vector +split_by(std::string s, const std::string &sep) +{ + size_t pos; + std::vector vec; + if (s.empty()) + return vec; + do { + pos = 
s.find(sep); + vec.push_back(s.substr(0, pos)); + s.erase(0, pos + sep.length()); + } while (pos != std::string::npos); + return vec; +} + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/docs/drcachesim.dox.in b/clients/drcachesim/docs/drcachesim.dox.in index 9bf58176848..71983630b94 100644 --- a/clients/drcachesim/docs/drcachesim.dox.in +++ b/clients/drcachesim/docs/drcachesim.dox.in @@ -1482,6 +1482,11 @@ The -record_heap parameter requests recording of a pre-determined set of functions related to heap allocation. The -record_heap_value paramter controls the contents of this set. +The tracer also supports recording system call argument and success +values via the option -record_syscall, which functions similarly to +-record_function with the system call number replacing the function +name. + **************************************************************************** \page sec_drcachesim_newtool Creating New Analysis Tools diff --git a/clients/drcachesim/tests/offline-allasm-record-syscall.templatex b/clients/drcachesim/tests/offline-allasm-record-syscall.templatex new file mode 100644 index 00000000000..ae4e25ee074 --- /dev/null +++ b/clients/drcachesim/tests/offline-allasm-record-syscall.templatex @@ -0,0 +1,24 @@ +Adios world! +Adios world! +Adios world! +Adios world! +Adios world! +Adios world! +Adios world! +Adios world! +Adios world! +Adios world! 
+.* + 43 20: .* ifetch 2 byte\(s\) @ 0x.* 0f 05 syscall + 44 20: .* + 45 20: .* + 46 20: .* + 47 20: .* + 48 20: .* + 49 20: .* + 50 20: .* + 51 20: .* + 52 20: .* + 53 20: .* + 54 20: .* +.* diff --git a/clients/drcachesim/tracer/func_trace.cpp b/clients/drcachesim/tracer/func_trace.cpp index b66091d01b6..ac9373aa063 100644 --- a/clients/drcachesim/tracer/func_trace.cpp +++ b/clients/drcachesim/tracer/func_trace.cpp @@ -59,11 +59,6 @@ namespace dynamorio { namespace drmemtrace { -// The expected pattern for a single_op_value is: -// function_name|function_id|arguments_num -// where function_name can contain spaces (for instance, C++ namespace prefix) -#define PATTERN_SEPARATOR "|" - #define NOTIFY(level, ...) \ do { \ if (op_verbose.get_value() >= (level)) \ @@ -384,19 +379,6 @@ func_trace_disabled_instrument_event(void *drcontext, void *tag, instrlist_t *bb translating, user_data); } -static std::vector -split_by(std::string s, std::string sep) -{ - size_t pos; - std::vector vec; - do { - pos = s.find(sep); - vec.push_back(s.substr(0, pos)); - s.erase(0, pos + sep.length()); - } while (pos != std::string::npos); - return vec; -} - static void init_funcs_str_and_sep() { diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 7171db6ea70..f2be561315e 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -60,6 +60,7 @@ #include "drwrap.h" #include "drx.h" #include "func_trace.h" +#include "hashtable.h" #include "instr_counter.h" #include "instru.h" #include "named_pipe.h" @@ -179,6 +180,11 @@ bool attached_midway; static bool reported_sg_warning = false; #endif +// We may be able to safely use std::unordered_map as at runtime we only need +// to do lookups which shouldn't need heap or locks, but to be safe we use +// the DR hashtable. 
+static hashtable_t syscall2args; + static bool bbdup_instr_counting_enabled() { @@ -1469,6 +1475,52 @@ event_filter_syscall(void *drcontext, int sysnum) return true; } +static void +init_record_syscall() +{ + // We only modify the table at init time and do not want a lock for runtime + // lookups. + hashtable_init_ex(&syscall2args, 8, HASH_INTPTR, /*strdup=*/false, /*synch=*/false, + nullptr, nullptr, nullptr); +#ifdef LINUX + // We trace futex by default. Add it first so a user can disable it. + static constexpr int FUTEX_ARG_COUNT = 6; + if (!hashtable_add(&syscall2args, + reinterpret_cast(static_cast(SYS_futex)), + reinterpret_cast(static_cast(FUTEX_ARG_COUNT)))) + DR_ASSERT(false && "Failed to add to syscall2args internal hashtable"); +#endif + auto op_values = + split_by(op_record_syscall.get_value(), op_record_syscall.get_value_separator()); + for (auto &single_op_value : op_values) { + auto items = split_by(single_op_value, PATTERN_SEPARATOR); + if (items.size() != 2) { + FATAL("Error: -record_syscall takes exactly 2 fields for each item: %s\n", + op_record_syscall.get_value().c_str()); + } + int num = atoi(items[0].c_str()); + if (num < 0) + FATAL("Error: -record_syscall invalid number %d\n", num); + int args = atoi(items[1].c_str()); + // Sanity check. Some Windows syscalls have dozens of parameters but we + // should not see anything as high as 100. 
+ static constexpr int MAX_SYSCALL_ARGS = 100; + if (args < 0 || args > MAX_SYSCALL_ARGS) + FATAL("Error: -record_syscall invalid parameter count %d\n", args); + dr_log(NULL, DR_LOG_ALL, 1, "Tracing syscall #%d args=%d\n", num, args); + NOTIFY(1, "Tracing syscall #%d args=%d\n", num, args); + hashtable_add_replace(&syscall2args, + reinterpret_cast(static_cast(num)), + reinterpret_cast(static_cast(args))); + } +} + +static void +exit_record_syscall() +{ + hashtable_delete(&syscall2args); +} + static bool event_pre_syscall(void *drcontext, int sysnum) { @@ -1498,20 +1550,21 @@ event_pre_syscall(void *drcontext, int sysnum) BUF_PTR(data->seg_base) += instru->append_marker( BUF_PTR(data->seg_base), TRACE_MARKER_TYPE_SYSCALL, sysnum); -#ifdef LINUX - if (sysnum == SYS_futex) { - static constexpr int FUTEX_ARG_COUNT = 6; + + // Record parameter values, if requested. + int args = static_cast(reinterpret_cast(hashtable_lookup( + &syscall2args, reinterpret_cast(static_cast(sysnum))))); + if (args > 0) { BUF_PTR(data->seg_base) += instru->append_marker( BUF_PTR(data->seg_base), TRACE_MARKER_TYPE_FUNC_ID, static_cast(func_trace_t::TRACE_FUNC_ID_SYSCALL_BASE) + IF_X64_ELSE(sysnum, (sysnum & 0xffff))); - for (int i = 0; i < FUTEX_ARG_COUNT; ++i) { + for (int i = 0; i < args; ++i) { BUF_PTR(data->seg_base) += instru->append_marker( BUF_PTR(data->seg_base), TRACE_MARKER_TYPE_FUNC_ARG, dr_syscall_get_param(drcontext, i)); } } -#endif } // Filtered traces take a while to fill up the buffer, so we do an output // before each syscall so we can check for various thresholds more frequently. @@ -1576,7 +1629,9 @@ event_post_syscall(void *drcontext, int sysnum) #ifdef LINUX if (!op_L0I_filter.get_value()) { /* No syscall data unless full instr trace. 
*/ - if (sysnum == SYS_futex) { + if (hashtable_lookup(&syscall2args, + reinterpret_cast(static_cast(sysnum))) != + nullptr) { dr_syscall_result_info_t info = { sizeof(info), }; @@ -1921,6 +1976,7 @@ event_exit(void) num_refs_racy = 0; num_filter_refs_racy = 0; + exit_record_syscall(); exit_io(); dr_mutex_destroy(mutex); @@ -1944,8 +2000,8 @@ init_offline_dir(void) */ dr_snprintf(subdir_prefix, BUFFER_SIZE_ELEMENTS(subdir_prefix), "%s", op_subdir_prefix.get_value().c_str()); - NULL_TERMINATE_BUFFER(subdir_prefix); /* We do not need to call drx_init before using drx_open_unique_appid_file. */ + NULL_TERMINATE_BUFFER(subdir_prefix); for (i = 0; i < NUM_OF_TRIES; i++) { /* We use drx_open_unique_appid_file with DRX_FILE_SKIP_OPEN to get a * directory name for creation. Retry if the same name directory already @@ -2227,6 +2283,7 @@ drmemtrace_client_main(client_id_t id, int argc, const char *argv[]) op_L0D_filter.get_value()) op_disable_optimizations.set_value(true); + init_record_syscall(); event_inscount_init(); init_io(); diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 46fea1842a2..09e3a163c60 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4294,6 +4294,10 @@ if (BUILD_CLIENTS) # We test counting encodings for online. "-instr_encodings -simulator_type basic_counts" "") unset(tool.drcachesim.allasm-repstr-basic-counts_rawtemp) # use preprocessor + + # Test -record_syscall on SYS_write == #1 with 3 args. + torunonly_drcacheoff(allasm-record-syscall allasm_repstr + "-record_syscall 1|3" "@-simulator_type@view" "") endif (UNIX AND X86 AND X64) torunonly_drcacheoff(invariant_checker ${ci_shared_app}