Skip to content

Commit

Permalink
i#6344: Add -record_syscall to drmemtrace (#6348)
Browse files Browse the repository at this point in the history
Adds a new option -record_syscall to drmemtrace which records the
parameter and success values for the given system call numbers. Just
like with -record_function, the user must specify the parameter count.

SYS_futex is left as traced by default, but it can be disabled.

Adds documentation and a test.

Further manual testing:
```
  ---------------------------------------------------------------------------
  $ rm -rf drmemtrace.*.dir; bin64/drrun -t drcachesim -offline -record_syscall '1|-3&12|4&9|2' -record_syscall '12|2&158|4' -- suite/tests/bin/simple_app && bin64/drrun -t drcachesim -indir drmemtrace.*.dir -simulator_type view 2>&1 | egrep 'system call |function'
  Error: -record_syscall invalid parameter count -3
  ---------------------------------------------------------------------------

  ---------------------------------------------------------------------------
  $ rm -rf drmemtrace.*.dir; bin64/drrun -t drcachesim -offline -record_syscall '1|-3&12|4&9|2' -record_syscall '12|2&158|4' -- suite/tests/bin/simple_app && bin64/drrun -t drcachesim -indir drmemtrace.*.dir -simulator_type view 2>&1 | egrep 'system call |function'
  <Application simple_app (484125) DynamoRIO usage error : invalid system call parameter number>
  ---------------------------------------------------------------------------

  ---------------------------------------------------------------------------
  $ rm -rf drmemtrace.*.dir; bin64/drrun -t drcachesim -offline -record_syscall '1|3&12|4&9|2' -record_syscall '12|2&158|4' -- suite/tests/bin/simple_app && bin64/drrun -t drcachesim -indir drmemtrace.*.dir -simulator_type view 2>&1 | egrep 'system call |function'
  ...
  Hello, world!
  <Stopping application simple_app (484049)>
         32411       26808:      484049 <marker: system call 12>
         32412       26808:      484049 <marker: function==syscall #12>
         32413       26808:      484049 <marker: function argument 0x0>
         32414       26808:      484049 <marker: function argument 0x7ffc87c52d4c>
         32415       26808:      484049 <marker: function==syscall #12>
         32416       26808:      484049 <marker: function return value 0x1>
         50436       41126:      484049 <marker: system call 9>
         50437       41126:      484049 <marker: function==syscall #9>
         50438       41126:      484049 <marker: function argument 0x0>
         50439       41126:      484049 <marker: function argument 0x2000>
         50440       41126:      484049 <marker: function==syscall #9>
         50441       41126:      484049 <marker: function return value 0x1>
         50980       41485:      484049 <marker: system call 21>
         52193       42391:      484049 <marker: system call 257>
         52223       42409:      484049 <marker: system call 262>
         52253       42430:      484049 <marker: system call 9>
         52254       42430:      484049 <marker: function==syscall #9>
         52255       42430:      484049 <marker: function argument 0x0>
         52256       42430:      484049 <marker: function argument 0x1b5c7>
         52257       42430:      484049 <marker: function==syscall #9>
         52258       42430:      484049 <marker: function return value 0x1>
         52274       42439:      484049 <marker: system call 3>
         54224       44056:      484049 <marker: system call 257>
         54252       44074:      484049 <marker: system call 0>
         54329       44130:      484049 <marker: system call 17>
         54455       44208:      484049 <marker: system call 262>
         54988       44591:      484049 <marker: system call 17>
         55653       45105:      484049 <marker: system call 9>
         55654       45105:      484049 <marker: function==syscall #9>
         55655       45105:      484049 <marker: function argument 0x0>
         55656       45105:      484049 <marker: function argument 0x1e1f50>
         55657       45105:      484049 <marker: function==syscall #9>
         55658       45105:      484049 <marker: function return value 0x1>
         ...
  ---------------------------------------------------------------------------
```
Fixes #6344
  • Loading branch information
derekbruening authored Oct 5, 2023
1 parent d3304e7 commit 43db19b
Show file tree
Hide file tree
Showing 9 changed files with 137 additions and 25 deletions.
1 change: 1 addition & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ Further non-compatibility-affecting changes include:
- Added several routines to the #dynamorio::drmemtrace::memtrace_stream_t interface
for drmemtrace analysis tools: get_output_cpuid(), get_workload_id(),
get_input_id(), get_input_interface().
- Added -record_syscall to drmemtrace for recording syscall parameters.

**************************************************
<hr>
Expand Down
18 changes: 18 additions & 0 deletions clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,24 @@ droption_t<bool> op_record_replace_retaddr(
"replacement, which has lower overhead, but runs the risk of breaking an "
"application that examines or changes its own return addresses in the recorded "
"functions.");
// Accumulating client option naming the system calls whose parameters and
// success value should be recorded in the trace.  Each item has the form
// "syscall_number|parameter_count"; multiple items are joined with
// OP_RECORD_FUNC_ITEM_SEP or supplied via repeated -record_syscall options.
droption_t<std::string> op_record_syscall(
    DROPTION_SCOPE_CLIENT, "record_syscall", DROPTION_FLAG_ACCUMULATE,
    OP_RECORD_FUNC_ITEM_SEP, "", "Record parameters for the specified syscall number(s).",
    "Record the parameters and success of the specified system call number(s)."
    " The option value should fit this format:"
    " syscall_number|parameter_number"
    " E.g., -record_syscall \"2|2\" will record SYS_open's 2 parameters and whether"
    " successful (1 for success or 0 for failure, in a function return value record)"
    " for x86 Linux. SYS_futex is recorded by default on Linux and this option's value"
    " adds to futex rather than replacing it (setting futex to 0 parameters disables)."
    " The trace identifies which syscall owns each set of parameter and return value"
    " records via a numeric ID equal to the syscall number + TRACE_FUNC_ID_SYSCALL_BASE."
    " Recording multiple syscalls can be achieved by using the separator"
    " \"" OP_RECORD_FUNC_ITEM_SEP
    "\" (e.g., -record_syscall \"202|6" OP_RECORD_FUNC_ITEM_SEP "3|1\"), or"
    " specifying multiple -record_syscall options."
    " It is up to the user to ensure the values are correct; a too-large parameter"
    " count may cause tracing to fail with an error mid-run.");
droption_t<unsigned int> op_miss_count_threshold(
DROPTION_SCOPE_FRONTEND, "miss_count_threshold", 50000,
"For cache miss analysis: minimum LLC miss count for a load to be eligible for "
Expand Down
5 changes: 5 additions & 0 deletions clients/drcachesim/common/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@
#define CACHE_TYPE_DATA "data"
#define CACHE_TYPE_UNIFIED "unified"
#define CACHE_PARENT_MEMORY "memory"
// Separator between the fields of one item in a recording option value.
// The expected pattern for a single_op_value is:
// function_name|function_id|arguments_num
// where function_name can contain spaces (for instance, C++ namespace prefix)
// Items of -record_syscall use the same separator with the two-field pattern:
// syscall_number|parameter_count
#define PATTERN_SEPARATOR "|"

#ifdef HAS_ZIP
# define DEFAULT_TRACE_COMPRESSION_TYPE "zip"
Expand Down Expand Up @@ -169,6 +173,7 @@ extern dynamorio::droption::droption_t<bool> op_record_heap;
extern dynamorio::droption::droption_t<std::string> op_record_heap_value;
extern dynamorio::droption::droption_t<bool> op_record_dynsym_only;
extern dynamorio::droption::droption_t<bool> op_record_replace_retaddr;
extern dynamorio::droption::droption_t<std::string> op_record_syscall;
extern dynamorio::droption::droption_t<unsigned int> op_miss_count_threshold;
extern dynamorio::droption::droption_t<double> op_miss_frac_threshold;
extern dynamorio::droption::droption_t<double> op_confidence_threshold;
Expand Down
16 changes: 16 additions & 0 deletions clients/drcachesim/common/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <iomanip>
#include <sstream>
#include <string>
#include <vector>

namespace dynamorio {
namespace drmemtrace {
Expand Down Expand Up @@ -166,6 +167,21 @@ starts_with(const std::string &str, const std::string &with)
return pos == 0;
}

// Splits "s" into the fields delimited by each occurrence of "sep".
// Returns an empty vector when "s" is empty.  Otherwise returns one entry per
// field in order, including empty entries produced by leading, trailing, or
// adjacent separators.  An empty "sep" delimits nothing, so "s" is returned
// whole as the only entry.
static inline std::vector<std::string>
split_by(std::string s, const std::string &sep)
{
    std::vector<std::string> vec;
    if (s.empty())
        return vec;
    if (sep.empty()) {
        // find("") matches at every offset without consuming input, so
        // scanning for it would never terminate.
        vec.push_back(s);
        return vec;
    }
    // Scan with a moving offset rather than erasing the consumed prefix, which
    // avoids re-copying the remainder of the string for every field.
    size_t start = 0;
    for (;;) {
        const size_t pos = s.find(sep, start);
        if (pos == std::string::npos) {
            // The final field runs to the end of the string.
            vec.push_back(s.substr(start));
            break;
        }
        vec.push_back(s.substr(start, pos - start));
        start = pos + sep.length();
    }
    return vec;
}

} // namespace drmemtrace
} // namespace dynamorio

Expand Down
5 changes: 5 additions & 0 deletions clients/drcachesim/docs/drcachesim.dox.in
Original file line number Diff line number Diff line change
Expand Up @@ -1482,6 +1482,11 @@ The -record_heap parameter requests recording of a pre-determined set
of functions related to heap allocation. The -record_heap_value
parameter controls the contents of this set.

The tracer also supports recording system call argument and success
values via the option -record_syscall, which functions similarly to
-record_function with the system call number replacing the function
name.

****************************************************************************
\page sec_drcachesim_newtool Creating New Analysis Tools

Expand Down
24 changes: 24 additions & 0 deletions clients/drcachesim/tests/offline-allasm-record-syscall.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Adios world!
Adios world!
Adios world!
Adios world!
Adios world!
Adios world!
Adios world!
Adios world!
Adios world!
Adios world!
.*
43 20: .* ifetch 2 byte\(s\) @ 0x.* 0f 05 syscall
44 20: .* <marker: timestamp .*>
45 20: .* <marker: tid .* on core .*>
46 20: .* <marker: system call 1>
47 20: .* <marker: maybe-blocking system call>
48 20: .* <marker: function==syscall #1>
49 20: .* <marker: function argument 0x2>
50 20: .* <marker: function argument 0x.*>
51 20: .* <marker: function argument 0xd>
52 20: .* <marker: function==syscall #1>
53 20: .* <marker: function return value 0x1>
54 20: .* <marker: timestamp .*>
.*
18 changes: 0 additions & 18 deletions clients/drcachesim/tracer/func_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,6 @@
namespace dynamorio {
namespace drmemtrace {

// The expected pattern for a single_op_value is:
// function_name|function_id|arguments_num
// where function_name can contain spaces (for instance, C++ namespace prefix)
#define PATTERN_SEPARATOR "|"

#define NOTIFY(level, ...) \
do { \
if (op_verbose.get_value() >= (level)) \
Expand Down Expand Up @@ -384,19 +379,6 @@ func_trace_disabled_instrument_event(void *drcontext, void *tag, instrlist_t *bb
translating, user_data);
}

// Splits "s" into the fields delimited by each occurrence of "sep" and returns
// them in order.  Leading, trailing, or adjacent separators produce empty
// entries, and an empty "s" yields a single empty entry.  An empty "sep"
// delimits nothing, so "s" is returned whole as the only entry.
static std::vector<std::string>
split_by(std::string s, std::string sep)
{
    std::vector<std::string> vec;
    if (sep.empty()) {
        // find("") matches at every offset without consuming input, so
        // scanning for it would never terminate.
        vec.push_back(s);
        return vec;
    }
    // Scan with a moving offset rather than erasing the consumed prefix.
    size_t start = 0;
    for (;;) {
        const size_t pos = s.find(sep, start);
        if (pos == std::string::npos) {
            // The final field runs to the end of the string.
            vec.push_back(s.substr(start));
            break;
        }
        vec.push_back(s.substr(start, pos - start));
        start = pos + sep.length();
    }
    return vec;
}

static void
init_funcs_str_and_sep()
{
Expand Down
71 changes: 64 additions & 7 deletions clients/drcachesim/tracer/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
#include "drwrap.h"
#include "drx.h"
#include "func_trace.h"
#include "hashtable.h"
#include "instr_counter.h"
#include "instru.h"
#include "named_pipe.h"
Expand Down Expand Up @@ -179,6 +180,11 @@ bool attached_midway;
static bool reported_sg_warning = false;
#endif

// Maps a system call number (the key) to the number of its parameters to
// record (the payload).  Populated by init_record_syscall().
// We may be able to safely use std::unordered_map as at runtime we only need
// to do lookups which shouldn't need heap or locks, but to be safe we use
// the DR hashtable.
static hashtable_t syscall2args;

static bool
bbdup_instr_counting_enabled()
{
Expand Down Expand Up @@ -1469,6 +1475,52 @@ event_filter_syscall(void *drcontext, int sysnum)
return true;
}

static void
init_record_syscall()
{
// We only modify the table at init time and do not want a lock for runtime
// lookups.
hashtable_init_ex(&syscall2args, 8, HASH_INTPTR, /*strdup=*/false, /*synch=*/false,
nullptr, nullptr, nullptr);
#ifdef LINUX
// We trace futex by default. Add it first so a use can disable.
static constexpr int FUTEX_ARG_COUNT = 6;
if (!hashtable_add(&syscall2args,
reinterpret_cast<void *>(static_cast<ptr_int_t>(SYS_futex)),
reinterpret_cast<void *>(static_cast<ptr_int_t>(FUTEX_ARG_COUNT))))
DR_ASSERT(false && "Failed to add to syscall2args internal hashtable");
#endif
auto op_values =
split_by(op_record_syscall.get_value(), op_record_syscall.get_value_separator());
for (auto &single_op_value : op_values) {
auto items = split_by(single_op_value, PATTERN_SEPARATOR);
if (items.size() != 2) {
FATAL("Error: -record_syscall takes exactly 2 fields for each item: %s\n",
op_record_syscall.get_value().c_str());
}
int num = atoi(items[0].c_str());
if (num < 0)
FATAL("Error: -record_syscall invalid number %d\n", num);
int args = atoi(items[1].c_str());
// Sanity check. Some Windows syscalls have dozens of parameters but we
// should not see anything as high as 100.
static constexpr int MAX_SYSCALL_ARGS = 100;
if (args < 0 || args > MAX_SYSCALL_ARGS)
FATAL("Error: -record_syscall invalid parameter count %d\n", args);
dr_log(NULL, DR_LOG_ALL, 1, "Tracing syscall #%d args=%d\n", num, args);
NOTIFY(1, "Tracing syscall #%d args=%d\n", num, args);
hashtable_add_replace(&syscall2args,
reinterpret_cast<void *>(static_cast<ptr_int_t>(num)),
reinterpret_cast<void *>(static_cast<ptr_int_t>(args)));
}
}

// Releases the syscall2args table that init_record_syscall() initialized.
static void
exit_record_syscall()
{
hashtable_delete(&syscall2args);
}

static bool
event_pre_syscall(void *drcontext, int sysnum)
{
Expand Down Expand Up @@ -1498,20 +1550,21 @@ event_pre_syscall(void *drcontext, int sysnum)

BUF_PTR(data->seg_base) += instru->append_marker(
BUF_PTR(data->seg_base), TRACE_MARKER_TYPE_SYSCALL, sysnum);
#ifdef LINUX
if (sysnum == SYS_futex) {
static constexpr int FUTEX_ARG_COUNT = 6;

// Record parameter values, if requested.
int args = static_cast<int>(reinterpret_cast<ptr_int_t>(hashtable_lookup(
&syscall2args, reinterpret_cast<void *>(static_cast<ptr_int_t>(sysnum)))));
if (args > 0) {
BUF_PTR(data->seg_base) += instru->append_marker(
BUF_PTR(data->seg_base), TRACE_MARKER_TYPE_FUNC_ID,
static_cast<uintptr_t>(func_trace_t::TRACE_FUNC_ID_SYSCALL_BASE) +
IF_X64_ELSE(sysnum, (sysnum & 0xffff)));
for (int i = 0; i < FUTEX_ARG_COUNT; ++i) {
for (int i = 0; i < args; ++i) {
BUF_PTR(data->seg_base) += instru->append_marker(
BUF_PTR(data->seg_base), TRACE_MARKER_TYPE_FUNC_ARG,
dr_syscall_get_param(drcontext, i));
}
}
#endif
}
// Filtered traces take a while to fill up the buffer, so we do an output
// before each syscall so we can check for various thresholds more frequently.
Expand Down Expand Up @@ -1576,7 +1629,9 @@ event_post_syscall(void *drcontext, int sysnum)

#ifdef LINUX
if (!op_L0I_filter.get_value()) { /* No syscall data unless full instr trace. */
if (sysnum == SYS_futex) {
if (hashtable_lookup(&syscall2args,
reinterpret_cast<void *>(static_cast<ptr_int_t>(sysnum))) !=
nullptr) {
dr_syscall_result_info_t info = {
sizeof(info),
};
Expand Down Expand Up @@ -1921,6 +1976,7 @@ event_exit(void)
num_refs_racy = 0;
num_filter_refs_racy = 0;

exit_record_syscall();
exit_io();

dr_mutex_destroy(mutex);
Expand All @@ -1944,8 +2000,8 @@ init_offline_dir(void)
*/
dr_snprintf(subdir_prefix, BUFFER_SIZE_ELEMENTS(subdir_prefix), "%s",
op_subdir_prefix.get_value().c_str());
NULL_TERMINATE_BUFFER(subdir_prefix);
/* We do not need to call drx_init before using drx_open_unique_appid_file. */
NULL_TERMINATE_BUFFER(subdir_prefix);
for (i = 0; i < NUM_OF_TRIES; i++) {
/* We use drx_open_unique_appid_file with DRX_FILE_SKIP_OPEN to get a
* directory name for creation. Retry if the same name directory already
Expand Down Expand Up @@ -2227,6 +2283,7 @@ drmemtrace_client_main(client_id_t id, int argc, const char *argv[])
op_L0D_filter.get_value())
op_disable_optimizations.set_value(true);

init_record_syscall();
event_inscount_init();
init_io();

Expand Down
4 changes: 4 additions & 0 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4294,6 +4294,10 @@ if (BUILD_CLIENTS)
# We test counting encodings for online.
"-instr_encodings -simulator_type basic_counts" "")
unset(tool.drcachesim.allasm-repstr-basic-counts_rawtemp) # use preprocessor

# Test -record_syscall on SYS_write == #1 with 3 args.
torunonly_drcacheoff(allasm-record-syscall allasm_repstr
"-record_syscall 1|3" "@-simulator_type@view" "")
endif (UNIX AND X86 AND X64)

torunonly_drcacheoff(invariant_checker ${ci_shared_app}
Expand Down

0 comments on commit 43db19b

Please sign in to comment.