From 2fe13f253c27ca769e156afd6e6feb709649b600 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Tue, 27 Aug 2024 17:30:17 -0400 Subject: [PATCH 1/3] i#6940: Do not always load binaries in view tool (#6941) The view tool was blindly loading binaries even for traces that have encodings. This leads to fatal errors when binaries have changed, even when the change has no impact on viewing a trace. We fix that here by reading the filtype at init time. Work around the #6942 crash by always setting the disasm syntax to DR style for REGDEPS traces. This change actually sets the disasm syntax to AT&T by default if no module path is passed in; which is what it is supposed to do: but it was not doing that and this breaks 3 tests comparing DR-style output. We put in a quick fix to request DR style for those tests. Tested locally where the view tool asserts without this fix. Issue: #6940, #6942 Fixes #6940 --- clients/drcachesim/tools/view.cpp | 41 +++++++++++++++++++++---------- suite/tests/CMakeLists.txt | 6 ++--- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index 6a05363ab9f..7817169c89d 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -97,6 +97,30 @@ view_t::initialize_stream(memtrace_stream_t *serial_stream) serial_stream_ = serial_stream; print_header(); dcontext_.dcontext = dr_standalone_init(); + + dr_disasm_flags_t flags = IF_X86_ELSE( + DR_DISASM_ATT, + IF_AARCH64_ELSE(DR_DISASM_DR, IF_RISCV64_ELSE(DR_DISASM_RISCV, DR_DISASM_ARM))); + if (knob_syntax_ == "intel") { + flags = DR_DISASM_INTEL; + } else if (knob_syntax_ == "dr") { + flags = DR_DISASM_DR; + } else if (knob_syntax_ == "arm") { + flags = DR_DISASM_ARM; + } else if (knob_syntax_ == "riscv") { + flags = DR_DISASM_RISCV; + } + disassemble_set_syntax(flags); + + // Get the filetype up front if available. + // We leave setting and processing filetype_ to when we see the marker. + if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, serial_stream->get_filetype())) { + // We do not need to try to find and load the binaries, so don't (as trying + // can result in errors if those are not present or have changed). + has_modules_ = false; + return ""; + } + if (module_file_path_.empty()) { has_modules_ = false; } else { @@ -118,19 +142,6 @@ view_t::initialize_stream(memtrace_stream_t *serial_stream) std::string error = module_mapper_->get_last_error(); if (!error.empty()) return "Failed to load binaries: " + error; - dr_disasm_flags_t flags = IF_X86_ELSE( - DR_DISASM_ATT, - IF_AARCH64_ELSE(DR_DISASM_DR, IF_RISCV64_ELSE(DR_DISASM_RISCV, DR_DISASM_ARM))); - if (knob_syntax_ == "intel") { - flags = DR_DISASM_INTEL; - } else if (knob_syntax_ == "dr") { - flags = DR_DISASM_DR; - } else if (knob_syntax_ == "arm") { - flags = DR_DISASM_ARM; - } else if (knob_syntax_ == "riscv") { - flags = DR_DISASM_RISCV; - } - disassemble_set_syntax(flags); return ""; } @@ -236,6 +247,10 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) */ if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype_)) { dr_set_isa_mode(dcontext_.dcontext, DR_ISA_REGDEPS, nullptr); + // Ignore the requested syntax: we only support DR style. + // XXX i#6942: Should we return an error if the users asks for + // another syntax? Should DR's libraries return an error? + disassemble_set_syntax(DR_DISASM_DR); } return true; // Do not count toward -sim_refs yet b/c we don't have tid. case TRACE_MARKER_TYPE_TIMESTAMP: diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index a40a5073300..dc55f029ab1 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4276,14 +4276,14 @@ if (BUILD_CLIENTS) torunonly_api(tool.drcacheoff.skip "${drcachesim_path}" "offline-skip" "" # Test invariants with global headers after skipping instrs. # Here the skip does not find real headers and we expect synthetic. - "-tool;view;-infile;${zip_path};${test_mode_flag};-skip_instrs;62;-sim_refs;10" + "-tool;view;-view_syntax;dr;-infile;${zip_path};${test_mode_flag};-skip_instrs;62;-sim_refs;10" OFF OFF) set(tool.drcacheoff.skip_basedir "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests") set(tool.drcacheoff.skip_rawtemp ON) # no preprocessor torunonly_api(tool.drcacheoff.skip2 "${drcachesim_path}" "offline-skip2" "" # Test invariants with global headers after skipping instrs. # Here the skip finds real headers and we expect no synthetic. - "-tool;view;-infile;${zip_path};${test_mode_flag};-skip_instrs;63;-sim_refs;10" + "-tool;view;-view_syntax;dr;-infile;${zip_path};${test_mode_flag};-skip_instrs;63;-sim_refs;10" OFF OFF) set(tool.drcacheoff.skip2_basedir "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests") set(tool.drcacheoff.skip2_rawtemp ON) # no preprocessor @@ -4813,7 +4813,7 @@ if (BUILD_CLIENTS) file(MAKE_DIRECTORY ${srcdir}) file(COPY ${zip_path} DESTINATION ${srcdir}) torunonly_api(tool.drcacheoff.trim "${drcachesim_path}" "offline-trim" "" - "-tool;view;-indir;${outdir};${test_mode_flag}" + "-tool;view;-view_syntax;dr;-indir;${outdir};${test_mode_flag}" OFF OFF) set(tool.drcacheoff.trim_runcmp "${CMAKE_CURRENT_SOURCE_DIR}/runmulti.cmake") # The filter overwrites any existing file in the dir from a prior run. From f57c4a432b17cba051c0efda144eaa0258d444b6 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Tue, 27 Aug 2024 21:25:56 -0400 Subject: [PATCH 2/3] i#975 static DR: Document statically linked DR usage (#6943) Adds a new documentation section on statically linking DR and clients into the application, and the limitations of that mode. Issue: #975 --- api/docs/deployment.dox | 24 +++++++++++++++++++++++- api/docs/intro.dox | 9 +++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/api/docs/deployment.dox b/api/docs/deployment.dox index 63215f62cb4..cb845b698d6 100644 --- a/api/docs/deployment.dox +++ b/api/docs/deployment.dox @@ -526,7 +526,7 @@ can use the convenience of the \ref page_droption. Client options are not allowed to contain semicolons. Additionally, the client option string combined with the path to the client library cannot -contain all three quote characters (', ", `) simultaneously. +contain all three quote characters (\', \", \`) simultaneously. To ensure a client is loaded into a child process of a different bitwidth (i.e., a 32-bit child created by a 64-bit parent), use the \c @@ -699,6 +699,28 @@ automatically, as the linker flags for shared libraries are separate from those for executables. +\section sec_static_DR Statically Linking DynamoRIO + +Limited support is provided to statically link DynamoRIO and clients +into the application itself. This generally requires the start-stop +interface (see \ref sec_startstop) in order to trigger takeover. + +The configure_DynamoRIO_static and use_DynamoRIO_static_client CMake +utilities are provided to help in setting up the link steps. + +One significant downside of statically linking is that DynamoRIO is +not able to provide library isolation in this mode. This means that +clients cannot safely use third-party library code at runtime. It is +considered safe to use libraries during process-wide initialization, +as that occurs in just one thread which is at a relatively safe point +having called something like dr_app_start(). The +#DR_DISALLOW_UNSAFE_STATIC and dr_allow_unsafe_static_behavior() +features are available to help detect violations of this rule, but +they only detect these violations when using DynamoRIO in its dynamic +library form. Thus, the recommendation is to support both modes and +employ regression tests which set #DR_DISALLOW_UNSAFE_STATIC in +dynamic mode in order to catch unsafe library code being added later. + *************************************************************************** \section sec_options DynamoRIO Runtime Options diff --git a/api/docs/intro.dox b/api/docs/intro.dox index 2870c0b0e09..d267053e26c 100644 --- a/api/docs/intro.dox +++ b/api/docs/intro.dox @@ -1,5 +1,5 @@ /* ****************************************************************************** - * Copyright (c) 2010-2021 Google, Inc. All rights reserved. + * Copyright (c) 2010-2024 Google, Inc. All rights reserved. * Copyright (c) 2011 Massachusetts Institute of Technology All rights reserved. * Copyright (c) 2007-2010 VMware, Inc. All rights reserved. * ******************************************************************************/ @@ -570,7 +570,7 @@ not use any global user-mode resources that would interfere with the running application, and as long as no alertable system calls are invoked on Windows (see \ref sec_alertable). While most non-graphical non-alertable Windows API routines are supported, native threading libraries -such as \p libpthread.so on Linux are known to cause problems. +such as \p libpthread.so on Linux are known to sometimes cause problems. Currently we provide a private loader for both Windows and Linux which supplies support for client external library use. @@ -587,6 +587,11 @@ heap. The loader also attempts to isolate other global resource usage and global callbacks. Please file reports on any transparency problems observed when using the private loader. +If DynamoRIO and clients are statically linked into the application, +the private loader is not available and third-party library usage is +generally no longer safe, unless that usage only happens during +process-wide initialization (see \ref sec_static_DR). + By default, all Windows clients link with libc. To instead use the libc subset of routines forwarded from the DynamoRIO library to \p ntdll.dll (which keeps clients more lightweight and is usually sufficient From 57c8e117f9753ea7a23d5354f0d8ccd45881759f Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Wed, 28 Aug 2024 10:51:53 -0400 Subject: [PATCH 3/3] i#6945: Reduce default block time (#6946) Reduces the scheduler and drmemtrace launcher default values for block_time_scale down to 10 and block_time_max down to 2.5s. This improves the scheduler behavior for small traces under fast analyzers. It seems better to err on the side of faster and let more heavyweight simulations tune the block times for more idle time; otherwise we can end up with local runs and especially new users trying things out and seeing the tool seem to just sit there doing nothing. This reduces the threadsig core-sharded time from a minute and a half down to 10 seconds in local runs (see #6945 for command lines); there is still some idle time in there so it seems a reasonable compromise. Fixes #6945 --- clients/drcachesim/common/options.cpp | 10 +++++----- clients/drcachesim/scheduler/scheduler.cpp | 2 +- clients/drcachesim/scheduler/scheduler.h | 8 ++++++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 64ef61bca38..7922eb3bd51 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -922,7 +922,7 @@ droption_t op_sched_blocking_switch_us( "-core_serial. "); droption_t op_sched_block_scale( - DROPTION_SCOPE_ALL, "sched_block_scale", 1000., "Input block time scale factor", + DROPTION_SCOPE_ALL, "sched_block_scale", 10., "Input block time scale factor", "The scale applied to the microsecond latency of blocking system calls. A higher " "value here results in blocking syscalls keeping inputs unscheduled for longer. " "This should roughly equal the slowdown of instruction record processing versus the " @@ -934,11 +934,11 @@ droption_t op_sched_block_scale( // finish and the simulation waits for tens of minutes further for a couple of outliers. // The cap remains a flag and not a constant as different length traces and different // speed simulators need different idle time ranges, so we need to be able to tune this -// to achieve desired cpu usage targets. The default value was selected while tuning -// a 1-minute-long schedule_stats run on a 112-core 500-thread large application -// to produce good cpu usage without unduly increasing tool runtime. +// to achieve desired cpu usage targets. The default value was selected to avoid unduly +// long idle times with local analyzers; it may need to be increased with more +// heavyweight analyzers/simulators. droption_t op_sched_block_max_us(DROPTION_SCOPE_ALL, "sched_block_max_us", - 25000000, + 2500000, "Maximum blocked input time, in microseconds", "The maximum blocked time, after scaling with " "-sched_block_scale."); diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index 69963ae6e47..ffecb6356fc 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -2483,7 +2483,7 @@ scheduler_tmpl_t::pop_from_ready_queue( VDO(this, 1, { static int heartbeat; // We are ok with races as the cadence is approximate. - if (++heartbeat % 500 == 0) { + if (++heartbeat % 2000 == 0) { VPRINT(this, 1, "heartbeat[%d] %zd in queue; %d blocked; %zd unscheduled => %d %d\n", for_output, ready_priority_.size(), num_blocked_, diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 5f3b2516461..64621c3e592 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -656,8 +656,12 @@ template class scheduler_tmpl_t { * the slowdown of the instruction record processing versus the original * (untraced) application execution. The blocked time is clamped to a maximum * value controlled by #block_time_max. + * + * The default value is meant to be reasonable for simple analyzers. It may + * result in too much or too little idle time depending on the analyzer or + * simulator and its speed; it is meant to be tuned and modified. */ - double block_time_scale = 1000.; + double block_time_scale = 10.; /** * The maximum time, in the units explained by #block_time_scale (either * #QUANTUM_TIME simulator time or wall-clock microseconds for @@ -668,7 +672,7 @@ template class scheduler_tmpl_t { * #TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE), after this amount of time those * inputs are all re-scheduled. */ - uint64_t block_time_max = 25000000; + uint64_t block_time_max = 2500000; // XXX: Should we share the file-to-reader code currently in the scheduler // with the analyzer and only then need reader interfaces and not pass paths // to the scheduler?