Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#6938 sched migrate: Separate run queue per output #6985

Merged
merged 19 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
5eb838b
i#6938 sched migrate: All options in microseconds + single scale
derekbruening Sep 11, 2024
bdec908
Fix Windows build warning
derekbruening Sep 12, 2024
b12e991
Merge branch 'master' of github.com:DynamoRIO/dynamorio into i6938-ti…
derekbruening Sep 12, 2024
4b40d2b
Fix another Windows build warning
derekbruening Sep 12, 2024
33b5bfd
Fix yet another Windows build warning
derekbruening Sep 12, 2024
a462db1
Add missing includes
derekbruening Sep 12, 2024
c9ce27c
i#6938 sched migrate: Separate run queue per output
derekbruening Sep 6, 2024
5d6762a
Fix Windows build warnings; relax switches per core expected in test …
derekbruening Sep 13, 2024
0cb3cad
Fix another Windows build warning
derekbruening Sep 13, 2024
7cf2e42
Fix scaling bugs for blocked time and quantum; shift defaults to matc…
derekbruening Sep 13, 2024
3b3089d
Fix two unit tests missing time_units_per_us
derekbruening Sep 13, 2024
1d36d81
Remove diagnostics
derekbruening Sep 13, 2024
6dce7ce
Fix mis-attribution of stats on switchto to target's old output
derekbruening Sep 14, 2024
7e9fe13
On _SCHEDULE, give up target input lock before acquiring target outpu…
derekbruening Sep 14, 2024
6f37460
Give up direct switch input lock before acquiring its output lock
derekbruening Sep 16, 2024
2ebda7c
Review requests: s/runqueue/ready_queue/; add many comments; remove r…
derekbruening Sep 17, 2024
6128656
Merge branch 'master' of github.com:DynamoRIO/dynamorio into i6938-pe…
derekbruening Sep 17, 2024
a475ef4
Drop rebalance threshold from 1.5M to 150K as 1.5M is too high with t…
derekbruening Sep 17, 2024
baf9cf8
Review suggestion: optimize work-stealing loop
derekbruening Sep 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions clients/drcachesim/analyzer_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,13 +553,20 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::init_dynamic_schedule()
op_sched_order_time.get_value() ? sched_type_t::DEPENDENCY_TIMESTAMPS
: sched_type_t::DEPENDENCY_IGNORE,
sched_type_t::SCHEDULER_DEFAULTS, op_verbose.get_value());
sched_ops.quantum_duration = op_sched_quantum.get_value();
if (op_sched_time.get_value())
sched_ops.time_units_per_us = op_sched_time_per_us.get_value();
if (op_sched_time.get_value()) {
sched_ops.quantum_unit = sched_type_t::QUANTUM_TIME;
sched_ops.quantum_duration_us = op_sched_quantum.get_value();
} else {
sched_ops.quantum_duration_instrs = op_sched_quantum.get_value();
}
sched_ops.syscall_switch_threshold = op_sched_syscall_switch_us.get_value();
sched_ops.blocking_switch_threshold = op_sched_blocking_switch_us.get_value();
sched_ops.block_time_scale = op_sched_block_scale.get_value();
sched_ops.block_time_max = op_sched_block_max_us.get_value();
sched_ops.block_time_multiplier = op_sched_block_scale.get_value();
sched_ops.block_time_max_us = op_sched_block_max_us.get_value();
sched_ops.migration_threshold_us = op_sched_migration_threshold_us.get_value();
sched_ops.rebalance_period_us = op_sched_rebalance_period_us.get_value();
sched_ops.time_units_per_us = op_sched_time_units_per_us.get_value();
sched_ops.randomize_next_input = op_sched_randomize.get_value();
sched_ops.honor_direct_switches = !op_sched_disable_direct_switches.get_value();
#ifdef HAS_ZIP
Expand Down
9 changes: 9 additions & 0 deletions clients/drcachesim/common/memtrace_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ class memtrace_stream_t {
* i.e., the number of input migrations to this core.
*/
SCHED_STAT_MIGRATIONS,
/**
* Counts the number of times this output's runqueue became empty and it took
* work from another output's runqueue.
*/
SCHED_STAT_RUNQUEUE_STEALS,
/**
* Counts the number of output runqueue rebalances triggered by this output.
*/
SCHED_STAT_RUNQUEUE_REBALANCES,
/** Count of statistic types. */
SCHED_STAT_TYPE_COUNT,
};
Expand Down
50 changes: 38 additions & 12 deletions clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -887,13 +887,19 @@ droption_t<bool> op_core_serial(
"How the scheduling is performed is controlled by a set "
"of options with the prefix \"sched_\" along with -cores.");

droption_t<double>
op_sched_time_per_us(DROPTION_SCOPE_ALL, "sched_time_per_us", 1000.,
"Wall-clock microseconds per simulated microsecond",
"Wall-clock microseconds per simulated microsecond.");

droption_t<int64_t>
// We pick 6 million to match 2 instructions per nanosecond with a 3ms quantum.
op_sched_quantum(DROPTION_SCOPE_ALL, "sched_quantum", 6 * 1000 * 1000,
"Scheduling quantum",
"Applies to -core_sharded and -core_serial. "
"Scheduling quantum: in microseconds of wall-clock "
"time if -sched_time is set; otherwise in instructions.");
"Scheduling quantum in instructions, unless -sched_time is set in "
"which case this value is multiplied by -sched_time_per_us to "
"produce a quantum in wall-clock microseconds.");

droption_t<bool>
op_sched_time(DROPTION_SCOPE_ALL, "sched_time", false,
Expand Down Expand Up @@ -922,23 +928,24 @@ droption_t<uint64_t> op_sched_blocking_switch_us(
"-core_serial. ");

droption_t<double> op_sched_block_scale(
DROPTION_SCOPE_ALL, "sched_block_scale", 10., "Input block time scale factor",
"The scale applied to the microsecond latency of blocking system calls. A higher "
"value here results in blocking syscalls keeping inputs unscheduled for longer. "
"This should roughly equal the slowdown of instruction record processing versus the "
"original (untraced) application execution.");

// We have a max to avoid outlier latencies that are already a second or more from
// scaling up to tens of minutes. We assume a cap is representative as the outliers
DROPTION_SCOPE_ALL, "sched_block_scale", 0.01, "Input block time scale factor",
"This value is multiplied by -sched_time_per_us to produce a scale which is applied "
"to the as-traced microsecond latency of blocking system calls to produce the block "
"time during simulation. A higher value here results in blocking syscalls keeping "
"inputs unscheduled for longer.");

// We have a max to avoid outlier latencies from scaling up to extreme times. There is
// some inflation in the as-traced latencies and some can be inflated more than others.
// We assume a cap is representative as the outliers
// likely were not part of key dependence chains. Without a cap the other threads all
// finish and the simulation waits for tens of minutes further for a couple of outliers.
// The cap remains a flag and not a constant as different length traces and different
// speed simulators need different idle time ranges, so we need to be able to tune this
// to achieve desired cpu usage targets. The default value was selected to avoid unduly
// long idle times with local analyzers; it may need to be increased with more
// heavyweight analyzers/simulators.
droption_t<uint64_t> op_sched_block_max_us(DROPTION_SCOPE_ALL, "sched_block_max_us",
250000,
// TODO i#6959: Once we have -exit_if_all_unscheduled raise this.
droption_t<uint64_t> op_sched_block_max_us(DROPTION_SCOPE_ALL, "sched_block_max_us", 250,
"Maximum blocked input time, in microseconds",
"The maximum blocked time, after scaling with "
"-sched_block_scale.");
Expand Down Expand Up @@ -985,6 +992,25 @@ droption_t<bool> op_sched_disable_direct_switches(
"switch being determined by latency and the next input in the queue. The "
"TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH markers are not removed from the trace.");

// Minimum amount of simulated time that must pass after an input last ran on a
// core before that input may be migrated to a different core.
// Expressed in simulated microseconds; the default is 500.
droption_t<uint64_t> op_sched_migration_threshold_us(
DROPTION_SCOPE_ALL, "sched_migration_threshold_us", 500,
"Time in simulated microseconds before an input can be migrated across cores",
"The minimum time in simulated microseconds that must have elapsed since an input "
"last ran on a core before it can be migrated to another core.");

// Interval between load-balancing passes over the per-core run queues.
// Expressed in simulated microseconds; the default of 1500000 is 1.5 simulated
// seconds.
droption_t<uint64_t> op_sched_rebalance_period_us(
DROPTION_SCOPE_ALL, "sched_rebalance_period_us", 1500000,
"Period in microseconds at which core run queues are load-balanced",
"The period in simulated microseconds at which per-core run queues are re-balanced "
"to redistribute load.");

// Scale factor relating time units (currently wall-clock time, per the option
// description) to simulated microseconds.  All of the -sched_*_us options are
// expressed in simulated microseconds and are scaled by this value.
// The default is 1000 time units per simulated microsecond.
droption_t<double> op_sched_time_units_per_us(
    DROPTION_SCOPE_ALL, "sched_time_units_per_us", 1000.,
    "Time units per simulated microsecond",
    "Time units (currently wall-clock time) per simulated microsecond. This scales all "
    "of the -sched_*_us values as it converts wall-clock time into the simulated "
    "microseconds measured by those options.");

// Schedule_stats options.
droption_t<uint64_t>
op_schedule_stats_print_every(DROPTION_SCOPE_ALL, "schedule_stats_print_every",
Expand Down
4 changes: 4 additions & 0 deletions clients/drcachesim/common/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ extern dynamorio::droption::droption_t<int> op_kernel_trace_buffer_size_shift;
#endif
extern dynamorio::droption::droption_t<bool> op_core_sharded;
extern dynamorio::droption::droption_t<bool> op_core_serial;
extern dynamorio::droption::droption_t<double> op_sched_time_per_us;
extern dynamorio::droption::droption_t<int64_t> op_sched_quantum;
extern dynamorio::droption::droption_t<bool> op_sched_time;
extern dynamorio::droption::droption_t<bool> op_sched_order_time;
Expand All @@ -214,6 +215,9 @@ extern dynamorio::droption::droption_t<std::string> op_cpu_schedule_file;
extern dynamorio::droption::droption_t<std::string> op_sched_switch_file;
extern dynamorio::droption::droption_t<bool> op_sched_randomize;
extern dynamorio::droption::droption_t<bool> op_sched_disable_direct_switches;
extern dynamorio::droption::droption_t<uint64_t> op_sched_migration_threshold_us;
extern dynamorio::droption::droption_t<uint64_t> op_sched_rebalance_period_us;
extern dynamorio::droption::droption_t<double> op_sched_time_units_per_us;
extern dynamorio::droption::droption_t<uint64_t> op_schedule_stats_print_every;
extern dynamorio::droption::droption_t<std::string> op_syscall_template_file;
extern dynamorio::droption::droption_t<uint64_t> op_filter_stop_timestamp;
Expand Down
9 changes: 9 additions & 0 deletions clients/drcachesim/scheduler/flexible_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@ class flexible_queue_t {
return entries_[rand_gen_() % size()]; // Undefined if empty.
}

// Returns a copy of the last element of entries_.  This is an entry from the
// back -- or at least not from the front; it's not guaranteed to be the lowest
// priority, just not the highest.
// The queue must not be empty: that precondition is checked only via assert().
// NOTE(review): nothing in this method removes the returned entry from the
// queue -- confirm that callers erase it separately if they mean to take it.
T
back()
{
assert(!empty());
return entries_.back();
}

bool
empty() const
{
Expand Down
Loading
Loading