Skip to content

Commit

Permalink
i#6938 sched migrate: Fix direct switch blocked_start_time (#6970)
Browse files Browse the repository at this point in the history
Fixes a bug (discovered while testing separate runqueues) where direct
switch requests with timeouts do not set blocked_start_time, resulting
in re-using a prior (possibly 0) time and consequently much shorter
timeouts.

Reduces the default block_time_max parameter, as the bug was
artificially reducing many instances and with the accurate timeout the
existing defaults were no longer well-tuned.

Updates unit tests.

Issue: #6938
  • Loading branch information
derekbruening authored Sep 10, 2024
1 parent ddcae43 commit cac7cda
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 7 deletions.
2 changes: 1 addition & 1 deletion clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ droption_t<double> op_sched_block_scale(
// long idle times with local analyzers; it may need to be increased with more
// heavyweight analyzers/simulators.
droption_t<uint64_t> op_sched_block_max_us(DROPTION_SCOPE_ALL, "sched_block_max_us",
2500000,
250000,
"Maximum blocked input time, in microseconds",
"The maximum blocked time, after scaling with "
"-sched_block_scale.");
Expand Down
2 changes: 2 additions & 0 deletions clients/drcachesim/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3174,6 +3174,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::process_marker(input_info_t &input,
input.unscheduled = true;
if (input.syscall_timeout_arg > 0) {
input.blocked_time = scale_blocked_time(input.syscall_timeout_arg);
input.blocked_start_time = get_output_time(output);
VPRINT(this, 3, "input %d unscheduled for %" PRIu64 " @%" PRIu64 "\n",
input.index, input.blocked_time, input.reader->get_last_timestamp());
} else {
Expand Down Expand Up @@ -3201,6 +3202,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::process_marker(input_info_t &input,
input.unscheduled = true;
if (input.syscall_timeout_arg > 0) {
input.blocked_time = scale_blocked_time(input.syscall_timeout_arg);
input.blocked_start_time = get_output_time(output);
VPRINT(this, 3, "input %d unscheduled for %" PRIu64 " @%" PRIu64 "\n",
input.index, input.blocked_time, input.reader->get_last_timestamp());
} else {
Expand Down
2 changes: 1 addition & 1 deletion clients/drcachesim/scheduler/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* #TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE), after this amount of time those
* inputs are all re-scheduled.
*/
uint64_t block_time_max = 2500000;
uint64_t block_time_max = 250000;
// XXX: Should we share the file-to-reader code currently in the scheduler
// with the analyzer and only then need reader interfaces and not pass paths
// to the scheduler?
Expand Down
11 changes: 6 additions & 5 deletions clients/drcachesim/tests/scheduler_unit_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4246,7 +4246,7 @@ test_direct_switch()
// has significant blocked time left. But then after B is scheduled and finishes,
// we still have to wait for C's block time so we see idle underscores:
static const char *const CORE0_SCHED_STRING =
"...AA..........CC.......A....BBBB.__________C....";
"...AA..........CC.......A....BBBB._______________C....";
std::vector<scheduler_t::input_workload_t> sched_inputs;
sched_inputs.emplace_back(std::move(readers));
scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT,
Expand Down Expand Up @@ -4325,7 +4325,7 @@ test_unscheduled()
static constexpr int QUANTUM_DURATION = 100; // Never reached.
static constexpr int BLOCK_LATENCY = 100;
static constexpr double BLOCK_SCALE = 1. / (BLOCK_LATENCY);
static constexpr int SWITCH_TIMEOUT = 2000;
static constexpr int SWITCH_TIMEOUT = 1000;
static constexpr memref_tid_t TID_BASE = 100;
static constexpr memref_tid_t TID_A = TID_BASE + 0;
static constexpr memref_tid_t TID_B = TID_BASE + 1;
Expand Down Expand Up @@ -4451,7 +4451,7 @@ test_unscheduled()
// has lapsed so it runs; finally we wait idle for C's long block to finish,
// after which C runs and *does not unschedule* b/c of B's prior request.
static const char *const CORE0_SCHED_STRING =
"...AA.........B........CC.....______________B......B......B....A......BBBB."
"...AA.........B........CC.....________B......B......B....A......BBBB.______"
"A._________________C......C.";

std::vector<scheduler_t::input_workload_t> sched_inputs;
Expand Down Expand Up @@ -4626,9 +4626,10 @@ test_unscheduled_fallback()
readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(refs_C)),
std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_C);
// This looks like the schedule in test_unscheduled() up until "..A.." when
// we have a BLOCK_TIME_MAX-long idle period:
// we have an idle period equal to the rebalance_period from the start
// (so BLOCK_TIME_MAX minus what was run).
static const char *const CORE0_SCHED_STRING =
"...AA.........B........CC.....______________B......B....A.....______________"
"...AA.........B........CC.....__________________B......B....A.....__________"
"_________C._________________________________________________________________"
"____________________________________________________________________________"
"____________________________________________________________________________"
Expand Down

0 comments on commit cac7cda

Please sign in to comment.