Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#6938 sched migrate: Enforce migration threshold at the start #7038

Merged
merged 2 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions clients/drcachesim/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2780,22 +2780,33 @@ scheduler_tmpl_t<RecordType, ReaderType>::pop_from_ready_queue_hold_locks(
found_candidate = true;
else {
assert(cur_time > 0 || res->last_run_time == 0);
if (res->last_run_time == 0) {
// For never-executed inputs we consider their last execution
// to be the very first simulation time, which we can't
// easily initialize until here.
res->last_run_time = outputs_[from_output].initial_cur_time;
}
VPRINT(this, 5,
"migration check %d to %d: cur=%" PRIu64 " last=%" PRIu64
" delta=%" PRId64 " vs thresh %" PRIu64 "\n",
from_output, for_output, cur_time, res->last_run_time,
cur_time - res->last_run_time,
options_.migration_threshold_us);
// Guard against time going backward (happens for wall-clock: i#6966).
if (options_.migration_threshold_us == 0 || res->last_run_time == 0 ||
if (options_.migration_threshold_us == 0 ||
// Allow free movement for the initial load balance at init time.
cur_time == 0 ||
(cur_time > res->last_run_time &&
cur_time - res->last_run_time >=
static_cast<uint64_t>(options_.migration_threshold_us *
options_.time_units_per_us))) {
VPRINT(this, 2, "migrating %d to %d\n", from_output, for_output);
found_candidate = true;
++outputs_[from_output]
.stats[memtrace_stream_t::SCHED_STAT_MIGRATIONS];
// Do not count an initial rebalance as a migration.
if (cur_time > 0) {
++outputs_[from_output]
.stats[memtrace_stream_t::SCHED_STAT_MIGRATIONS];
}
}
}
if (found_candidate)
Expand Down Expand Up @@ -3789,6 +3800,9 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
cur_time = 1 + outputs_[output].stream->get_output_instruction_ordinal() +
outputs_[output].idle_count;
}
if (outputs_[output].initial_cur_time == 0) {
outputs_[output].initial_cur_time = cur_time;
}
// Invalid values for cur_time are checked below.
outputs_[output].cur_time->store(cur_time, std::memory_order_release);
if (!outputs_[output].active->load(std::memory_order_acquire)) {
Expand Down
2 changes: 2 additions & 0 deletions clients/drcachesim/scheduler/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1692,6 +1692,8 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
// This is accessed by other outputs for stealing and rebalancing.
// Indirected so we can store it in our vector.
std::unique_ptr<std::atomic<uint64_t>> cur_time;
// The first simulation time passed to this output.
uint64_t initial_cur_time = 0;
// Used for MAP_TO_RECORDED_OUTPUT get_output_cpuid().
int64_t as_traced_cpuid = -1;
// Used for MAP_AS_PREVIOUSLY with live_replay_output_count_.
Expand Down
88 changes: 88 additions & 0 deletions clients/drcachesim/tests/scheduler_unit_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6008,6 +6008,93 @@ test_rebalancing()
}
}

static void
test_initial_migrate()
{
std::cerr << "\n----------------\nTesting initial migrations\n";
// We want to ensure migration thresholds are applied to never-executed inputs.
static constexpr int NUM_OUTPUTS = 2;
static constexpr memref_tid_t TID_BASE = 100;
static constexpr memref_tid_t TID_A = TID_BASE + 0;
static constexpr memref_tid_t TID_B = TID_BASE + 1;
static constexpr memref_tid_t TID_C = TID_BASE + 2;
static constexpr uint64_t TIMESTAMP_START = 10;

// We have 3 inputs and 2 outputs. We expect a round-robin initial assignment
// to put A and C on output #0 and B on #1.
// B will finish #1 and then try to steal C from A.
std::vector<trace_entry_t> refs_A = {
/* clang-format off */
make_thread(TID_A),
make_pid(1),
make_version(4),
make_timestamp(TIMESTAMP_START),
make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
make_instr(10),
make_instr(11),
make_instr(12),
make_instr(13),
make_instr(14),
make_instr(15),
make_exit(TID_A),
/* clang-format on */
};
std::vector<trace_entry_t> refs_B = {
/* clang-format off */
make_thread(TID_B),
make_pid(1),
make_version(4),
make_timestamp(TIMESTAMP_START),
make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
make_instr(20),
make_exit(TID_B),
/* clang-format on */
};
std::vector<trace_entry_t> refs_C = {
/* clang-format off */
make_thread(TID_C),
make_pid(1),
make_version(4),
make_timestamp(TIMESTAMP_START + 10),
make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
make_instr(30),
make_instr(31),
make_instr(32),
make_exit(TID_C),
/* clang-format on */
};

std::vector<scheduler_t::input_reader_t> readers;
readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(refs_A)),
std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_A);
readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(refs_B)),
std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_B);
readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(refs_C)),
std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_C);
std::vector<scheduler_t::input_workload_t> sched_inputs;
sched_inputs.emplace_back(std::move(readers));
scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT,
scheduler_t::DEPENDENCY_TIMESTAMPS,
scheduler_t::SCHEDULER_DEFAULTS,
/*verbosity=*/3);
scheduler_t scheduler;
if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) !=
scheduler_t::STATUS_SUCCESS)
assert(false);
std::vector<std::string> sched_as_string =
run_lockstep_simulation(scheduler, NUM_OUTPUTS, TID_BASE, /*send_time=*/true);
// We should see zero migrations.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add " ... even though C was moved to a different core than what it was initially assigned to"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wasn't moved, actually, which is what we want to test for. I changed this to "// We should see zero migrations since output #1 failed to steal C from output #0." I also augmented the comment above to say:

// We have 3 inputs and 2 outputs. We expect a round-robin initial assignment
// to put A and C on output #0 and B on #1.
// B will finish #1 and then try to steal C from A but should fail if initial
// migrations have to wait for the threshold as though the input just ran
// right before the trace started, which is how we treat them now.

static const char *const CORE0_SCHED_STRING = "...AAAAAA....CCC.";
static const char *const CORE1_SCHED_STRING = "...B.____________";
for (int i = 0; i < NUM_OUTPUTS; i++) {
assert(scheduler.get_stream(i)->get_schedule_statistic(
memtrace_stream_t::SCHED_STAT_MIGRATIONS) == 0);
std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
}
assert(sched_as_string[0] == CORE0_SCHED_STRING);
assert(sched_as_string[1] == CORE1_SCHED_STRING);
}

static void
test_exit_early()
{
Expand Down Expand Up @@ -6249,6 +6336,7 @@ test_main(int argc, const char *argv[])
test_random_schedule();
test_record_scheduler();
test_rebalancing();
test_initial_migrate();
test_exit_early();
test_marker_updates();

Expand Down
Loading