DynamoRIO · derekbruening · Oct 11, 2024 · Oct 11, 2024 · Oct 11, 2024 · abhinav92003
diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp
@@ -2780,22 +2780,33 @@ scheduler_tmpl_t<RecordType, ReaderType>::pop_from_ready_queue_hold_locks(
                     found_candidate = true;
                 else {
                     assert(cur_time > 0 || res->last_run_time == 0);
+                    if (res->last_run_time == 0) {
+                        // For never-executed inputs we consider their last execution
+                        // to be the very first simulation time, which we can't
+                        // easily initialize until here.
+                        res->last_run_time = outputs_[from_output].initial_cur_time;
+                    }
                     VPRINT(this, 5,
                            "migration check %d to %d: cur=%" PRIu64 " last=%" PRIu64
                            " delta=%" PRId64 " vs thresh %" PRIu64 "\n",
                            from_output, for_output, cur_time, res->last_run_time,
                            cur_time - res->last_run_time,
                            options_.migration_threshold_us);
                     // Guard against time going backward (happens for wall-clock: i#6966).
-                    if (options_.migration_threshold_us == 0 || res->last_run_time == 0 ||
+                    if (options_.migration_threshold_us == 0 ||
+                        // Allow free movement for the initial load balance at init time.
+                        cur_time == 0 ||
                         (cur_time > res->last_run_time &&
                          cur_time - res->last_run_time >=
                              static_cast<uint64_t>(options_.migration_threshold_us *
                                                    options_.time_units_per_us))) {
                         VPRINT(this, 2, "migrating %d to %d\n", from_output, for_output);
                         found_candidate = true;
-                        ++outputs_[from_output]
-                              .stats[memtrace_stream_t::SCHED_STAT_MIGRATIONS];
+                        // Do not count an initial rebalance as a migration.
+                        if (cur_time > 0) {
+                            ++outputs_[from_output]
+                                  .stats[memtrace_stream_t::SCHED_STAT_MIGRATIONS];
+                        }
                     }
                 }
                 if (found_candidate)
@@ -3789,6 +3800,9 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
         cur_time = 1 + outputs_[output].stream->get_output_instruction_ordinal() +
             outputs_[output].idle_count;
     }
+    if (outputs_[output].initial_cur_time == 0) {
+        outputs_[output].initial_cur_time = cur_time;
+    }
     // Invalid values for cur_time are checked below.
     outputs_[output].cur_time->store(cur_time, std::memory_order_release);
     if (!outputs_[output].active->load(std::memory_order_acquire)) {

diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h
@@ -1692,6 +1692,8 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
         // This is accessed by other outputs for stealing and rebalancing.
         // Indirected so we can store it in our vector.
         std::unique_ptr<std::atomic<uint64_t>> cur_time;
+        // The first simulation time passed to this output.
+        uint64_t initial_cur_time = 0;
         // Used for MAP_TO_RECORDED_OUTPUT get_output_cpuid().
         int64_t as_traced_cpuid = -1;
         // Used for MAP_AS_PREVIOUSLY with live_replay_output_count_.

diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp
@@ -6008,6 +6008,93 @@ test_rebalancing()
     }
 }
 
+static void
+test_initial_migrate()
+{
+    std::cerr << "\n----------------\nTesting initial migrations\n";
+    // We want to ensures migration thresholds are applied to never-executed inputs.
+    static constexpr int NUM_OUTPUTS = 2;
+    static constexpr memref_tid_t TID_BASE = 100;
+    static constexpr memref_tid_t TID_A = TID_BASE + 0;
+    static constexpr memref_tid_t TID_B = TID_BASE + 1;
+    static constexpr memref_tid_t TID_C = TID_BASE + 2;
+    static constexpr uint64_t TIMESTAMP_START = 10;
+
+    // We have 3 inputs and 2 outputs. We expect a round-robin initial assignment
+    // to put A and C on output #0 and B on #1.
+    // B will finish #1 and then try to steal C from A.
+    std::vector<trace_entry_t> refs_A = {
+        /* clang-format off */
+        make_thread(TID_A),
+        make_pid(1),
+        make_version(4),
+        make_timestamp(TIMESTAMP_START),
+        make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
+        make_instr(10),
+        make_instr(11),
+        make_instr(12),
+        make_instr(13),
+        make_instr(14),
+        make_instr(15),
+        make_exit(TID_A),
+        /* clang-format on */
+    };
+    std::vector<trace_entry_t> refs_B = {
+        /* clang-format off */
+        make_thread(TID_B),
+        make_pid(1),
+        make_version(4),
+        make_timestamp(TIMESTAMP_START),
+        make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
+        make_instr(20),
+        make_exit(TID_B),
+        /* clang-format on */
+    };
+    std::vector<trace_entry_t> refs_C = {
+        /* clang-format off */
+        make_thread(TID_C),
+        make_pid(1),
+        make_version(4),
+        make_timestamp(TIMESTAMP_START + 10),
+        make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
+        make_instr(30),
+        make_instr(31),
+        make_instr(32),
+        make_exit(TID_C),
+        /* clang-format on */
+    };
+
+    std::vector<scheduler_t::input_reader_t> readers;
+    readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(refs_A)),
+                         std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_A);
+    readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(refs_B)),
+                         std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_B);
+    readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(refs_C)),
+                         std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_C);
+    std::vector<scheduler_t::input_workload_t> sched_inputs;
+    sched_inputs.emplace_back(std::move(readers));
+    scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT,
+                                               scheduler_t::DEPENDENCY_TIMESTAMPS,
+                                               scheduler_t::SCHEDULER_DEFAULTS,
+                                               /*verbosity=*/3);
+    scheduler_t scheduler;
+    if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) !=
+        scheduler_t::STATUS_SUCCESS)
+        assert(false);
+    std::vector<std::string> sched_as_string =
+        run_lockstep_simulation(scheduler, NUM_OUTPUTS, TID_BASE, /*send_time=*/true);
+    // We should see zero migrations.
+    static const char *const CORE0_SCHED_STRING = "...AAAAAA....CCC.";
+    static const char *const CORE1_SCHED_STRING = "...B.____________";
+    for (int i = 0; i < NUM_OUTPUTS; i++) {
+        assert(scheduler.get_stream(i)->get_schedule_statistic(
+                   memtrace_stream_t::SCHED_STAT_MIGRATIONS) == 0);
+        std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
+    }
+    assert(sched_as_string[0] == CORE0_SCHED_STRING);
+    assert(sched_as_string[1] == CORE1_SCHED_STRING);
+}
+
 static void
 test_exit_early()
 {
@@ -6249,6 +6336,7 @@ test_main(int argc, const char *argv[])
     test_random_schedule();
     test_record_scheduler();
     test_rebalancing();
+    test_initial_migrate();
     test_exit_early();
     test_marker_updates();