i#6938 sched migrate: Add scheduler statistics (#6939)
Adds schedule statistics to memtrace_stream.h. Implements these
statistics in the streams returned by scheduler_t. This initial round
includes the following values:
```
[scheduler] Stats for output #0
[scheduler]   Switch input->input      :        16
[scheduler]   Switch input->idle       :         4
[scheduler]   Switch idle->input       :         3
[scheduler]   Switch nop               :       119
[scheduler]   Quantum preempts         :       131
[scheduler]   Direct switch attempts   :         0
[scheduler]   Direct switch successes  :         0
```

The switches are split into those four categories to make it easier to
compare against other sources of switch counts: for example, `perf`
limited to a cgroup or process will be missing the `idle->input`
switches, while schedule_stats is missing the `input->idle` switches today.

Adds checks that these match the schedule_stats tool's values.

Adds tests of the values to several key scheduler unit tests.
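As a rough sketch of how a caller could consume the new API (not part of this
change: the helper name, include paths, namespace usage, and `num_outputs`
parameter are illustrative; `get_stream()` is the scheduler's existing
per-output stream accessor):
```cpp
#include <cstdint>

#include "memtrace_stream.h"
#include "scheduler.h"

using namespace dynamorio::drmemtrace;

// Sums the three real context-switch categories across all output streams;
// per the memtrace_stream.h comment, per-output values must be summed to
// obtain global counts, and -1 means the stream does not support statistics.
static int64_t
count_total_switches(scheduler_t &scheduler, int num_outputs)
{
    int64_t total = 0;
    for (int i = 0; i < num_outputs; ++i) {
        memtrace_stream_t *stream = scheduler.get_stream(i);
        double input_to_input = stream->get_schedule_statistic(
            memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT);
        if (input_to_input < 0)
            continue; // Statistics not supported for this stream.
        total += static_cast<int64_t>(
            input_to_input +
            stream->get_schedule_statistic(
                memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_IDLE) +
            stream->get_schedule_statistic(
                memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT));
    }
    return total;
}
```
When comparing against `perf` counts limited to a cgroup or process as
described above, the `idle->input` term can simply be dropped from the sum.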

Issue: #6938
derekbruening authored Aug 28, 2024
1 parent 57c8e11 commit 8db35ac
Showing 5 changed files with 256 additions and 12 deletions.
44 changes: 44 additions & 0 deletions clients/drcachesim/common/memtrace_stream.h
@@ -64,6 +64,39 @@ namespace drmemtrace { /**< DrMemtrace tracing + simulation infrastructure names
*/
class memtrace_stream_t {
public:
/**
* Statistics on the resulting schedule from interleaving and switching
* between the inputs in core-sharded modes.
*/
enum schedule_statistic_t {
/** Count of context switches away from an input to a different input. */
SCHED_STAT_SWITCH_INPUT_TO_INPUT,
/** Count of context switches away from an input to an idle state. */
SCHED_STAT_SWITCH_INPUT_TO_IDLE,
/**
* Count of context switches away from idle to an input.
* This does not include the initial assignment of an input to a core.
*/
SCHED_STAT_SWITCH_IDLE_TO_INPUT,
/**
* Count of quantum preempt points where the same input remains in place
* as nothing else of equal or greater priority is available.
*/
SCHED_STAT_SWITCH_NOP,
/**
* Count of preempts due to quantum expiration. Includes instances
* of the quantum expiring but no switch happening (but #SCHED_STAT_SWITCH_NOP
* can be used to separate those).
*/
SCHED_STAT_QUANTUM_PREEMPTS,
/** Count of #TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH markers. */
SCHED_STAT_DIRECT_SWITCH_ATTEMPTS,
/** Count of #TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH attempts that succeeded. */
SCHED_STAT_DIRECT_SWITCH_SUCCESSES,
/** Count of statistic types. */
SCHED_STAT_TYPE_COUNT,
};

/** Destructor. */
virtual ~memtrace_stream_t()
{
@@ -240,6 +273,17 @@ class memtrace_stream_t {
{
return false;
}

/**
* Returns the value of the specified statistic for this output stream.
* The values for all output streams must be summed to obtain global counts.
* Returns -1 if statistics are not supported for this stream.
*/
virtual double
get_schedule_statistic(schedule_statistic_t stat) const
{
return -1;
}
};

/**
77 changes: 68 additions & 9 deletions clients/drcachesim/scheduler/scheduler.cpp
@@ -664,6 +664,28 @@ scheduler_tmpl_t<RecordType, ReaderType>::stream_t::set_active(bool active)
* Scheduler.
*/

template <typename RecordType, typename ReaderType>
scheduler_tmpl_t<RecordType, ReaderType>::~scheduler_tmpl_t()
{
for (unsigned int i = 0; i < outputs_.size(); ++i) {
VPRINT(this, 1, "Stats for output #%d\n", i);
VPRINT(this, 1, " %-25s: %9" PRId64 "\n", "Switch input->input",
outputs_[i].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT]);
VPRINT(this, 1, " %-25s: %9" PRId64 "\n", "Switch input->idle",
outputs_[i].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_IDLE]);
VPRINT(this, 1, " %-25s: %9" PRId64 "\n", "Switch idle->input",
outputs_[i].stats[memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT]);
VPRINT(this, 1, " %-25s: %9" PRId64 "\n", "Switch nop",
outputs_[i].stats[memtrace_stream_t::SCHED_STAT_SWITCH_NOP]);
VPRINT(this, 1, " %-25s: %9" PRId64 "\n", "Quantum preempts",
outputs_[i].stats[memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS]);
VPRINT(this, 1, " %-25s: %9" PRId64 "\n", "Direct switch attempts",
outputs_[i].stats[memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_ATTEMPTS]);
VPRINT(this, 1, " %-25s: %9" PRId64 "\n", "Direct switch successes",
outputs_[i].stats[memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_SUCCESSES]);
}
}

template <typename RecordType, typename ReaderType>
bool
scheduler_tmpl_t<RecordType, ReaderType>::check_valid_input_limits(
@@ -2117,7 +2139,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::advance_region_of_interest(
input.cur_region);
if (input.cur_region >= static_cast<int>(input.regions_of_interest.size())) {
if (input.at_eof)
return eof_or_idle(output, /*hold_sched_lock=*/false);
return eof_or_idle(output, /*hold_sched_lock=*/false, input.index);
else {
// We let the user know we're done.
if (options_.schedule_record_ostream != nullptr) {
@@ -2592,7 +2614,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::set_cur_input(output_ordinal_t output,
return STATUS_OK;

int prev_workload = -1;
if (outputs_[output].prev_input >= 0) {
if (outputs_[output].prev_input >= 0 && outputs_[output].prev_input != input) {
std::lock_guard<std::mutex> lock(*inputs_[outputs_[output].prev_input].lock);
prev_workload = inputs_[outputs_[output].prev_input].workload;
}
@@ -2681,7 +2703,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input_as_previously(
outputs_[output].at_eof = true;
live_replay_output_count_.fetch_add(-1, std::memory_order_release);
}
return eof_or_idle(output, need_sched_lock());
return eof_or_idle(output, need_sched_lock(), outputs_[output].cur_input);
}
const schedule_record_t &segment =
outputs_[output].record[outputs_[output].record_index + 1];
@@ -2895,6 +2917,8 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu
target->blocked_time = 0;
target->unscheduled = false;
}
++outputs_[output].stats
[memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_SUCCESSES];
} else if (unscheduled_priority_.find(target)) {
target->unscheduled = false;
unscheduled_priority_.erase(target);
@@ -2905,6 +2929,8 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu
"@%" PRIu64 "\n",
output, prev_index, target->index,
inputs_[prev_index].reader->get_last_timestamp());
++outputs_[output].stats
[memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_SUCCESSES];
} else {
// We assume that inter-input dependencies are captured in
// the _DIRECT_THREAD_SWITCH, _UNSCHEDULE, and _SCHEDULE markers
@@ -2928,11 +2954,11 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu
// We found a direct switch target above.
} else if (ready_queue_empty() && blocked_time == 0) {
if (prev_index == INVALID_INPUT_ORDINAL)
return eof_or_idle(output, need_lock);
return eof_or_idle(output, need_lock, prev_index);
auto lock = std::unique_lock<std::mutex>(*inputs_[prev_index].lock);
if (inputs_[prev_index].at_eof) {
lock.unlock();
return eof_or_idle(output, need_lock);
return eof_or_idle(output, need_lock, prev_index);
} else
index = prev_index; // Go back to prior.
} else {
Expand All @@ -2955,12 +2981,17 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu
if (record_status != sched_type_t::STATUS_OK)
return record_status;
}
if (prev_index != INVALID_INPUT_ORDINAL) {
++outputs_[output]
.stats[memtrace_stream_t::
SCHED_STAT_SWITCH_INPUT_TO_IDLE];
}
}
return status;
}
if (queue_next == nullptr) {
assert(blocked_time == 0 || prev_index == INVALID_INPUT_ORDINAL);
return eof_or_idle(output, need_lock);
return eof_or_idle(output, need_lock, prev_index);
}
index = queue_next->index;
}
@@ -2975,7 +3006,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu
}
}
if (index < 0)
return eof_or_idle(output, need_lock);
return eof_or_idle(output, need_lock, prev_index);
VPRINT(this, 2,
"next_record[%d]: advancing to timestamp %" PRIu64
" == input #%d\n",
@@ -3017,6 +3048,16 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu
}
break;
}
// We can't easily place these stats inside set_cur_input() as we call that to
// temporarily give up our input.
if (prev_index == index)
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_NOP];
else if (prev_index != INVALID_INPUT_ORDINAL && index != INVALID_INPUT_ORDINAL)
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT];
else if (index == INVALID_INPUT_ORDINAL)
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_IDLE];
else
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT];
set_cur_input(output, index);
return res;
}
@@ -3056,6 +3097,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::process_marker(input_info_t &input,
case TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH: {
if (!options_.honor_direct_switches)
break;
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_ATTEMPTS];
memref_tid_t target_tid = marker_value;
auto it = tid2input_.find(workload_tid_t(input.workload, target_tid));
if (it == tid2input_.end()) {
@@ -3213,7 +3255,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
if (outputs_[output].cur_input < 0) {
// This happens with more outputs than inputs. For non-empty outputs we
// require cur_input to be set to >=0 during init().
return eof_or_idle(output, /*hold_sched_lock=*/false);
return eof_or_idle(output, /*hold_sched_lock=*/false, outputs_[output].cur_input);
}
input = &inputs_[outputs_[output].cur_input];
auto lock = std::unique_lock<std::mutex>(*input->lock);
@@ -3404,6 +3446,8 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
preempt = !need_new_input;
need_new_input = true;
input->instrs_in_quantum = 0;
++outputs_[output]
.stats[memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS];
}
} else if (options_.quantum_unit == QUANTUM_TIME) {
if (cur_time == 0 || cur_time < input->prev_time_in_quantum) {
@@ -3427,6 +3471,8 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
preempt = !need_new_input;
need_new_input = true;
input->time_spent_in_quantum = 0;
++outputs_[output]
.stats[memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS];
}
}
}
@@ -3613,7 +3659,8 @@ scheduler_tmpl_t<RecordType, ReaderType>::mark_input_eof(input_info_t &input)
template <typename RecordType, typename ReaderType>
typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
scheduler_tmpl_t<RecordType, ReaderType>::eof_or_idle(output_ordinal_t output,
bool hold_sched_lock)
bool hold_sched_lock,
input_ordinal_t prev_input)
{
// XXX i#6831: Refactor to use subclasses or templates to specialize
// scheduler code based on mapping options, to avoid these top-level
@@ -3672,6 +3719,8 @@ scheduler_tmpl_t<RecordType, ReaderType>::eof_or_idle(output_ordinal_t output,
}
}
outputs_[output].waiting = true;
if (prev_input != INVALID_INPUT_ORDINAL)
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_IDLE];
set_cur_input(output, INVALID_INPUT_ORDINAL);
return sched_type_t::STATUS_IDLE;
}
@@ -3687,6 +3736,16 @@ scheduler_tmpl_t<RecordType, ReaderType>::is_record_kernel(output_ordinal_t outp
return inputs_[index].reader->is_record_kernel();
}

template <typename RecordType, typename ReaderType>
double
scheduler_tmpl_t<RecordType, ReaderType>::get_statistic(
output_ordinal_t output, memtrace_stream_t::schedule_statistic_t stat) const
{
if (stat >= memtrace_stream_t::SCHED_STAT_TYPE_COUNT)
return -1;
return static_cast<double>(outputs_[output].stats[stat]);
}

template <typename RecordType, typename ReaderType>
typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
scheduler_tmpl_t<RecordType, ReaderType>::set_output_active(output_ordinal_t output,
27 changes: 25 additions & 2 deletions clients/drcachesim/scheduler/scheduler.h
@@ -1129,6 +1129,18 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
return scheduler_->is_record_kernel(ordinal_);
}

/**
* Returns the value of the specified statistic for this output stream.
* The values for all output streams must be summed to obtain global counts.
* These statistics are not guaranteed to be accurate when replaying a
* prior schedule via #MAP_TO_RECORDED_OUTPUT.
*/
double
get_schedule_statistic(schedule_statistic_t stat) const override
{
return scheduler_->get_statistic(ordinal_, stat);
}

protected:
scheduler_tmpl_t<RecordType, ReaderType> *scheduler_ = nullptr;
int ordinal_ = -1;
@@ -1157,7 +1169,7 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
: ready_priority_(static_cast<int>(get_time_micros()))
{
}
virtual ~scheduler_tmpl_t() = default;
virtual ~scheduler_tmpl_t();

/**
* Initializes the scheduler for the given inputs, count of output streams, and
@@ -1444,6 +1456,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
bool at_eof = false;
// Used for replaying wait periods.
uint64_t wait_start_time = 0;
// Exported statistics. Currently all integers and cast to double on export.
std::vector<int64_t> stats =
std::vector<int64_t>(memtrace_stream_t::SCHED_STAT_TYPE_COUNT);
};

// Used for reading as-traced schedules.
@@ -1788,13 +1803,21 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
// Determines whether to exit or wait for other outputs when one output
// runs out of things to do. May end up scheduling new inputs.
stream_status_t
eof_or_idle(output_ordinal_t output, bool hold_sched_lock);
eof_or_idle(output_ordinal_t output, bool hold_sched_lock,
input_ordinal_t prev_input);

// Returns whether the current record for the current input stream scheduled on
// the 'output_ordinal'-th output stream is from a part of the trace corresponding
// to kernel execution.
bool
is_record_kernel(output_ordinal_t output);

// These statistics are not guaranteed to be accurate when replaying a
// prior schedule.
double
get_statistic(output_ordinal_t output,
memtrace_stream_t::schedule_statistic_t stat) const;

///////////////////////////////////////////////////////////////////////////
// Support for ready queues for who to schedule next:

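The commit message mentions asserting these values in key scheduler unit
tests; below is a rough illustration of that kind of check, not taken from
this diff (the `stream` argument, the helper name, and the specific
expectations are hypothetical):
```cpp
#include <cassert>

#include "memtrace_stream.h"

using namespace dynamorio::drmemtrace;

// Hypothetical post-run sanity checks on one output stream's counters.
static void
check_schedule_stats(memtrace_stream_t *stream)
{
    // scheduler_t streams implement the statistics, so no value should be -1.
    assert(stream->get_schedule_statistic(
               memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS) >= 0);
    // A direct-switch success is always preceded by a recorded attempt.
    assert(stream->get_schedule_statistic(
               memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_SUCCESSES) <=
           stream->get_schedule_statistic(
               memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_ATTEMPTS));
}
```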