diff --git a/api/docs/release.dox b/api/docs/release.dox index d1fde925f0a..0eba0910da3 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -245,6 +245,8 @@ Further non-compatibility-affecting changes include: markers based on the function ID. This filter is enabled by "-filter_keep_func_ids" followed by a comma-separated list of function IDs to preserve in the output trace. All function markers whose ID is not in the list are removed. + - Added -skip_to_timestamp and #dynamorio::drmemtrace::scheduler_tmpl_t:: + input_workload_t::times_of_interest to the drmemtrace scheduler. **************************************************
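To illustrate the new scheduler interface named in this release-note entry, here is a minimal sketch of how client code might set times_of_interest directly; it mirrors the test_times_of_interest unit test added below and assumes a build with zipfile support. The header paths, trace path, timestamp value, verbosity, and the helper function name are illustrative assumptions, not part of this patch.

#include <iostream>
#include <utility>
#include <vector>
#include "scheduler.h"        // dynamorio::drmemtrace::scheduler_t (path may vary).
#include "zipfile_istream.h"  // zipfile_istream_t (requires HAS_ZIP).

using namespace dynamorio::drmemtrace;

// Hypothetical helper: iterates only the records at or after the given timestamp.
bool
run_from_timestamp(std::vector<scheduler_t::input_reader_t> &&readers,
                   uint64_t start_timestamp)
{
    std::vector<scheduler_t::input_workload_t> sched_inputs;
    sched_inputs.emplace_back(std::move(readers));
    // A single open-ended range: a stop of 0 means "to the end of the trace".
    sched_inputs.back().times_of_interest = { { start_timestamp, 0 } };
    scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT,
                                               scheduler_t::DEPENDENCY_TIMESTAMPS,
                                               scheduler_t::SCHEDULER_DEFAULTS,
                                               /*verbosity=*/1);
    // times_of_interest requires the as-traced cpu schedule in order to translate
    // timestamps into per-input instruction ordinals.
    zipfile_istream_t cpu_schedule("drmemtrace.dir/cpu_schedule.bin.zip");
    sched_ops.replay_as_traced_istream = &cpu_schedule;
    scheduler_t scheduler;
    if (scheduler.init(sched_inputs, /*output_count=*/1, std::move(sched_ops)) !=
        scheduler_t::STATUS_SUCCESS) {
        std::cerr << scheduler.get_error_string() << "\n";
        return false;
    }
    auto *stream = scheduler.get_stream(0);
    memref_t record;
    while (stream->next_record(record) == scheduler_t::STATUS_OK) {
        // Records before the requested window have been skipped; the window
        // boundary is interpolated from the as-traced schedule.
    }
    return true;
}
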
diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp index dff5bb535c3..6ffe60867dd 100644 --- a/clients/drcachesim/analyzer.cpp +++ b/clients/drcachesim/analyzer.cpp @@ -248,6 +248,9 @@ analyzer_tmpl_t::init_scheduler( if (only_thread != INVALID_THREAD_ID) { workload.only_threads.insert(only_thread); } + if (regions.empty() && skip_to_timestamp_ > 0) { + workload.times_of_interest.emplace_back(skip_to_timestamp_, 0); + } return init_scheduler_common(workload, std::move(options)); } @@ -308,12 +311,14 @@ analyzer_tmpl_t::init_scheduler_common( } } else if (parallel_) { sched_ops = sched_type_t::make_scheduler_parallel_options(verbosity_); + sched_ops.replay_as_traced_istream = options.replay_as_traced_istream; sched_ops.read_inputs_in_init = options.read_inputs_in_init; if (worker_count_ <= 0) worker_count_ = std::thread::hardware_concurrency(); output_count = worker_count_; } else { sched_ops = sched_type_t::make_scheduler_serial_options(verbosity_); + sched_ops.replay_as_traced_istream = options.replay_as_traced_istream; sched_ops.read_inputs_in_init = options.read_inputs_in_init; worker_count_ = 1; output_count = 1; diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h index 43d8ce12083..794ea399eb4 100644 --- a/clients/drcachesim/analyzer.h +++ b/clients/drcachesim/analyzer.h @@ -424,6 +424,7 @@ template class analyzer_tmpl_t { int worker_count_; const char *output_prefix_ = "[analyzer]"; uint64_t skip_instrs_ = 0; + uint64_t skip_to_timestamp_ = 0; uint64_t interval_microseconds_ = 0; uint64_t interval_instr_count_ = 0; int verbosity_ = 0; diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index c0a3cb57d5a..1389afcfa68 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -351,6 +351,13 @@ analyzer_multi_tmpl_t::analyzer_multi_tmpl_t() { this->worker_count_ = op_jobs.get_value(); this->skip_instrs_ = op_skip_instrs.get_value(); + this->skip_to_timestamp_ = op_skip_to_timestamp.get_value(); + if (this->skip_instrs_ > 0 && this->skip_to_timestamp_ > 0) { + this->error_string_ = "Usage error: only one of -skip_instrs and " + "-skip_to_timestamp can be used at a time"; + this->success_ = false; + return; + } this->interval_microseconds_ = op_interval_microseconds.get_value(); this->interval_instr_count_ = op_interval_instr_count.get_value(); // Initial measurements show it's sometimes faster to keep the parallel model @@ -437,6 +444,14 @@ analyzer_multi_tmpl_t::analyzer_multi_tmpl_t() this->parallel_ = false; } sched_ops = init_dynamic_schedule(); + } else if (op_skip_to_timestamp.get_value() > 0) { +#ifdef HAS_ZIP + if (!op_cpu_schedule_file.get_value().empty()) { + cpu_schedule_zip_.reset( + new zipfile_istream_t(op_cpu_schedule_file.get_value())); + sched_ops.replay_as_traced_istream = cpu_schedule_zip_.get(); + } +#endif } if (!op_indir.get_value().empty()) { @@ -521,9 +536,14 @@ analyzer_multi_tmpl_t::init_dynamic_schedule() sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS; } else if (!op_cpu_schedule_file.get_value().empty()) { cpu_schedule_zip_.reset(new zipfile_istream_t(op_cpu_schedule_file.get_value())); - sched_ops.mapping = sched_type_t::MAP_TO_RECORDED_OUTPUT; - sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS; sched_ops.replay_as_traced_istream = cpu_schedule_zip_.get(); + // -cpu_schedule_file is used for two different things: actually replaying, + // and just input for -skip_to_timestamp. 
Only if -skip_to_timestamp is 0 + // do we actually replay. + if (op_skip_to_timestamp.get_value() == 0) { + sched_ops.mapping = sched_type_t::MAP_TO_RECORDED_OUTPUT; + sched_ops.deps = sched_type_t::DEPENDENCY_TIMESTAMPS; + } } #endif sched_ops.kernel_switch_trace_path = op_sched_switch_file.get_value(); diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 6d2c51e913e..34291d45e99 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -549,7 +549,8 @@ droption_t op_skip_instrs( "Specifies the number of instructions to skip in the beginning of the trace " "analysis. For serial iteration, this number is " "computed just once across the interleaving sequence of all threads; for parallel " - "iteration, each thread skips this many insructions. When built with zipfile " + "iteration, each thread skips this many instructions (see -skip_to_timestamp for " + "an alternative which does align all threads). When built with zipfile " "support, this skipping is optimized and large instruction counts can be quickly " "skipped; this is not the case for -skip_refs."); @@ -561,6 +562,17 @@ droption_t "of being simulated. This skipping may be slow for large skip values; " "consider -skip_instrs for a faster method of skipping."); +droption_t op_skip_to_timestamp( + DROPTION_SCOPE_FRONTEND, "skip_to_timestamp", 0, "Timestamp to start at", + "Specifies a timestamp to start at, skipping over prior records in the trace. " + "This is cross-cutting across all threads. If the target timestamp is not " + "present as a timestamp marker, interpolation is used to approximate the " + "target location in each thread. Only one of this option and -skip_instrs can be " + "specified. Requires -cpu_schedule_file to also be specified, as a schedule file " + "is required to translate the timestamp into per-thread instruction ordinals. " + "When built with zipfile support, this skipping is optimized and large " + "instruction counts can be quickly skipped."); + droption_t op_L0_filter_until_instrs( DROPTION_SCOPE_CLIENT, "L0_filter_until_instrs", 0, "Number of instructions for warmup trace", @@ -901,9 +913,11 @@ droption_t op_replay_file(DROPTION_SCOPE_FRONTEND, "replay_file", " "Path with stored schedule for replay."); droption_t op_cpu_schedule_file(DROPTION_SCOPE_FRONTEND, "cpu_schedule_file", "", - "Path with stored as-traced schedule for replay", + "Path to as-traced schedule for replay or skip-to-timestamp", "Applies to -core_sharded and -core_serial. " - "Path with stored as-traced schedule for replay."); + "Path with stored as-traced schedule for replay. 
If specified " + "with a non-zero -skip_to_timestamp, there is no replay " + "and instead the file is used for the skip request."); #endif droption_t op_sched_switch_file( DROPTION_SCOPE_FRONTEND, "sched_switch_file", "", diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 20286b97a7c..a5bd3b3feaa 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -167,6 +167,7 @@ extern dynamorio::droption::droption_t extern dynamorio::droption::droption_t op_only_thread; extern dynamorio::droption::droption_t op_skip_instrs; extern dynamorio::droption::droption_t op_skip_refs; +extern dynamorio::droption::droption_t op_skip_to_timestamp; extern dynamorio::droption::droption_t op_warmup_refs; extern dynamorio::droption::droption_t op_warmup_fraction; extern dynamorio::droption::droption_t op_sim_refs; diff --git a/clients/drcachesim/reader/zipfile_file_reader.cpp b/clients/drcachesim/reader/zipfile_file_reader.cpp index acd1aebd9bd..7d03dd324a4 100644 --- a/clients/drcachesim/reader/zipfile_file_reader.cpp +++ b/clients/drcachesim/reader/zipfile_file_reader.cpp @@ -185,7 +185,7 @@ file_reader_t::skip_instructions(uint64_t instruction_count) { if (instruction_count == 0) return *this; - VPRINT(this, 2, "Skipping %" PRIi64 " instrs in %s\n", instruction_count, + VPRINT(this, 2, "Skipping %" PRIu64 " instrs in %s\n", instruction_count, input_file_.path.c_str()); if (!pre_skip_instructions()) return *this; @@ -199,7 +199,7 @@ file_reader_t::skip_instructions(uint64_t instruction_count) // know the chunk names to use with a single unzLocateFile. uint64_t stop_count = cur_instr_count_ + instruction_count + 1; VPRINT(this, 2, - "stop=%" PRIi64 " cur=%" PRIi64 " chunk=%" PRIi64 " est=%" PRIi64 "\n", + "stop=%" PRIu64 " cur=%" PRIu64 " chunk=%" PRIu64 " est=%" PRIu64 "\n", stop_count, cur_instr_count_, chunk_instr_count_, cur_instr_count_ + (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_))); @@ -227,11 +227,11 @@ file_reader_t::skip_instructions(uint64_t instruction_count) return *this; } cur_instr_count_ += chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_); - VPRINT(this, 2, "At %" PRIi64 " instrs at start of new chunk\n", + VPRINT(this, 2, "At %" PRIu64 " instrs at start of new chunk\n", cur_instr_count_); VPRINT(this, 2, - "zip chunk stop=%" PRIi64 " cur=%" PRIi64 " chunk=%" PRIi64 - " end-of-chunk=%" PRIi64 "\n", + "zip chunk stop=%" PRIu64 " cur=%" PRIu64 " chunk=%" PRIu64 + " end-of-chunk=%" PRIu64 "\n", stop_count, cur_instr_count_, chunk_instr_count_, cur_instr_count_ + (chunk_instr_count_ - (cur_instr_count_ % chunk_instr_count_))); diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index b7bf1170d32..fc90a723ac9 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -698,6 +699,18 @@ scheduler_tmpl_t::init( tid2input_[workload_tid_t(workload_idx, it.first)] = it.second; } } + if (!workload.times_of_interest.empty()) { + for (const auto &modifiers : workload.thread_modifiers) { + if (!modifiers.regions_of_interest.empty()) { + // We do not support mixing with other ROI specifiers. 
+ return STATUS_ERROR_INVALID_PARAMETER; + } + } + sched_type_t::scheduler_status_t status = + create_regions_from_times(workload_tids, workload); + if (status != sched_type_t::STATUS_SUCCESS) + return STATUS_ERROR_INVALID_PARAMETER; + } for (const auto &modifiers : workload.thread_modifiers) { if (modifiers.struct_size != sizeof(input_thread_info_t)) return STATUS_ERROR_INVALID_PARAMETER; @@ -724,6 +737,8 @@ scheduler_tmpl_t::init( input.priority = modifiers.priority; for (size_t i = 0; i < modifiers.regions_of_interest.size(); ++i) { const auto &range = modifiers.regions_of_interest[i]; + VPRINT(this, 3, "ROI #%zu for input %d: [%" PRIu64 ", %" PRIu64 ")\n", + i, index, range.start_instruction, range.stop_instruction); if (range.start_instruction == 0 || (range.stop_instruction < range.start_instruction && range.stop_instruction != 0)) @@ -731,8 +746,10 @@ scheduler_tmpl_t::init( if (i == 0) continue; if (range.start_instruction <= - modifiers.regions_of_interest[i - 1].stop_instruction) + modifiers.regions_of_interest[i - 1].stop_instruction) { + error_string_ = "gap required between regions of interest"; return STATUS_ERROR_INVALID_PARAMETER; + } } input.regions_of_interest = modifiers.regions_of_interest; } @@ -852,7 +869,8 @@ scheduler_tmpl_t::set_initial_schedule( // benefit to reading the schedule file. The analyzer serial mode or other // special cases of one output don't set the replay_as_traced_istream // field.) - sched_type_t::scheduler_status_t status = read_traced_schedule(); + sched_type_t::scheduler_status_t status = + read_and_instantiate_traced_schedule(); if (status != sched_type_t::STATUS_SUCCESS) return STATUS_ERROR_INVALID_PARAMETER; // Now leverage the regular replay code. @@ -1066,37 +1084,9 @@ scheduler_tmpl_t::read_recorded_schedule() template typename scheduler_tmpl_t::scheduler_status_t -scheduler_tmpl_t::read_traced_schedule() +scheduler_tmpl_t::read_and_instantiate_traced_schedule() { - if (options_.replay_as_traced_istream == nullptr) - return STATUS_ERROR_INVALID_PARAMETER; - - schedule_entry_t entry(0, 0, 0, 0); - // See comment in read_recorded_schedule() on our assumption that we can - // easily fit the whole context switch sequence in memory. This cpu_schedule - // file has an entry per timestamp, though, even for consecutive ones on the same - // core, so it uses more memory. - // We do not have a subfile listing feature in archive_istream_t, but we can - // read sequentially as each record has a cpu field. - // This schedule_entry_t format doesn't have the stop instruction ordinal (as it was - // designed for skip targets only), so we take two passes to get that information. - // If we do find memory is an issue we could add a stop field to schedule_entry_t - // and collapse as we go, saving memory. - // We also need to translate the thread and cpu id values into 0-based ordinals. - std::unordered_map tid2input; - for (int i = 0; i < static_cast(inputs_.size()); ++i) { - tid2input[inputs_[i].tid] = i; - } std::vector> start2stop(inputs_.size()); - // We initially number the outputs according to their order in the file, and then - // sort by the stored cpuid below. - // XXX i#6726: Should we support some direction from the user on this? Simulation - // may want to preserve the NUMA relationships and may need to set up its simulated - // cores at init time, so it would prefer to partition by output stream identifier. 
- // Maybe we could at least add the proposed memtrace_stream_t query for cpuid and - // let it be called even before reading any records at all? - output_ordinal_t cur_output = 0; - uint64_t cur_cpu = std::numeric_limits::max(); // We also want to collapse same-cpu consecutive records so we start with // a temporary local vector. std::vector> all_sched(outputs_.size()); @@ -1105,45 +1095,8 @@ scheduler_tmpl_t::read_traced_schedule() // These hold entries added in the on-disk (unsorted) order. std::vector disk_ord2index; // Initially [i] holds i. std::vector disk_ord2cpuid; // [i] holds cpuid for entry i. - while (options_.replay_as_traced_istream->read(reinterpret_cast(&entry), - sizeof(entry))) { - if (entry.cpu != cur_cpu) { - // This is a zipfile component boundary: one conmponent per cpu. - if (cur_cpu != std::numeric_limits::max()) { - ++cur_output; - if (cur_output >= static_cast(outputs_.size())) { - error_string_ = "replay_as_traced_istream cpu count != output count"; - return STATUS_ERROR_INVALID_PARAMETER; - } - } - cur_cpu = entry.cpu; - disk_ord2cpuid.push_back(cur_cpu); - disk_ord2index.push_back(cur_output); - } - input_ordinal_t input = tid2input[entry.thread]; - // We'll fill in the stop ordinal in our second pass below. - uint64_t start = entry.start_instruction; - uint64_t timestamp = entry.timestamp; - // Some entries have no instructions (there is an entry for each timestamp, and - // a signal can come in after a prior timestamp with no intervening instrs). - if (!all_sched[cur_output].empty() && - input == all_sched[cur_output].back().input && - start == all_sched[cur_output].back().start_instruction) { - VPRINT(this, 3, - "Output #%d: as-read segment #%zu has no instructions: skipping\n", - cur_output, all_sched[cur_output].size() - 1); - continue; - } - all_sched[cur_output].emplace_back(true, input, start, timestamp); - start2stop[input].insert(start); - input_sched[input].emplace_back(cur_output, all_sched[cur_output].size() - 1, - start, timestamp); - } - sched_type_t::scheduler_status_t res = - check_and_fix_modulo_problem_in_schedule(input_sched, start2stop, all_sched); - if (res != sched_type_t::STATUS_SUCCESS) - return res; - res = remove_zero_instruction_segments(input_sched, all_sched); + sched_type_t::scheduler_status_t res = read_traced_schedule( + input_sched, start2stop, all_sched, disk_ord2index, disk_ord2cpuid); if (res != sched_type_t::STATUS_SUCCESS) return res; // Sort by cpuid to get a more natural ordering. @@ -1255,6 +1208,212 @@ scheduler_tmpl_t::read_traced_schedule() return STATUS_SUCCESS; } +template +typename scheduler_tmpl_t::scheduler_status_t +scheduler_tmpl_t::create_regions_from_times( + const std::unordered_map &workload_tids, + input_workload_t &workload) +{ + // First, read from the as-traced schedule file into data structures shared with + // replay-as-traced. + std::vector> input_sched(inputs_.size()); + // These are all unused. + std::vector> start2stop(inputs_.size()); + std::vector> all_sched; + std::vector disk_ord2index; + std::vector disk_ord2cpuid; + sched_type_t::scheduler_status_t res = read_traced_schedule( + input_sched, start2stop, all_sched, disk_ord2index, disk_ord2cpuid); + if (res != sched_type_t::STATUS_SUCCESS) + return res; + // Do not allow a replay mode to start later. + options_.replay_as_traced_istream = nullptr; + + // Now create an interval tree of timestamps (with instr ordinals as payloads) + // for each input. 
As our intervals do not overlap and have no gaps we need + // no size, just the start address key. + std::vector> time_tree(inputs_.size()); + for (int input_idx = 0; input_idx < static_cast(inputs_.size()); + ++input_idx) { + for (int sched_idx = 0; + sched_idx < static_cast(input_sched[input_idx].size()); ++sched_idx) { + schedule_input_tracker_t &sched = input_sched[input_idx][sched_idx]; + VPRINT(this, 4, "as-read: input=%d start=%" PRId64 " time=%" PRId64 "\n", + input_idx, sched.start_instruction, sched.timestamp); + time_tree[input_idx][sched.timestamp] = sched.start_instruction; + } + } + + // Finally, convert the requested time ranges into instr ordinal ranges. + for (const auto &tid_it : workload_tids) { + std::vector instr_ranges; + bool entire_tid = false; + for (const auto &times : workload.times_of_interest) { + uint64_t instr_start = 0, instr_end = 0; + bool has_start = time_tree_lookup(time_tree[tid_it.second], + times.start_timestamp, instr_start); + bool has_end; + if (times.stop_timestamp == 0) + has_end = true; + else { + has_end = time_tree_lookup(time_tree[tid_it.second], times.stop_timestamp, + instr_end); + } + if (has_start && has_end && instr_start == instr_end) { + if (instr_start == 0 && instr_end == 0) { + entire_tid = true; + } else { + ++instr_end; + } + } + // If !has_start we'll include from 0. The start timestamp will make it be + // scheduled last but there will be no delay if no other thread is available. + // If !has_end, instr_end will still be 0 which means the end of the trace. + if (instr_start > 0 || instr_end > 0) { + if (!instr_ranges.empty() && + (instr_ranges.back().stop_instruction >= instr_start || + instr_ranges.back().stop_instruction == 0)) { + error_string_ = + "times_of_interest are too close together: " + "corresponding instruction ordinals are overlapping or adjacent"; + return STATUS_ERROR_INVALID_PARAMETER; + } + instr_ranges.emplace_back(instr_start, instr_end); + VPRINT(this, 2, + "tid %" PRIu64 " overlaps with times_of_interest [%" PRIu64 + ", %" PRIu64 ") @ [%" PRIu64 ", %" PRIu64 ")\n", + tid_it.first, times.start_timestamp, times.stop_timestamp, + instr_start, instr_end); + } + } + if (!entire_tid && instr_ranges.empty()) { + // Exclude this thread completely. We've already created its + // inputs_ entry with cross-indices stored in other structures + // so instead of trying to erase it we give it a max start point. + VPRINT(this, 2, + "tid %" PRIu64 " has no overlap with any times_of_interest entry\n", + tid_it.first); + instr_ranges.emplace_back(std::numeric_limits::max(), 0); + } + if (entire_tid) { + // No range is needed. + } else { + workload.thread_modifiers.emplace_back(instr_ranges); + workload.thread_modifiers.back().tids.emplace_back(tid_it.first); + } + } + return sched_type_t::STATUS_SUCCESS; +} + +template +bool +scheduler_tmpl_t::time_tree_lookup( + const std::map &tree, uint64_t time, uint64_t &ordinal) +{ + auto it = tree.upper_bound(time); + if (it == tree.begin() || it == tree.end()) { + // We do not have a timestamp in the footer, so we assume any time + // past the final known timestamp is too far and do not try to + // fit into the final post-last-timestamp sequence. 
+ return false; + } + uint64_t upper_time = it->first; + uint64_t upper_ord = it->second; + it--; + uint64_t lower_time = it->first; + uint64_t lower_ord = it->second; + double fraction = (time - lower_time) / static_cast(upper_time - lower_time); + double interpolate = lower_ord + fraction * (upper_ord - lower_ord); + ordinal = static_cast(interpolate); + VPRINT(this, 3, + "time2ordinal: time %" PRIu64 " => times [%" PRIu64 ", %" PRIu64 + ") ords [%" PRIu64 ", %" PRIu64 ") => interpolated %" PRIu64 "\n", + time, lower_time, upper_time, lower_ord, upper_ord, ordinal); + return true; +} + +template +typename scheduler_tmpl_t::scheduler_status_t +scheduler_tmpl_t::read_traced_schedule( + std::vector> &input_sched, + std::vector> &start2stop, + std::vector> &all_sched, + std::vector &disk_ord2index, std::vector &disk_ord2cpuid) +{ + if (options_.replay_as_traced_istream == nullptr) { + error_string_ = "Missing as-traced istream"; + return STATUS_ERROR_INVALID_PARAMETER; + } + + schedule_entry_t entry(0, 0, 0, 0); + // See comment in read_recorded_schedule() on our assumption that we can + // easily fit the whole context switch sequence in memory. This cpu_schedule + // file has an entry per timestamp, though, even for consecutive ones on the same + // core, so it uses more memory. + // We do not have a subfile listing feature in archive_istream_t, but we can + // read sequentially as each record has a cpu field. + // This schedule_entry_t format doesn't have the stop instruction ordinal (as it was + // designed for skip targets only), so we take two passes to get that information. + // If we do find memory is an issue we could add a stop field to schedule_entry_t + // and collapse as we go, saving memory. + // We also need to translate the thread and cpu id values into 0-based ordinals. + std::unordered_map tid2input; + for (int i = 0; i < static_cast(inputs_.size()); ++i) { + tid2input[inputs_[i].tid] = i; + } + // We initially number the outputs according to their order in the file, and then + // sort by the stored cpuid below. + // XXX i#6726: Should we support some direction from the user on this? Simulation + // may want to preserve the NUMA relationships and may need to set up its simulated + // cores at init time, so it would prefer to partition by output stream identifier. + // Maybe we could at least add the proposed memtrace_stream_t query for cpuid and + // let it be called even before reading any records at all? + output_ordinal_t cur_output = 0; + uint64_t cur_cpu = std::numeric_limits::max(); + while (options_.replay_as_traced_istream->read(reinterpret_cast(&entry), + sizeof(entry))) { + if (entry.cpu != cur_cpu) { + // This is a zipfile component boundary: one component per cpu. + if (cur_cpu != std::numeric_limits::max()) { + ++cur_output; + if (options_.mapping == MAP_TO_RECORDED_OUTPUT && !outputs_.empty() && + cur_output >= static_cast(outputs_.size())) { + error_string_ = "replay_as_traced_istream cpu count != output count"; + return STATUS_ERROR_INVALID_PARAMETER; + } + } + cur_cpu = entry.cpu; + disk_ord2cpuid.push_back(cur_cpu); + disk_ord2index.push_back(cur_output); + } + input_ordinal_t input = tid2input[entry.thread]; + // The caller must fill in the stop ordinal in a second pass. + uint64_t start = entry.start_instruction; + uint64_t timestamp = entry.timestamp; + // Some entries have no instructions (there is an entry for each timestamp, and + // a signal can come in after a prior timestamp with no intervening instrs). 
+ if (all_sched.size() < static_cast(cur_output + 1)) + all_sched.resize(cur_output + 1); + if (!all_sched[cur_output].empty() && + input == all_sched[cur_output].back().input && + start == all_sched[cur_output].back().start_instruction) { + VPRINT(this, 3, + "Output #%d: as-read segment #%zu has no instructions: skipping\n", + cur_output, all_sched[cur_output].size() - 1); + continue; + } + all_sched[cur_output].emplace_back(true, input, start, timestamp); + start2stop[input].insert(start); + input_sched[input].emplace_back(cur_output, all_sched[cur_output].size() - 1, + start, timestamp); + } + sched_type_t::scheduler_status_t res = + check_and_fix_modulo_problem_in_schedule(input_sched, start2stop, all_sched); + if (res != sched_type_t::STATUS_SUCCESS) + return res; + return remove_zero_instruction_segments(input_sched, all_sched); +} + template typename scheduler_tmpl_t::scheduler_status_t scheduler_tmpl_t::remove_zero_instruction_segments( @@ -1888,7 +2047,7 @@ scheduler_tmpl_t::advance_region_of_interest( if (input.in_cur_region && cur_instr >= cur_range.start_instruction - 1) return sched_type_t::STATUS_OK; - VPRINT(this, 2, "skipping from %" PRId64 " to %" PRId64 " instrs for ROI\n", + VPRINT(this, 2, "skipping from %" PRIu64 " to %" PRIu64 " instrs for ROI\n", cur_instr, cur_range.start_instruction); if (options_.schedule_record_ostream != nullptr) { sched_type_t::stream_status_t status = close_schedule_segment(output, input); @@ -1943,10 +2102,17 @@ scheduler_tmpl_t::skip_instructions(output_ordinal_t out clear_input_queue(input); input.reader->skip_instructions(skip_amount); if (*input.reader == *input.reader_end) { - // Raise error because the input region is out of bounds. - VPRINT(this, 2, "skip_instructions: input=%d skip out of bounds\n", input.index); mark_input_eof(input); - return sched_type_t::STATUS_REGION_INVALID; + // Raise error because the input region is out of bounds, unless the max + // was used, which we ourselves use internally for times_of_interest. + if (skip_amount >= std::numeric_limits::max() - 2) { + VPRINT(this, 2, "skip_instructions: input=%d skip to eof\n", input.index); + return sched_type_t::STATUS_SKIPPED; + } else { + VPRINT(this, 2, "skip_instructions: input=%d skip out of bounds\n", + input.index); + return sched_type_t::STATUS_REGION_INVALID; + } } input.in_cur_region = true; auto *stream = outputs_[output].stream; diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 5ca8207512f..afdb0ee1b53 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,27 @@ template class scheduler_tmpl_t { uint64_t stop_instruction; }; + /** + * A time range in units of the microsecond timestamps in the traces. + */ + struct timestamp_range_t { + /** Convenience constructor. */ + timestamp_range_t(uint64_t start, uint64_t stop) + : start_timestamp(start) + , stop_timestamp(stop) + { + } + /** + * The starting time in the microsecond timestamp units in the trace. + */ + uint64_t start_timestamp; + /** + * The ending time in the microsecond timestamp units in the trace. + * The ending time is inclusive. 0 means the end of the trace. + */ + uint64_t stop_timestamp; + }; + /** * Specifies details about one set of input trace files from one workload, * each input file representing a single software thread. 
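The conversion behind timestamp_range_t relies on linear interpolation between the two bracketing as-traced schedule entries, as performed by time_tree_lookup() above. Below is a hedged standalone sketch of that arithmetic; the helper name is hypothetical, and the example values are taken from the test_times_of_interest unit test added further down (entries mapping timestamp 20 to ordinal 0 and timestamp 30 to ordinal 4).

#include <cstdint>

// Estimate the instruction ordinal for 'time' given the schedule entries that
// bracket it: (lower_time -> lower_ord) and (upper_time -> upper_ord).
static uint64_t
interpolate_ordinal(uint64_t time, uint64_t lower_time, uint64_t lower_ord,
                    uint64_t upper_time, uint64_t upper_ord)
{
    double fraction =
        (time - lower_time) / static_cast<double>(upper_time - lower_time);
    return static_cast<uint64_t>(lower_ord + fraction * (upper_ord - lower_ord));
}
// E.g., interpolate_ordinal(25, 20, 0, 30, 4) == 2: a requested start time of 25
// lands half-way between the two entries and so maps to instruction ordinal 2.
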
@@ -311,6 +333,34 @@ template class scheduler_tmpl_t { /** Scheduling modifiers for the threads in this workload. */ std::vector thread_modifiers; + /** + * If non-empty, all input records outside of these ranges are skipped. These + * times cut across all inputs of this workload. The times are converted into a + * separate sequence of instruction + * #dynamorio::drmemtrace::scheduler_tmpl_t::range_t for each input. The end + * result is as though the #dynamorio::drmemtrace::scheduler_tmpl_t:: + * input_thread_info_t.regions_of_interest + * field were set for each input. See the comments by that field for further + * details such as the marker inserted between ranges. Although the times cut + * across all inputs for determining the per-input instruction-ordinal ranges, no + * barrier is applied to align the resulting regions of interest: i.e., one input + * can finish its initial region and move to its next region before all other + * inputs finish their initial regions. + * + * If non-empty, the + * #dynamorio::drmemtrace::scheduler_tmpl_t:: + * input_thread_info_t.regions_of_interest + * field must be empty for each modifier for this workload. + * + * If non-empty, the #dynamorio::drmemtrace::scheduler_tmpl_t:: + * scheduler_options_t.replay_as_traced_istream field must also be specified to + * provide the timestamp-to-instruction-ordinal mappings. These mappings are not + * precise due to the coarse-grained timestamps and the elision of adjacent + * timestamps in the istream. Interpolation is used to estimate instruction + * ordinals when timestamps fall in between recorded points. + */ + std::vector times_of_interest; + // Work around a known Visual Studio issue where it complains about deleted copy // constructors for unique_ptr by deleting our copies and defaulting our moves. input_workload_t(const input_workload_t &) = delete; @@ -539,7 +589,10 @@ template class scheduler_tmpl_t { /** * Input stream for replaying the traced schedule when #MAP_TO_RECORDED_OUTPUT is * specified for more than one output stream (whose count must match the number - * of traced cores). + * of traced cores). Alternatively, if + * #dynamorio::drmemtrace::scheduler_tmpl_t::input_workload_t.times_of_interest + * is non-empty, this stream is required for obtaining the mappings between + * timestamps and instruction ordinals. */ archive_istream_t *replay_as_traced_istream = nullptr; /** @@ -1461,13 +1514,33 @@ template class scheduler_tmpl_t { skip_instructions(output_ordinal_t output, input_info_t &input, uint64_t skip_amount); scheduler_status_t - read_traced_schedule(); + read_and_instantiate_traced_schedule(); + + scheduler_status_t + create_regions_from_times(const std::unordered_map &workload_tids, + input_workload_t &workload); + + // Interval time-to-instr-ord tree lookup with interpolation. + bool + time_tree_lookup(const std::map &tree, uint64_t time, + uint64_t &ordinal); + + // Reads from the as-traced schedule file into the passed-in structures, after + // fixing up zero-instruction sequences and working around i#6107. + scheduler_status_t + read_traced_schedule(std::vector> &input_sched, + std::vector> &start2stop, + std::vector> &all_sched, + std::vector &disk_ord2index, + std::vector &disk_ord2cpuid); + // Helper for read_traced_schedule(). scheduler_status_t remove_zero_instruction_segments( std::vector> &input_sched, std::vector> &all_sched); + // Helper for read_traced_schedule(). 
scheduler_status_t check_and_fix_modulo_problem_in_schedule( std::vector> &input_sched, diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index eeec09c073c..554715e42e4 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -3555,6 +3555,163 @@ test_replay_as_traced_from_file(const char *testdir) #endif } +static void +test_times_of_interest() +{ +#ifdef HAS_ZIP + std::cerr << "\n----------------\nTesting times of interest\n"; + + static constexpr int NUM_INPUTS = 3; + static constexpr int NUM_OUTPUTS = 1; + static constexpr int NUM_TIMESTAMPS = 3; + static constexpr int NUM_INSTRS_PER_TIMESTAMP = 3; + static constexpr memref_tid_t TID_BASE = 100; + static constexpr addr_t PC_BASE = 42; + static constexpr int CPU0 = 6; + static constexpr int CPU1 = 9; + std::vector inputs[NUM_INPUTS]; + for (int i = 0; i < NUM_INPUTS; ++i) { + memref_tid_t tid = TID_BASE + i; + inputs[i].push_back(make_thread(tid)); + inputs[i].push_back(make_pid(1)); + for (int j = 0; j < NUM_TIMESTAMPS; ++j) { + uint64_t timestamp = i == 2 ? (1 + 5 * (j + 1)) : (10 * (j + 1) + 10 * i); + inputs[i].push_back(make_timestamp(timestamp)); + for (int k = 0; k < NUM_INSTRS_PER_TIMESTAMP; ++k) { + inputs[i].push_back(make_instr(PC_BASE + 1 /*1-based ranges*/ + + j * NUM_INSTRS_PER_TIMESTAMP + k)); + } + } + inputs[i].push_back(make_exit(tid)); + } + + // Synthesize a cpu-schedule file. + std::string cpu_fname = "tmp_test_times_of_interest.zip"; + { + std::vector sched0; + std::vector sched1; + // We do not bother to interleave to make it easier to see the sequence + // in this test. + // Thread A. + sched0.emplace_back(TID_BASE + 0, 10, CPU0, 0); + sched0.emplace_back(TID_BASE + 0, 20, CPU0, 4); + sched0.emplace_back(TID_BASE + 0, 30, CPU0, 7); + // Thread B. + sched0.emplace_back(TID_BASE + 1, 20, CPU0, 0); + sched0.emplace_back(TID_BASE + 1, 30, CPU0, 4); + sched0.emplace_back(TID_BASE + 1, 40, CPU0, 7); + // Thread C. + sched1.emplace_back(TID_BASE + 2, 6, CPU1, 0); + sched1.emplace_back(TID_BASE + 2, 11, CPU1, 4); + sched1.emplace_back(TID_BASE + 2, 16, CPU1, 7); + std::ostringstream cpu0_string; + cpu0_string << CPU0; + std::ostringstream cpu1_string; + cpu1_string << CPU1; + zipfile_ostream_t outfile(cpu_fname); + std::string err = outfile.open_new_component(cpu0_string.str()); + assert(err.empty()); + if (!outfile.write(reinterpret_cast(sched0.data()), + sched0.size() * sizeof(sched0[0]))) + assert(false); + err = outfile.open_new_component(cpu1_string.str()); + assert(err.empty()); + if (!outfile.write(reinterpret_cast(sched1.data()), + sched1.size() * sizeof(sched1[0]))) + assert(false); + } + + { + // Test an erroneous range request with no gap. + std::vector sched_inputs; + std::vector readers; + for (int i = 0; i < NUM_INPUTS; i++) { + memref_tid_t tid = TID_BASE + i; + readers.emplace_back( + std::unique_ptr(new mock_reader_t(inputs[i])), + std::unique_ptr(new mock_reader_t()), tid); + } + sched_inputs.emplace_back(std::move(readers)); + // Pick times that have adjacent corresponding instructions: 30 and 32 + // have a time gap but no instruction gap. 
+ sched_inputs.back().times_of_interest = { { 25, 30 }, { 32, 33 } }; + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/3); + zipfile_istream_t infile(cpu_fname); + sched_ops.replay_as_traced_istream = &infile; + scheduler_t scheduler; + assert(scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) == + scheduler_t::STATUS_ERROR_INVALID_PARAMETER); + } + { + // Test a valid range request. + std::vector sched_inputs; + std::vector readers; + for (int i = 0; i < NUM_INPUTS; i++) { + memref_tid_t tid = TID_BASE + i; + readers.emplace_back( + std::unique_ptr(new mock_reader_t(inputs[i])), + std::unique_ptr(new mock_reader_t()), tid); + } + sched_inputs.emplace_back(std::move(readers)); + sched_inputs.back().times_of_interest = { { 25, 30 }, { 38, 39 } }; + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/3); + zipfile_istream_t infile(cpu_fname); + sched_ops.replay_as_traced_istream = &infile; + scheduler_t scheduler; + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) { + std::cerr << scheduler.get_error_string() << "\n"; + assert(false); + } + auto *stream0 = scheduler.get_stream(0); + auto check_next = [](scheduler_t::stream_t *stream, memref_tid_t expect_tid, + trace_type_t expect_type, addr_t expect_addr = 0) { + memref_t record; + scheduler_t::stream_status_t status = stream->next_record(record); + assert(status == scheduler_t::STATUS_OK); + assert(record.instr.tid == expect_tid); + if (record.instr.type != expect_type) { + std::cerr << "Expected type " << expect_type + << " != " << record.instr.type << "\n"; + assert(false); + } + if (expect_addr != 0 && record.instr.addr != expect_addr) { + std::cerr << "Expected addr " << expect_addr + << " != " << record.instr.addr << "\n"; + assert(false); + } + }; + // Range is 5 until the end. + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 5); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 6); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_MARKER); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 7); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 8); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_INSTR, PC_BASE + 9); + check_next(stream0, TID_BASE + 0, TRACE_TYPE_THREAD_EXIT); + // Two ranges: 2-4 and 6-7. + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 2); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 3); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_MARKER); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 4); + // Window id marker in between. 
+ check_next(stream0, TID_BASE + 1, TRACE_TYPE_MARKER); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 6); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_MARKER); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_INSTR, PC_BASE + 7); + check_next(stream0, TID_BASE + 1, TRACE_TYPE_THREAD_EXIT); + memref_t record; + assert(stream0->next_record(record) == scheduler_t::STATUS_EOF); + } +#endif +} + static void test_inactive() { @@ -4773,6 +4930,7 @@ test_main(int argc, const char *argv[]) test_replay_as_traced_i6107_workaround(); test_replay_as_traced_dup_start(); test_replay_as_traced_sort(); + test_times_of_interest(); test_inactive(); test_direct_switch(); test_unscheduled(); diff --git a/clients/drcachesim/tests/skip_to_timestamp.templatex b/clients/drcachesim/tests/skip_to_timestamp.templatex new file mode 100644 index 00000000000..f0f2f3d9b59 --- /dev/null +++ b/clients/drcachesim/tests/skip_to_timestamp.templatex @@ -0,0 +1,47 @@ +Basic counts tool results: +Total counts: + 46166 total \(fetched\) instructions + 4327 total unique \(fetched\) instructions + 46166 total userspace instructions + 0 total kernel instructions + 27090 total non-fetched instructions + 27090 total non-fetched userspace instructions + 0 total non-fetched kernel instructions + 0 total prefetches + 37530 total data loads + 38316 total data stores + 0 total icache flushes + 0 total dcache flushes + 2 total threads + 514 total timestamp \+ cpuid markers +.* +Thread 1257602 counts: + 30351 \(fetched\) instructions + 309 unique \(fetched\) instructions + 30351 userspace instructions + 0 kernel instructions + 27090 non-fetched instructions + 27090 non-fetched userspace instructions + 0 non-fetched kernel instructions + 0 prefetches + 33202 data loads + 35737 data stores + 0 icache flushes + 0 dcache flushes + 52 timestamp \+ cpuid markers +.* +Thread 1257596 counts: + 15815 \(fetched\) instructions + 4074 unique \(fetched\) instructions + 15815 userspace instructions + 0 kernel instructions + 0 non-fetched instructions + 0 non-fetched userspace instructions + 0 non-fetched kernel instructions + 0 prefetches + 4328 data loads + 2579 data stores + 0 icache flushes + 0 dcache flushes + 462 timestamp \+ cpuid markers +.* diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index b5f2e730d33..37e32fe61ef 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -3870,6 +3870,13 @@ if (BUILD_CLIENTS) "") set(tool.simulate_as_traced_rawtemp ON) # no preprocessor + torunonly_simtool(skip_to_timestamp ${ci_shared_app} + # We pick a timestamp far enough in that only 2 threads remain + # at that point, testing thread exclusion. + "-indir ${thread_trace_dir} -tool basic_counts -skip_to_timestamp 13331862029905366 -cpu_schedule_file ${cpu_sched_path}" + "") + set(tool.skip_to_timestamp_rawtemp ON) # no preprocessor + set(switch_file "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/mock_switch_sequences.x64.zip") torunonly_simtool(switch_insertion ${ci_shared_app}