Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Profiler class for scheduler/block profiling #131

Merged
merged 18 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ add_benchmark(bm_case1)
add_benchmark(bm_filter)
add_benchmark(bm_fft)
add_benchmark(bm_history_buffer)
add_benchmark(bm_profiler)
add_benchmark(bm_scheduler)

add_executable(bm_case1_nosimd bm_case1.cpp)
Expand Down
66 changes: 66 additions & 0 deletions bench/bm_profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#include "benchmark.hpp"

#include <profiler.hpp>

#include <iostream>

using namespace fair::graph::profiling;

// Repetition parameters shared by every benchmark registered in this file:
// N_ITER is passed as the template argument of `repeat<>` and N_SAMPLES as its
// call argument (see the benchmark framework for their exact meaning).
inline constexpr std::size_t N_ITER{ 7 };
inline constexpr std::size_t N_SAMPLES{ 1 };

/**
 * Baseline workload without any profiler involvement.
 *
 * Runs 1000 x 1000 rounds; each round allocates a fresh vector of 10000 ints,
 * fills it with 1..10000 and adds the vector's sum to a running total.
 * The total and the elapsed wall time (in milliseconds) are printed at the end.
 */
inline void
run_without_profiler() {
    const auto t_begin = detail::clock::now();

    long long total = 0;
    for (std::size_t outer = 0; outer != 1000; ++outer) {
        for (std::size_t inner = 0; inner != 1000; ++inner) {
            // A new vector per round is deliberate: the allocation is part of the measured work.
            std::vector<int> values(10000);
            std::iota(values.begin(), values.end(), 1);
            total += std::accumulate(values.begin(), values.end(), 0);
        }
    }

    const auto t_end      = detail::clock::now();
    const auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_begin).count();
    fmt::print("The sum of sums is {} and it took {}ms\n", total, elapsed_ms);
}

/**
 * Same workload as the un-profiled baseline, instrumented through profiler `p`.
 *
 * A "whole_calculation" complete event is started once and stays in scope until
 * the function returns. Each of the 1000 outer iterations starts its own
 * "iteration" async event, and `step()` is called on it after every one of the
 * 1000 inner rounds. The total and the elapsed wall time (in milliseconds) are
 * printed at the end.
 *
 * @param p profiler whose per-thread handler (obtained via `for_this_thread()`)
 *          is used to record the events
 */
template<Profiler P>
inline void
run_with_profiler(P &p) {
    const auto t_begin        = detail::clock::now();
    auto      &thread_handler = p.for_this_thread();

    // Never read again: it only needs to stay alive for the rest of the function.
    [[maybe_unused]] auto overall_event = thread_handler.start_complete_event("whole_calculation");

    long long total = 0;
    for (std::size_t outer = 0; outer != 1000; ++outer) {
        // One async event per outer iteration; it goes out of scope at the end of this loop body.
        auto iteration_event = thread_handler.start_async_event("iteration", {}, { { "arg1", 2 }, { "arg2", "hello" } });
        for (std::size_t inner = 0; inner != 1000; ++inner) {
            std::vector<int> values(10000);
            std::iota(values.begin(), values.end(), 1);
            total += std::accumulate(values.begin(), values.end(), 0);
            iteration_event.step();
        }
    }

    const auto t_end      = detail::clock::now();
    const auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_begin).count();
    fmt::print("The sum of sums is {} and it took {}ms\n", total, elapsed_ms);
}

// Benchmark suite comparing the same workload in three configurations:
// with the default profiler, with the null profiler, and with no profiler at all.
[[maybe_unused]] inline const boost::ut::suite profiler_tests = [] {
    using namespace benchmark;
    using namespace boost::ut;

    // Workload instrumented with the default profiler implementation.
    profiler default_prof;
    "default profiler"_benchmark.repeat<N_ITER>(N_SAMPLES) = [&default_prof]() { run_with_profiler(default_prof); };

    // Identical instrumentation calls, routed through the null profiler.
    null::profiler noop_prof;
    "null profiler"_benchmark.repeat<N_ITER>(N_SAMPLES) = [&noop_prof]() { run_with_profiler(noop_prof); };

    // Reference run without any profiler calls.
    "no profiler"_benchmark.repeat<N_ITER>(N_SAMPLES) = []() { run_without_profiler(); };
};

// Intentionally does nothing: a body is not needed by the UT framework.
int
main() {
    return 0;
}
7 changes: 7 additions & 0 deletions bench/bm_scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <boost/ut.hpp>
#include <graph.hpp>
#include <profiler.hpp>
#include <scheduler.hpp>

#include "bm_test_helper.hpp"
Expand Down Expand Up @@ -109,6 +110,7 @@ exec_bm(auto &scheduler, const std::string &test_case) {
}

[[maybe_unused]] inline const boost::ut::suite scheduler_tests = [] {
using namespace fair::graph::profiling;
using namespace boost::ut;
using namespace benchmark;
using thread_pool = fair::thread_pool::BasicThreadPool;
Expand Down Expand Up @@ -139,6 +141,11 @@ exec_bm(auto &scheduler, const std::string &test_case) {

fg::scheduler::breadth_first<multi_threaded> sched4_mt(test_graph_bifurcated<float>(N_NODES), pool);
"bifurcated graph - BFS scheduler (multi-threaded)"_benchmark.repeat<N_ITER>(N_SAMPLES) = [&sched4_mt]() { exec_bm(sched4_mt, "bifurcated-graph BFS-sched (multi-threaded)"); };

fg::scheduler::breadth_first<multi_threaded, profiler> sched4_mt_prof(test_graph_bifurcated<float>(N_NODES), pool);
"bifurcated graph - BFS scheduler (multi-threaded) with profiling"_benchmark.repeat<N_ITER>(N_SAMPLES) = [&sched4_mt_prof]() {
exec_bm(sched4_mt_prof, "bifurcated-graph BFS-sched (multi-threaded) with profiling");
};
};

int
Expand Down
2 changes: 1 addition & 1 deletion include/circular_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ class circular_buffer
_parent(parent), _index(index), _n_slots_to_claim(n_slots_to_claim), _offset(sequence - static_cast<signed_index_type>(n_slots_to_claim)), _internal_span({ &_parent->_buffer->_data[_index], _n_slots_to_claim }) { }
ReservedOutputRange(const ReservedOutputRange&) = delete;
ReservedOutputRange& operator=(const ReservedOutputRange&) = delete;
explicit ReservedOutputRange(ReservedOutputRange&& other) noexcept
ReservedOutputRange(ReservedOutputRange&& other) noexcept
: _parent(std::exchange(other._parent, nullptr))
, _index(std::exchange(other._index, 0))
, _n_slots_to_claim(std::exchange(other._n_slots_to_claim, 0))
Expand Down
Loading