diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ivc_bench/ivc.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/ivc_bench/ivc.bench.cpp
index 5273eb14dc8..467e9be906e 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/ivc_bench/ivc.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/ivc_bench/ivc.bench.cpp
@@ -128,9 +128,6 @@ BENCHMARK_DEFINE_F(IvcBench, Decide)(benchmark::State& state)
 {
     ClientIVC ivc;
-<<<<<<< HEAD
-=======
->>>>>>> master
 
     // Perform a specified number of iterations of function/kernel accumulation
     perform_ivc_accumulation_rounds(state, ivc);
 
@@ -148,10 +145,7 @@ BENCHMARK_DEFINE_F(IvcBench, Decide)(benchmark::State& state)
 BENCHMARK_DEFINE_F(IvcBench, ECCVM)(benchmark::State& state)
 {
     ClientIVC ivc;
-<<<<<<< HEAD
-=======
->>>>>>> master
 
     // Perform a specified number of iterations of function/kernel accumulation
     perform_ivc_accumulation_rounds(state, ivc);
 
diff --git a/barretenberg/cpp/src/barretenberg/commitment_schemes/ipa/ipa.hpp b/barretenberg/cpp/src/barretenberg/commitment_schemes/ipa/ipa.hpp
index a0c7f735096..f89b42ca60a 100644
--- a/barretenberg/cpp/src/barretenberg/commitment_schemes/ipa/ipa.hpp
+++ b/barretenberg/cpp/src/barretenberg/commitment_schemes/ipa/ipa.hpp
@@ -235,6 +235,9 @@ template <typename Curve> class IPA {
         // Compute G_zero
         // First construct s_vec
         std::vector<Fr> s_vec(poly_degree);
+        // TODO(https://github.com/AztecProtocol/barretenberg/issues/857): This code is not efficient as it's
+        // O(n log n). It can be optimized to be linear by computing a tree of products, but it's very readable,
+        // so we're leaving it unoptimized for now.
         run_loop_in_parallel_if_effective(
             poly_degree,
             [&s_vec, &round_challenges_inv, log_poly_degree](size_t start, size_t end) {
diff --git a/barretenberg/cpp/src/barretenberg/polynomials/pow.bench.cpp b/barretenberg/cpp/src/barretenberg/polynomials/pow.bench.cpp
new file mode 100644
index 00000000000..083ff70db5e
--- /dev/null
+++ b/barretenberg/cpp/src/barretenberg/polynomials/pow.bench.cpp
@@ -0,0 +1,27 @@
+#include "barretenberg/polynomials/pow.hpp"
+#include "barretenberg/ecc/curves/bn254/fr.hpp"
+#include <benchmark/benchmark.h>
+
+using namespace benchmark;
+using namespace bb;
+
+namespace {
+
+void compute_pow_poly(benchmark::State& state)
+{
+    // Set up the betas; each benchmark iteration uses the first state.range(0) of them
+    std::vector<fr> betas{ 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+                           15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 };
+
+    for (auto _ : state) {
+        int64_t num_betas = state.range(0);
+        std::vector<fr> cur_betas(betas.begin(), betas.begin() + num_betas);
+        PowPolynomial<fr> pow{ cur_betas };
+        pow.compute_values();
+    }
+}
+
+BENCHMARK(compute_pow_poly)->Unit(benchmark::kMillisecond)->Arg(20);
+
+} // namespace
+BENCHMARK_MAIN();
\ No newline at end of file
diff --git a/barretenberg/cpp/src/barretenberg/polynomials/pow.hpp b/barretenberg/cpp/src/barretenberg/polynomials/pow.hpp
index 9079724c98a..7dcc2fbddeb 100644
--- a/barretenberg/cpp/src/barretenberg/polynomials/pow.hpp
+++ b/barretenberg/cpp/src/barretenberg/polynomials/pow.hpp
@@ -1,4 +1,6 @@
 #pragma once
+#include "barretenberg/common/compiler_hints.hpp"
+#include "barretenberg/common/op_count.hpp"
 #include "barretenberg/common/thread.hpp"
 #include <vector>
 
@@ -121,8 +123,9 @@ template <typename FF> struct PowPolynomial {
      * @brief Given \vec{β} = {β_0,...,β_{d-1}} compute pow_\vec{β}(i) for i=0,...,2^{d}-1
      *
      */
-    void compute_values()
+    BB_PROFILE void compute_values()
     {
+        BB_OP_COUNT_TIME();
         size_t pow_size = 1 << betas.size();
         pow_betas = std::vector<FF>(pow_size);
 
@@ -136,6 +139,11 @@ template <typename FF> struct PowPolynomial {
         size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified
         num_threads = num_threads > 0 ? num_threads : 1;                     // ensure num threads is >= 1
         size_t iterations_per_thread = pow_size / num_threads;               // actual iterations per thread
+
+        // TODO(https://github.com/AztecProtocol/barretenberg/issues/864): This computation is asymptotically slow
+        // as it does pow_size * log(pow_size) work. However, in practice it is very fast because it is trivially
+        // parallelizable and takes only 45ms for the whole 6-iteration IVC benchmark. It's also very readable,
+        // so we're leaving it unoptimized for now.
         parallel_for(num_threads, [&](size_t thread_idx) {
             size_t start = thread_idx * iterations_per_thread;
             size_t end = (thread_idx + 1) * iterations_per_thread;
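
Reviewer note (not part of the patch): the two TODOs above gesture at the same cost model, so two hedged sketches follow. First, the pow_size * log(pow_size) figure in the issue-864 comment comes from every entry independently walking the bits of its index. The helper name pow_entry and the generic Fr parameter are illustrative assumptions, not barretenberg API; the sketch assumes Fr is constructible from an integer and supports multiplication.

#include <cstddef>
#include <vector>

// Sketch (assumed names, not barretenberg API): entry i is the product of
// betas[j] over the set bits j of i, so each of the pow_size entries costs up
// to log(pow_size) multiplications, and entries are independent of one
// another, which is what makes the current loops trivially parallelizable.
template <typename Fr> Fr pow_entry(const std::vector<Fr>& betas, size_t idx)
{
    Fr result(1);
    for (size_t j = 0; j < betas.size(); ++j) {
        if ((idx >> j) & 1) {
            result *= betas[j];
        }
    }
    return result;
}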
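
Second, the "tree of products" optimization named in the issue-857 comment can be realized as iterative doubling: entries [2^j, 2^(j+1)) differ from entries [0, 2^j) only by a factor of the j-th challenge, so the whole table costs n - 1 multiplications instead of n log n. Same assumptions as above; products_over_bits is a hypothetical name, and the bit-to-challenge ordering matches pow_entry.

#include <cstddef>
#include <vector>

// Sketch (assumed names, not barretenberg API): fill out[i] = product of
// challenges[j] over the set bits j of i, in O(n) total multiplications by
// doubling the already-correct prefix once per challenge.
template <typename Fr> std::vector<Fr> products_over_bits(const std::vector<Fr>& challenges)
{
    const size_t log_n = challenges.size();
    std::vector<Fr> out(size_t(1) << log_n, Fr(1));
    for (size_t j = 0; j < log_n; ++j) {
        const size_t half = size_t(1) << j; // size of the already-correct prefix
        for (size_t i = 0; i < half; ++i) {
            out[half + i] = out[i] * challenges[j]; // same index with bit j set
        }
    }
    return out;
}

Note the trade-off this exposes: each doubling pass depends on the previous one, so the linear version gives up some of the per-entry independence of the loops in the patch, which fits the comments' stated rationale for leaving the readable version in place.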