From 9c3045b2db23a55c91606dcaabe66b16de87731b Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 17:26:24 +0000 Subject: [PATCH 01/25] Tinkering --- .../benchmark/honk_bench/main.simple.cpp | 40 ++++++++++++------- .../cpp/src/barretenberg/common/thread.cpp | 7 ++++ .../honk/pcs/zeromorph/zeromorph.hpp | 5 ++- .../barretenberg/polynomials/polynomial.cpp | 2 +- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index 84a2f3c8c88..ac695876812 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -7,6 +7,7 @@ #include #include "barretenberg/honk/composer/ultra_composer.hpp" +#include "barretenberg/plonk/composer/ultra_composer.hpp" #include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp" #include "barretenberg/proof_system/types/circuit_type.hpp" #include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp" @@ -24,43 +25,54 @@ using namespace proof_system; -template void generate_sha256_test_circuit(Builder& builder, size_t num_iterations) +template void generate_keccak_test_circuit(Builder& builder, size_t num_iterations) { - std::string in; - in.resize(32); - plonk::stdlib::packed_byte_array input(&builder, in); + std::string in = "abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz01"; + + proof_system::plonk::stdlib::byte_array input(&builder, in); for (size_t i = 0; i < num_iterations; i++) { - input = plonk::stdlib::sha256(input); + input = proof_system::plonk::stdlib::keccak::hash(input); } } -BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(honk::UltraProver& ext_prover) +BBERG_INSTRUMENT BBERG_NOINLINE void prover_profiling(auto& ext_prover) { - ext_prover.construct_proof(); - for (size_t i = 0; i < 200; i++) { - // Bench sumcheck - ext_prover.execute_relation_check_rounds(); + for (size_t i = 0; i < 1; i++) { + ext_prover.construct_proof(); } } /** * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function */ -void construct_proof_ultra() noexcept +void construct_honk_proof_ultra() noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); // Constuct circuit and prover; don't include this part in measurement honk::UltraComposer::CircuitBuilder builder; - generate_sha256_test_circuit(builder, 1); + generate_keccak_test_circuit(builder, 1); std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; honk::UltraComposer composer; std::shared_ptr instance = composer.create_instance(builder); honk::UltraProver ext_prover = composer.create_prover(instance); - sumcheck_profiling(ext_prover); + prover_profiling(ext_prover); } +void construct_plonk_proof_ultra() noexcept +{ + barretenberg::srs::init_crs_factory("../srs_db/ignition"); + // Constuct circuit and prover; don't include this part in measurement + for (int i = 0; i < 10; i++) { + plonk::UltraComposer::CircuitBuilder builder; + generate_keccak_test_circuit(builder, 1); + plonk::UltraComposer composer; + plonk::UltraProver ext_prover = composer.create_prover(builder); + prover_profiling(ext_prover); + } +} int main() { - construct_proof_ultra(); + // construct_honk_proof_ultra(); + construct_plonk_proof_ultra(); } diff --git a/barretenberg/cpp/src/barretenberg/common/thread.cpp b/barretenberg/cpp/src/barretenberg/common/thread.cpp index 04fdbe18454..47db8bd3ba4 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.cpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.cpp @@ -86,3 +86,10 @@ void parallel_for(size_t num_iterations, const std::function& func #endif #endif } + +inline void parallel_foreach(const auto& container, const auto&... containers, auto&& func) +{ + // We expect containers of the same size + (ASSERT(containers.size() == container.size()), ...); + parallel_for(container.size(), [&](size_t i) { func(containers[i]...); }); +} \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp index ed3beccf79e..b1a09c7c376 100644 --- a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp @@ -1,4 +1,5 @@ #pragma once +#include "barretenberg/common/thread.hpp" #include "barretenberg/polynomials/polynomial.hpp" namespace proof_system::honk::pcs::zeromorph { @@ -69,7 +70,7 @@ template class ZeroMorphProver_ { } // Compute the q_k in reverse order, i.e. q_{n-1}, ..., q_0 - for (size_t k = 0; k < log_N; ++k) { + parallel_for(log_N, [&](size_t k) { // Define partial evaluation point u' = (u_k, ..., u_{n-1}) auto evaluation_point_size = static_cast(k + 1); std::vector u_partial(u_challenge.end() - evaluation_point_size, u_challenge.end()); @@ -88,7 +89,7 @@ template class ZeroMorphProver_ { q_k -= f_1; quotients[log_N - k - 1] = q_k; - } + }); return quotients; } diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index e3f94e9c872..7e9ba3bab65 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -453,7 +453,7 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: } // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer - auto result = Polynomial(n_l); + Polynomial result{ n_l }; for (size_t idx = 0; idx < n_l; ++idx) { result[idx] = tmp[idx]; } From 69155825b39e65ae1c583c392834db1f8de1b625 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 17:53:02 +0000 Subject: [PATCH 02/25] mesing --- barretenberg/cpp/CMakePresets.json | 17 ++++++++++++++ .../benchmark/honk_bench/main.simple.cpp | 23 +++++++++++-------- .../honk/pcs/zeromorph/zeromorph.hpp | 16 ++++++------- .../barretenberg/polynomials/polynomial.cpp | 2 +- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index c54e0b3419e..3a460d20af3 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -54,6 +54,18 @@ "DISABLE_ASM": "ON" } }, + { + "name": "tsan", + "displayName": "Debugging build with address sanitizer on Clang-16", + "description": "Build with address sanitizer on clang16 with debugging information", + "inherits": "clang16-dbg", + "binaryDir": "build-tsan", + "environment": { + "CFLAGS": "-fsanitize=thread", + "CXXFLAGS": "-fsanitize=thread", + "LDFLAGS": "-fsanitize=thread" + } + }, { "name": "asan", "displayName": "Debugging build with address sanitizer on Clang-16", @@ -246,6 +258,11 @@ "inherits": "default", "configurePreset": "asan" }, + { + "name": "tsan", + "inherits": "default", + "configurePreset": "tsan" + }, { "name": "gcc", "inherits": "default", diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index ac695876812..b5840959957 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -49,30 +49,33 @@ void construct_honk_proof_ultra() noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); // Constuct circuit and prover; don't include this part in measurement - honk::UltraComposer::CircuitBuilder builder; - generate_keccak_test_circuit(builder, 1); - std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; - honk::UltraComposer composer; - std::shared_ptr instance = composer.create_instance(builder); - honk::UltraProver ext_prover = composer.create_prover(instance); - prover_profiling(ext_prover); + for (int i = 0; i < 10; i++) { + honk::UltraComposer::CircuitBuilder builder; + generate_keccak_test_circuit(builder, 1); + std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; + honk::UltraComposer composer; + std::shared_ptr instance = composer.create_instance(builder); + std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; + honk::UltraProver ext_prover = composer.create_prover(instance); + prover_profiling(ext_prover); + } } void construct_plonk_proof_ultra() noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); - // Constuct circuit and prover; don't include this part in measurement for (int i = 0; i < 10; i++) { plonk::UltraComposer::CircuitBuilder builder; generate_keccak_test_circuit(builder, 1); plonk::UltraComposer composer; plonk::UltraProver ext_prover = composer.create_prover(builder); + std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; prover_profiling(ext_prover); } } int main() { - // construct_honk_proof_ultra(); - construct_plonk_proof_ultra(); + construct_honk_proof_ultra(); + // construct_plonk_proof_ultra(); } diff --git a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp index b1a09c7c376..c769becbe79 100644 --- a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp @@ -78,17 +78,17 @@ template class ZeroMorphProver_ { // Compute f' = f(X_0,...,X_{k-1}, u') auto f_1 = polynomial.partial_evaluate_mle(u_partial); - // Increment first element to get altered partial evaluation point u'' = (u_k + 1, u_{k+1}, ..., u_{n-1}) - u_partial[0] += 1; + // // Increment first element to get altered partial evaluation point u'' = (u_k + 1, u_{k+1}, ..., u_{n-1}) + // u_partial[0] += 1; - // Compute f'' = f(X_0,...,X_{k-1}, u'') - auto f_2 = polynomial.partial_evaluate_mle(u_partial); + // // Compute f'' = f(X_0,...,X_{k-1}, u'') + // auto f_2 = polynomial.partial_evaluate_mle(u_partial); - // Compute q_k = f''(X_0,...,X_{k-1}) - f'(X_0,...,X_{k-1}) - auto q_k = f_2; - q_k -= f_1; + // // Compute q_k = f''(X_0,...,X_{k-1}) - f'(X_0,...,X_{k-1}) + // auto q_k = f_2; + // q_k -= f_1; - quotients[log_N - k - 1] = q_k; + // quotients[log_N - k - 1] = q_k; }); return quotients; diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 7e9ba3bab65..48260c63800 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -453,7 +453,7 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: } // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer - Polynomial result{ n_l }; + Polynomial result = Polynomial(n_l); for (size_t idx = 0; idx < n_l; ++idx) { result[idx] = tmp[idx]; } From bc2981bac213c1ec803c00ff1fb85ec2c6c77144 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 18:18:44 +0000 Subject: [PATCH 03/25] add parallel_for_batched --- .../cpp/src/barretenberg/common/thread.cpp | 22 +++++++++++++++---- .../barretenberg/polynomials/polynomial.cpp | 15 ++++--------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/common/thread.cpp b/barretenberg/cpp/src/barretenberg/common/thread.cpp index 47db8bd3ba4..1b916ed1fb8 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.cpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.cpp @@ -87,9 +87,23 @@ void parallel_for(size_t num_iterations, const std::function& func #endif } -inline void parallel_foreach(const auto& container, const auto&... containers, auto&& func) +/** + * A modified parallel_for optimized for work being done in batches. + * This is more appropriate for work with small granularity, to avoid thread caching issues and overhead. + */ +inline void parallel_for_batched(size_t num_iterations, auto&& func) { - // We expect containers of the same size - (ASSERT(containers.size() == container.size()), ...); - parallel_for(container.size(), [&](size_t i) { func(containers[i]...); }); + size_t num_threads = get_num_cpus_pow2(); + size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division + + // We will use parallel_for to dispatch the batches + parallel_for(num_threads, [=](size_t thread_idx) { + // Calculate start and end for this batch + size_t start = thread_idx * batch_size; + size_t end = std::min(start + batch_size, num_iterations); + + for (size_t i = start; i < end; ++i) { + func(i); // Pass both the current index and thread_idx + } + }); } \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 48260c63800..86c0364eeb1 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -433,31 +433,24 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: size_t n_l = 1 << (n - 1); // Temporary buffer of half the size of the polynomial - pointer tmp_ptr = allocate_aligned_memory(sizeof(Fr) * n_l); - auto tmp = tmp_ptr.get(); - + Polynomial result{ n_l }; Fr* prev = coefficients_.get(); // Evaluate variable X_{n-1} at u_{m-1} Fr u_l = evaluation_points[m - 1]; for (size_t i = 0; i < n_l; ++i) { - tmp[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); + result[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); } // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { n_l = 1 << (n - l - 1); u_l = evaluation_points[m - l - 1]; for (size_t i = 0; i < n_l; ++i) { - tmp[i] = tmp[i] + u_l * (tmp[i + n_l] - tmp[i]); + result[i] += u_l * (result[i + n_l] - result[i]); } } - // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer - Polynomial result = Polynomial(n_l); - for (size_t idx = 0; idx < n_l; ++idx) { - result[idx] = tmp[idx]; - } - + // Return resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) return result; } From 887b346bc613572518b7904875e0c16bf959bdbc Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 18:55:07 +0000 Subject: [PATCH 04/25] tinkering --- .../cpp/src/barretenberg/common/thread.cpp | 21 ----------------- .../cpp/src/barretenberg/common/thread.hpp | 23 ++++++++++++++++++- .../honk/pcs/zeromorph/zeromorph.hpp | 21 ++++++++--------- .../barretenberg/polynomials/polynomial.cpp | 8 ++++--- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/common/thread.cpp b/barretenberg/cpp/src/barretenberg/common/thread.cpp index 1b916ed1fb8..04a71f5746b 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.cpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.cpp @@ -85,25 +85,4 @@ void parallel_for(size_t num_iterations, const std::function& func // parallel_for_queued(num_iterations, func); #endif #endif -} - -/** - * A modified parallel_for optimized for work being done in batches. - * This is more appropriate for work with small granularity, to avoid thread caching issues and overhead. - */ -inline void parallel_for_batched(size_t num_iterations, auto&& func) -{ - size_t num_threads = get_num_cpus_pow2(); - size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division - - // We will use parallel_for to dispatch the batches - parallel_for(num_threads, [=](size_t thread_idx) { - // Calculate start and end for this batch - size_t start = thread_idx * batch_size; - size_t end = std::min(start + batch_size, num_iterations); - - for (size_t i = start; i < end; ++i) { - func(i); // Pass both the current index and thread_idx - } - }); } \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index c28e8595123..45f11037c7a 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -21,4 +22,24 @@ inline size_t get_num_cpus_pow2() return static_cast(1ULL << numeric::get_msb(get_num_cpus())); } -void parallel_for(size_t num_iterations, const std::function& func); \ No newline at end of file +void parallel_for(size_t num_iterations, const std::function& func); + +/** + * A modified parallel_for optimized for work being done in batches. + * This is more appropriate for work with small granularity, to avoid thread caching issues and overhead. + */ +inline void parallel_for_batched(size_t num_iterations, auto&& func) +{ + size_t num_threads = get_num_cpus_pow2(); + size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division + // We will use parallel_for to dispatch the batches + parallel_for(num_threads, [&](size_t thread_idx) { + // Calculate start and end for this batch + size_t start = thread_idx * batch_size; + size_t end = std::min(start + batch_size, num_iterations); + + for (size_t i = start; i < end; ++i) { + func(i); + } + }); +} \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp index c769becbe79..ed3beccf79e 100644 --- a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp @@ -1,5 +1,4 @@ #pragma once -#include "barretenberg/common/thread.hpp" #include "barretenberg/polynomials/polynomial.hpp" namespace proof_system::honk::pcs::zeromorph { @@ -70,7 +69,7 @@ template class ZeroMorphProver_ { } // Compute the q_k in reverse order, i.e. q_{n-1}, ..., q_0 - parallel_for(log_N, [&](size_t k) { + for (size_t k = 0; k < log_N; ++k) { // Define partial evaluation point u' = (u_k, ..., u_{n-1}) auto evaluation_point_size = static_cast(k + 1); std::vector u_partial(u_challenge.end() - evaluation_point_size, u_challenge.end()); @@ -78,18 +77,18 @@ template class ZeroMorphProver_ { // Compute f' = f(X_0,...,X_{k-1}, u') auto f_1 = polynomial.partial_evaluate_mle(u_partial); - // // Increment first element to get altered partial evaluation point u'' = (u_k + 1, u_{k+1}, ..., u_{n-1}) - // u_partial[0] += 1; + // Increment first element to get altered partial evaluation point u'' = (u_k + 1, u_{k+1}, ..., u_{n-1}) + u_partial[0] += 1; - // // Compute f'' = f(X_0,...,X_{k-1}, u'') - // auto f_2 = polynomial.partial_evaluate_mle(u_partial); + // Compute f'' = f(X_0,...,X_{k-1}, u'') + auto f_2 = polynomial.partial_evaluate_mle(u_partial); - // // Compute q_k = f''(X_0,...,X_{k-1}) - f'(X_0,...,X_{k-1}) - // auto q_k = f_2; - // q_k -= f_1; + // Compute q_k = f''(X_0,...,X_{k-1}) - f'(X_0,...,X_{k-1}) + auto q_k = f_2; + q_k -= f_1; - // quotients[log_N - k - 1] = q_k; - }); + quotients[log_N - k - 1] = q_k; + } return quotients; } diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 86c0364eeb1..cbef7b6f7a2 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -438,9 +438,11 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: // Evaluate variable X_{n-1} at u_{m-1} Fr u_l = evaluation_points[m - 1]; - for (size_t i = 0; i < n_l; ++i) { - result[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); - } + + parallel_for_batched(n_l, [&](size_t i) { result[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); }); + // for (size_t i = 0; i < n_l; ++i) { + // result[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); + // } // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { n_l = 1 << (n - l - 1); From 889dc1faece3de68deded6a85cd4fa93729fa5f3 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 20:36:27 +0000 Subject: [PATCH 05/25] Zero morph cleanup --- .../barretenberg/polynomials/polynomial.cpp | 40 ++++++++++++++----- .../barretenberg/polynomials/polynomial.hpp | 8 +++- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index cbef7b6f7a2..6721c5a6684 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -19,9 +19,9 @@ namespace barretenberg { * Constructors / Destructors **/ template -Polynomial::Polynomial(const size_t size_) +Polynomial::Polynomial(size_t initial_size) : coefficients_(nullptr) - , size_(size_) + , size_(initial_size) { if (capacity() > 0) { coefficients_ = allocate_aligned_memory(sizeof(Fr) * capacity()); @@ -29,6 +29,18 @@ Polynomial::Polynomial(const size_t size_) memset(static_cast(coefficients_.get()), 0, sizeof(Fr) * capacity()); } +template +Polynomial::Polynomial(size_t initial_size, DontZeroMemory flag) + : coefficients_(nullptr) + , size_(initial_size) +{ + // Flag is unused, but we don't memset 0 if passed. + (void)flag; + if (capacity() > 0) { + coefficients_ = allocate_aligned_memory(sizeof(Fr) * capacity()); + } +} + template Polynomial::Polynomial(const Polynomial& other) : Polynomial(other, other.size()) @@ -433,26 +445,32 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: size_t n_l = 1 << (n - 1); // Temporary buffer of half the size of the polynomial - Polynomial result{ n_l }; + Polynomial tmp(n_l, DontZeroMemory::FLAG); + Fr* prev = coefficients_.get(); // Evaluate variable X_{n-1} at u_{m-1} Fr u_l = evaluation_points[m - 1]; - parallel_for_batched(n_l, [&](size_t i) { result[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); }); - // for (size_t i = 0; i < n_l; ++i) { - // result[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); - // } + for (size_t i = 0; i < n_l; ++i) { + tmp[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); + } // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { - n_l = 1 << (n - l - 1); + size_t new_n_l = 1 << (n - l - 1); u_l = evaluation_points[m - l - 1]; - for (size_t i = 0; i < n_l; ++i) { - result[i] += u_l * (result[i + n_l] - result[i]); + for (size_t i = 0; i < new_n_l; ++i) { + tmp[i] = tmp[i] + u_l * (tmp[i + new_n_l] - tmp[i]); } } - // Return resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) + size_t final_n_l = 1 << (n - m); + // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer + Polynomial result(final_n_l, DontZeroMemory::FLAG); + for (size_t idx = 0; idx < final_n_l; ++idx) { + result[idx] = tmp[idx]; + } + return result; } diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp index b0ebaae33f6..63caded798a 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp @@ -7,6 +7,8 @@ #include namespace barretenberg { +enum class DontZeroMemory { FLAG }; + template class Polynomial { public: /** @@ -21,9 +23,11 @@ template class Polynomial { using const_iterator = Fr const*; using FF = Fr; - Polynomial(const size_t initial_size); + Polynomial(size_t initial_size); + // Constructor that does not initialize values, use with caution to save time. + Polynomial(size_t initial_size, DontZeroMemory flag); Polynomial(const Polynomial& other); - Polynomial(const Polynomial& other, const size_t target_size); + Polynomial(const Polynomial& other, size_t target_size); Polynomial(Polynomial&& other) noexcept; From e930a31ca35d3901b1deb09f5d9cf8c55cd3980b Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 20:38:53 +0000 Subject: [PATCH 06/25] batch 1 loop --- .../cpp/src/barretenberg/polynomials/polynomial.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 6721c5a6684..7a296dd89ef 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -445,22 +445,23 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: size_t n_l = 1 << (n - 1); // Temporary buffer of half the size of the polynomial - Polynomial tmp(n_l, DontZeroMemory::FLAG); + Polynomial intermediate(n_l, DontZeroMemory::FLAG); Fr* prev = coefficients_.get(); // Evaluate variable X_{n-1} at u_{m-1} Fr u_l = evaluation_points[m - 1]; - for (size_t i = 0; i < n_l; ++i) { - tmp[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); - } + parallel_for_batched(n_l, [&](size_t i) { + // Initiate our intermediate results + intermediate[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); + }); // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { size_t new_n_l = 1 << (n - l - 1); u_l = evaluation_points[m - l - 1]; for (size_t i = 0; i < new_n_l; ++i) { - tmp[i] = tmp[i] + u_l * (tmp[i + new_n_l] - tmp[i]); + intermediate[i] += u_l * (intermediate[i + new_n_l] - intermediate[i]); } } @@ -468,7 +469,7 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer Polynomial result(final_n_l, DontZeroMemory::FLAG); for (size_t idx = 0; idx < final_n_l; ++idx) { - result[idx] = tmp[idx]; + result[idx] = intermediate[idx]; } return result; From fbd0ef4a6bf6be2e58da7d4fb639f7549854bfda Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 20:42:49 +0000 Subject: [PATCH 07/25] batch 1 loop --- .../src/barretenberg/polynomials/polynomial.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 7a296dd89ef..394b0fd4eda 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -447,22 +447,21 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: // Temporary buffer of half the size of the polynomial Polynomial intermediate(n_l, DontZeroMemory::FLAG); - Fr* prev = coefficients_.get(); - // Evaluate variable X_{n-1} at u_{m-1} Fr u_l = evaluation_points[m - 1]; parallel_for_batched(n_l, [&](size_t i) { - // Initiate our intermediate results - intermediate[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); + // Initiate our intermediate results using this polynomial. + intermediate[i] = at(i) + u_l * (at(i + n_l) - at(i)); }); // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { size_t new_n_l = 1 << (n - l - 1); - u_l = evaluation_points[m - l - 1]; - for (size_t i = 0; i < new_n_l; ++i) { - intermediate[i] += u_l * (intermediate[i + new_n_l] - intermediate[i]); - } + Fr new_u_l = evaluation_points[m - l - 1]; + parallel_for_batched(new_n_l, [&](size_t i) { + // Iterate on increasingly small portions of intermediate results. + intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); + }); } size_t final_n_l = 1 << (n - m); From 6f1a26db4606f1c4b6285ffe21c8315fb20579fd Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 21:17:35 +0000 Subject: [PATCH 08/25] Update --- barretenberg/cpp/src/barretenberg/common/thread.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index 45f11037c7a..81477234f1b 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -28,8 +28,15 @@ void parallel_for(size_t num_iterations, const std::function& func * A modified parallel_for optimized for work being done in batches. * This is more appropriate for work with small granularity, to avoid thread caching issues and overhead. */ -inline void parallel_for_batched(size_t num_iterations, auto&& func) +inline void parallel_for_batched(size_t num_iterations, auto&& func, size_t min_num_iterations = 8000) { + if (num_iterations <= min_num_iterations) { + // Don't bother with overhead of splitting into threads if small + for (size_t i = 0; i < num_iterations; i++) { + func(i); + } + return; + } size_t num_threads = get_num_cpus_pow2(); size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division // We will use parallel_for to dispatch the batches From 68f7bd2b7b052e0b8214bbeac6cdeea98b9d88f2 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 21:54:37 +0000 Subject: [PATCH 09/25] Benchmark zeromorph --- .../benchmark/honk_bench/CMakeLists.txt | 1 + .../honk_bench/benchmark_utilities.hpp | 49 ++++++++++++------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt index fad8d9f141d..80167f93b55 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt @@ -2,6 +2,7 @@ set(BENCHMARK_SOURCES standard_plonk.bench.cpp ultra_honk.bench.cpp + ultra_plonk_passes.bench.cpp ultra_plonk.bench.cpp ) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp index 242aa86764d..4c9b8bb2a39 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp @@ -1,7 +1,10 @@ #pragma once #include +#include #include "barretenberg/honk/composer/ultra_composer.hpp" +#include "barretenberg/honk/proof_system/ultra_prover.hpp" +#include "barretenberg/plonk/composer/ultra_composer.hpp" #include "barretenberg/proof_system/types/circuit_type.hpp" #include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp" #include "barretenberg/stdlib/hash/keccak/keccak.hpp" @@ -203,6 +206,26 @@ void construct_proof_with_specified_num_gates(State& state, } } +inline proof_system::honk::UltraProver get_prover( + proof_system::honk::UltraComposer& composer, + void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t), + size_t num_iterations) +{ + proof_system::honk::UltraComposer::CircuitBuilder builder; + std::shared_ptr instance = composer.create_instance(builder); + test_circuit_function(builder, num_iterations); + return composer.create_prover(instance); +} + +inline proof_system::plonk::UltraProver get_prover( + proof_system::plonk::UltraComposer& composer, + void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t), + size_t num_iterations) +{ + proof_system::plonk::UltraComposer::CircuitBuilder builder; + test_circuit_function(builder, num_iterations); + return composer.create_prover(builder); +} /** * @brief Performs proof constuction for benchmarks based on a provided circuit function * @@ -219,29 +242,17 @@ void construct_proof_with_specified_num_iterations(State& state, size_t)) noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); - auto num_iterations = static_cast(state.range(0)); + + Composer composer; + for (auto _ : state) { // Constuct circuit and prover; don't include this part in measurement state.PauseTiming(); - auto builder = typename Composer::CircuitBuilder(); - test_circuit_function(builder, num_iterations); - - auto composer = Composer(); - if constexpr (proof_system::IsAnyOf) { - auto instance = composer.create_instance(builder); - auto ext_prover = composer.create_prover(instance); - state.ResumeTiming(); - - // Construct proof - auto proof = ext_prover.construct_proof(); - - } else { - auto ext_prover = composer.create_prover(builder); - state.ResumeTiming(); + auto prover = get_prover(composer, test_circuit_function, state.range(0)); + state.ResumeTiming(); - // Construct proof - auto proof = ext_prover.construct_proof(); - } + // Construct proof + auto proof = prover.construct_proof(); } } From bedd0b33841b78d5476c4092e22f86c7d0ea3484 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 21:59:26 +0000 Subject: [PATCH 10/25] Benchmark zeromorph --- .../cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt index 80167f93b55..c2930f7213e 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt @@ -2,7 +2,7 @@ set(BENCHMARK_SOURCES standard_plonk.bench.cpp ultra_honk.bench.cpp - ultra_plonk_passes.bench.cpp + ultra_honk_passes.bench.cpp ultra_plonk.bench.cpp ) From cc32991d82383098e13e8b10553f089b79d37c55 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 22:00:58 +0000 Subject: [PATCH 11/25] Benchmark zeromorph --- .../barretenberg/benchmark/honk_bench/benchmark_utilities.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp index 4c9b8bb2a39..1384eade029 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp @@ -245,10 +245,11 @@ void construct_proof_with_specified_num_iterations(State& state, Composer composer; + auto num_iterations = static_cast(state.range(0)); for (auto _ : state) { // Constuct circuit and prover; don't include this part in measurement state.PauseTiming(); - auto prover = get_prover(composer, test_circuit_function, state.range(0)); + auto prover = get_prover(composer, test_circuit_function, num_iterations); state.ResumeTiming(); // Construct proof From f7375cfc874f736f896f5847e135577edf3008c7 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 23:20:45 +0000 Subject: [PATCH 12/25] Benchmark zeromorph --- .../barretenberg/polynomials/polynomial.cpp | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 394b0fd4eda..c628ca040f4 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -447,29 +447,35 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: // Temporary buffer of half the size of the polynomial Polynomial intermediate(n_l, DontZeroMemory::FLAG); - // Evaluate variable X_{n-1} at u_{m-1} - Fr u_l = evaluation_points[m - 1]; - - parallel_for_batched(n_l, [&](size_t i) { - // Initiate our intermediate results using this polynomial. - intermediate[i] = at(i) + u_l * (at(i + n_l) - at(i)); - }); - // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) - for (size_t l = 1; l < m; ++l) { - size_t new_n_l = 1 << (n - l - 1); - Fr new_u_l = evaluation_points[m - l - 1]; - parallel_for_batched(new_n_l, [&](size_t i) { - // Iterate on increasingly small portions of intermediate results. - intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); - }); - } + // // Evaluate variable X_{n-1} at u_{m-1} + // Fr u_l = evaluation_points[m - 1]; + + // parallel_for_batched( + // n_l, + // [&](size_t i) { + // // Initiate our intermediate results using this polynomial. + // intermediate[i] = at(i) + u_l * (at(i + n_l) - at(i)); + // }, + // 1000000000); + // // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) + // for (size_t l = 1; l < m; ++l) { + // size_t new_n_l = 1 << (n - l - 1); + // Fr new_u_l = evaluation_points[m - l - 1]; + // parallel_for_batched( + // new_n_l, + // [&](size_t i) { + // // Iterate on increasingly small portions of intermediate results. + // intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); + // }, + // 10000000); + // } size_t final_n_l = 1 << (n - m); // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer Polynomial result(final_n_l, DontZeroMemory::FLAG); - for (size_t idx = 0; idx < final_n_l; ++idx) { - result[idx] = intermediate[idx]; - } + // for (size_t idx = 0; idx < final_n_l; ++idx) { + // result[idx] = intermediate[idx]; + // } return result; } From 1c222d6dd920c225315b1428bb69e3c3fbf7d0ce Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 23:32:27 +0000 Subject: [PATCH 13/25] stash --- .../cpp/src/barretenberg/common/thread.hpp | 2 +- .../barretenberg/polynomials/polynomial.cpp | 44 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index 81477234f1b..d1c37950b9d 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -30,7 +30,7 @@ void parallel_for(size_t num_iterations, const std::function& func */ inline void parallel_for_batched(size_t num_iterations, auto&& func, size_t min_num_iterations = 8000) { - if (num_iterations <= min_num_iterations) { + if (num_iterations <= 1 - min_num_iterations) { // Don't bother with overhead of splitting into threads if small for (size_t i = 0; i < num_iterations; i++) { func(i); diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index c628ca040f4..c3ff327b1df 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -447,28 +447,28 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: // Temporary buffer of half the size of the polynomial Polynomial intermediate(n_l, DontZeroMemory::FLAG); - // // Evaluate variable X_{n-1} at u_{m-1} - // Fr u_l = evaluation_points[m - 1]; - - // parallel_for_batched( - // n_l, - // [&](size_t i) { - // // Initiate our intermediate results using this polynomial. - // intermediate[i] = at(i) + u_l * (at(i + n_l) - at(i)); - // }, - // 1000000000); - // // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) - // for (size_t l = 1; l < m; ++l) { - // size_t new_n_l = 1 << (n - l - 1); - // Fr new_u_l = evaluation_points[m - l - 1]; - // parallel_for_batched( - // new_n_l, - // [&](size_t i) { - // // Iterate on increasingly small portions of intermediate results. - // intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); - // }, - // 10000000); - // } + // Evaluate variable X_{n-1} at u_{m-1} + Fr u_l = evaluation_points[m - 1]; + + parallel_for_batched( + n_l, + [&](size_t i) { + // Initiate our intermediate results using this polynomial. + intermediate[i] = at(i) + u_l * (at(i + n_l) - at(i)); + }, + 1000000000); + // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) + for (size_t l = 1; l < m; ++l) { + size_t new_n_l = 1 << (n - l - 1); + Fr new_u_l = evaluation_points[m - l - 1]; + parallel_for_batched( + new_n_l, + [&](size_t i) { + // Iterate on increasingly small portions of intermediate results. + intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); + }, + 10000000); + } size_t final_n_l = 1 << (n - m); // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer From dc72e3fb894080309d11f3425b2ae3d9a9769083 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 23:33:13 +0000 Subject: [PATCH 14/25] stash --- .../barretenberg/polynomials/polynomial.cpp | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index c3ff327b1df..9b696542994 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -450,32 +450,26 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: // Evaluate variable X_{n-1} at u_{m-1} Fr u_l = evaluation_points[m - 1]; - parallel_for_batched( - n_l, - [&](size_t i) { - // Initiate our intermediate results using this polynomial. - intermediate[i] = at(i) + u_l * (at(i + n_l) - at(i)); - }, - 1000000000); + for (size_t i = 0; i < n_l; i++) { + // Initiate our intermediate results using this polynomial. + intermediate[i] = at(i) + u_l * (at(i + n_l) - at(i)); + } // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { size_t new_n_l = 1 << (n - l - 1); Fr new_u_l = evaluation_points[m - l - 1]; - parallel_for_batched( - new_n_l, - [&](size_t i) { - // Iterate on increasingly small portions of intermediate results. - intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); - }, - 10000000); + parallel_for_batched(new_n_l, [&](size_t i) { + // Iterate on increasingly small portions of intermediate results. + intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); + }); } size_t final_n_l = 1 << (n - m); // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer Polynomial result(final_n_l, DontZeroMemory::FLAG); - // for (size_t idx = 0; idx < final_n_l; ++idx) { - // result[idx] = intermediate[idx]; - // } + for (size_t idx = 0; idx < final_n_l; ++idx) { + result[idx] = intermediate[idx]; + } return result; } From 26ef756e3372d99252f9f41d48b8852ed5b552db Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 23:36:19 +0000 Subject: [PATCH 15/25] stash --- barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 9b696542994..5a611e35f28 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -435,6 +435,7 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: // Get size of partial evaluation point u = (u_0,...,u_{m-1}) const size_t m = evaluation_points.size(); + std::cout << "TESST" << std::endl; // Assert that the size of the polynomial being evaluated is a power of 2 greater than (1 << m) ASSERT(numeric::is_power_of_two(size_)); ASSERT(size_ >= static_cast(1 << m)); From e37b673471a66d4ecf8f83a3459803ea86a948c5 Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 20 Oct 2023 23:57:33 +0000 Subject: [PATCH 16/25] Get compiling again --- .../cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp | 1 + barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp index ed3beccf79e..cb4ebe4f44b 100644 --- a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp @@ -79,6 +79,7 @@ template class ZeroMorphProver_ { // Increment first element to get altered partial evaluation point u'' = (u_k + 1, u_{k+1}, ..., u_{n-1}) u_partial[0] += 1; + std::cout << "HEY" << std::endl; // Compute f'' = f(X_0,...,X_{k-1}, u'') auto f_2 = polynomial.partial_evaluate_mle(u_partial); diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 5a611e35f28..9b696542994 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -435,7 +435,6 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: // Get size of partial evaluation point u = (u_0,...,u_{m-1}) const size_t m = evaluation_points.size(); - std::cout << "TESST" << std::endl; // Assert that the size of the polynomial being evaluated is a power of 2 greater than (1 << m) ASSERT(numeric::is_power_of_two(size_)); ASSERT(size_ >= static_cast(1 << m)); From b8840d77f2735b1a34cd36cd83598483c078a6d1 Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 15:12:46 +0000 Subject: [PATCH 17/25] Surprising results --- .../scripts/collect_profile_information.sh | 13 ++-- .../honk_bench/benchmark_utilities.hpp | 1 + .../honk_bench/ultra_honk_passes.bench.cpp | 59 +++++++++++++++++++ .../honk/pcs/zeromorph/zeromorph.hpp | 1 - 4 files changed, 68 insertions(+), 6 deletions(-) create mode 100644 barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index 0b7d79ef8ed..e2572340acd 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -3,6 +3,7 @@ set -eu PRESET=${1:-xray-1thread} # can also be 'xray' ONLY_PROCESS=${2:-} +EXECUTABLE=${3:-honk_bench_main_simple} # Move above script dir. cd $(dirname $0)/.. @@ -15,10 +16,10 @@ cd build-$PRESET if [ -z "$ONLY_PROCESS" ]; then # Clear old profile data. - rm -f xray-log.honk_bench_main_simple.* + rm -f xray-log.$EXECUTABLE.* # Run benchmark with profiling. - XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/honk_bench_main_simple + XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE fi function shorten_cpp_names() { @@ -36,11 +37,13 @@ function shorten_cpp_names() { ' } + #| node ../scripts/llvm_xray_stack_flame_corrector.js \ + #| ../scripts/flamegraph.pl --width 1200 --fontsize 10 \ # Process benchmark file. -llvm-xray-16 stack xray-log.honk_bench_main_simple.* \ - --instr_map=./bin/honk_bench_main_simple --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \ - | node ../scripts/llvm_xray_stack_flame_corrector.js \ +llvm-xray-16 stack xray-log.$EXECUTABLE.* \ + --instr_map=./bin/$EXECUTABLE --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \ | shorten_cpp_names \ + | node ../scripts/llvm_xray_stack_flame_corrector.js \ | ../scripts/flamegraph.pl --width 1200 --fontsize 10 \ > xray.svg echo "Profiling complete, now you can do e.g. 'scp mainframe:`readlink -f xray.svg` .' on a local terminal and open the SVG in a browser." diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp index 1384eade029..d39c1152d55 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp @@ -214,6 +214,7 @@ inline proof_system::honk::UltraProver get_prover( proof_system::honk::UltraComposer::CircuitBuilder builder; std::shared_ptr instance = composer.create_instance(builder); test_circuit_function(builder, num_iterations); + std::cout << builder.get_total_circuit_size() << std::endl; return composer.create_prover(instance); } diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp new file mode 100644 index 00000000000..0f967bbe4e9 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp @@ -0,0 +1,59 @@ +#include + +#include "barretenberg/benchmark/honk_bench/benchmark_utilities.hpp" +#include "barretenberg/honk/composer/ultra_composer.hpp" +#include "barretenberg/honk/proof_system/ultra_prover.hpp" +#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp" + +using namespace benchmark; +using namespace proof_system; + +enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, ZEROMORPH }; + +BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass_inner(State& state, + honk::UltraProver& prover, + size_t index) noexcept +{ + + auto time_if_index = [&](size_t target_index, auto&& func) -> void { + if (index == target_index) { + // state.ResumeTiming(); + func(); + // state.PauseTiming(); + } else { + func(); + } + }; + for (auto _ : state) { + // state.PauseTiming(); + time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); }); + time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); }); + time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); }); + time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); }); + time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); }); + time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); }); + // state.ResumeTiming(); + } +} +BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass(State& state, size_t index) noexcept +{ + barretenberg::srs::init_crs_factory("../srs_db/ignition"); + + honk::UltraComposer composer; + honk::UltraProver prover = bench_utils::get_prover( + composer, &bench_utils::generate_ecdsa_verification_test_circuit, 10); + test_pass_inner(state, prover, index); +} +#define PASS_BENCHMARK(pass) \ + static void PASS_##pass(State& state) noexcept \ + { \ + test_pass(state, pass); \ + } \ + BENCHMARK(PASS_##pass)->Unit(::benchmark::kMillisecond) + +// PASS_BENCHMARK(PREAMBLE); +PASS_BENCHMARK(WIRE_COMMITMENTS); +PASS_BENCHMARK(SORTED_LIST_ACCUMULATOR); +PASS_BENCHMARK(GRAND_PRODUCT_COMPUTATION); +PASS_BENCHMARK(RELATION_CHECK); +PASS_BENCHMARK(ZEROMORPH); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp index cb4ebe4f44b..ed3beccf79e 100644 --- a/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/pcs/zeromorph/zeromorph.hpp @@ -79,7 +79,6 @@ template class ZeroMorphProver_ { // Increment first element to get altered partial evaluation point u'' = (u_k + 1, u_{k+1}, ..., u_{n-1}) u_partial[0] += 1; - std::cout << "HEY" << std::endl; // Compute f'' = f(X_0,...,X_{k-1}, u'') auto f_2 = polynomial.partial_evaluate_mle(u_partial); From 5bdc5da112a78ae0569d4d8aaf79bff1ac15c4dd Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 15:21:03 +0000 Subject: [PATCH 18/25] Surprising results --- .../barretenberg/benchmark/honk_bench/benchmark_utilities.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp index d39c1152d55..5053d89bd54 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp @@ -212,9 +212,8 @@ inline proof_system::honk::UltraProver get_prover( size_t num_iterations) { proof_system::honk::UltraComposer::CircuitBuilder builder; - std::shared_ptr instance = composer.create_instance(builder); test_circuit_function(builder, num_iterations); - std::cout << builder.get_total_circuit_size() << std::endl; + std::shared_ptr instance = composer.create_instance(builder); return composer.create_prover(instance); } From a740a24f916496becdaba4c8f4a0f1e699806897 Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 16:46:48 +0000 Subject: [PATCH 19/25] Undo --- .../cpp/scripts/collect_profile_information.sh | 7 +++---- .../benchmark/honk_bench/ultra_honk_passes.bench.cpp | 12 ++++++------ barretenberg/cpp/src/barretenberg/common/thread.hpp | 4 ++-- .../cpp/src/barretenberg/polynomials/polynomial.cpp | 4 ++-- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index e2572340acd..1197892e953 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -4,6 +4,7 @@ set -eu PRESET=${1:-xray-1thread} # can also be 'xray' ONLY_PROCESS=${2:-} EXECUTABLE=${3:-honk_bench_main_simple} +shift 3 # any extra args go to executable # Move above script dir. cd $(dirname $0)/.. @@ -19,7 +20,7 @@ if [ -z "$ONLY_PROCESS" ]; then rm -f xray-log.$EXECUTABLE.* # Run benchmark with profiling. - XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE + XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE $@ fi function shorten_cpp_names() { @@ -37,13 +38,11 @@ function shorten_cpp_names() { ' } - #| node ../scripts/llvm_xray_stack_flame_corrector.js \ - #| ../scripts/flamegraph.pl --width 1200 --fontsize 10 \ # Process benchmark file. llvm-xray-16 stack xray-log.$EXECUTABLE.* \ --instr_map=./bin/$EXECUTABLE --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \ - | shorten_cpp_names \ | node ../scripts/llvm_xray_stack_flame_corrector.js \ + | shorten_cpp_names \ | ../scripts/flamegraph.pl --width 1200 --fontsize 10 \ > xray.svg echo "Profiling complete, now you can do e.g. 'scp mainframe:`readlink -f xray.svg` .' on a local terminal and open the SVG in a browser." diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp index 0f967bbe4e9..149361947f0 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp @@ -17,22 +17,22 @@ BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass_inner(State& state, auto time_if_index = [&](size_t target_index, auto&& func) -> void { if (index == target_index) { - // state.ResumeTiming(); + state.ResumeTiming(); func(); - // state.PauseTiming(); + state.PauseTiming(); } else { func(); } }; for (auto _ : state) { - // state.PauseTiming(); + state.PauseTiming(); time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); }); time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); }); time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); }); time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); }); time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); }); time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); }); - // state.ResumeTiming(); + state.ResumeTiming(); } } BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass(State& state, size_t index) noexcept @@ -40,8 +40,8 @@ BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass(State& state, size_t index barretenberg::srs::init_crs_factory("../srs_db/ignition"); honk::UltraComposer composer; - honk::UltraProver prover = bench_utils::get_prover( - composer, &bench_utils::generate_ecdsa_verification_test_circuit, 10); + honk::UltraProver prover = + bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit, 1); test_pass_inner(state, prover, index); } #define PASS_BENCHMARK(pass) \ diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index d1c37950b9d..e454dcca5b0 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -28,9 +28,9 @@ void parallel_for(size_t num_iterations, const std::function& func * A modified parallel_for optimized for work being done in batches. * This is more appropriate for work with small granularity, to avoid thread caching issues and overhead. */ -inline void parallel_for_batched(size_t num_iterations, auto&& func, size_t min_num_iterations = 8000) +inline void parallel_for_batched(size_t num_iterations, auto&& func, size_t min_num_iterations = 800) { - if (num_iterations <= 1 - min_num_iterations) { + if (num_iterations <= min_num_iterations) { // Don't bother with overhead of splitting into threads if small for (size_t i = 0; i < num_iterations; i++) { func(i); diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 9b696542994..b60f5dd85be 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -458,10 +458,10 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: for (size_t l = 1; l < m; ++l) { size_t new_n_l = 1 << (n - l - 1); Fr new_u_l = evaluation_points[m - l - 1]; - parallel_for_batched(new_n_l, [&](size_t i) { + for (size_t i = 0; i < new_n_l; i++) { // Iterate on increasingly small portions of intermediate results. intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); - }); + } } size_t final_n_l = 1 << (n - m); From 2fbe7613c7bdd20af2c5e2decaa15e1030c82a82 Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 16:49:14 +0000 Subject: [PATCH 20/25] Simplify batched parallel_for --- barretenberg/cpp/src/barretenberg/common/thread.hpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index e454dcca5b0..45f11037c7a 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -28,15 +28,8 @@ void parallel_for(size_t num_iterations, const std::function& func * A modified parallel_for optimized for work being done in batches. * This is more appropriate for work with small granularity, to avoid thread caching issues and overhead. */ -inline void parallel_for_batched(size_t num_iterations, auto&& func, size_t min_num_iterations = 800) +inline void parallel_for_batched(size_t num_iterations, auto&& func) { - if (num_iterations <= min_num_iterations) { - // Don't bother with overhead of splitting into threads if small - for (size_t i = 0; i < num_iterations; i++) { - func(i); - } - return; - } size_t num_threads = get_num_cpus_pow2(); size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division // We will use parallel_for to dispatch the batches From 95ab25a24b0428182b4b658c87de495aba2d67ea Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 16:53:03 +0000 Subject: [PATCH 21/25] chore: cleanup old profiling entrypoint, default to honk_passes --- .../scripts/collect_profile_information.sh | 2 +- .../benchmark/honk_bench/CMakeLists.txt | 15 +--- .../benchmark/honk_bench/main.simple.cpp | 81 ------------------- .../honk_bench/ultra_honk_passes.bench.cpp | 2 +- 4 files changed, 3 insertions(+), 97 deletions(-) delete mode 100644 barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index 1197892e953..1cd6a688f91 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -3,7 +3,7 @@ set -eu PRESET=${1:-xray-1thread} # can also be 'xray' ONLY_PROCESS=${2:-} -EXECUTABLE=${3:-honk_bench_main_simple} +EXECUTABLE=${3:-ultra_honk_passes_bench} shift 3 # any extra args go to executable # Move above script dir. diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt index c2930f7213e..c4adab0ade9 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt @@ -20,17 +20,4 @@ foreach(BENCHMARK_SOURCE ${BENCHMARK_SOURCES}) add_executable(${BENCHMARK_NAME}_bench main.bench.cpp ${BENCHMARK_SOURCE} benchmark_utilities.hpp) target_link_libraries(${BENCHMARK_NAME}_bench ${LINKED_LIBRARIES}) add_custom_target(run_${BENCHMARK_NAME} COMMAND ${BENCHMARK_NAME} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) -endforeach() - -add_executable( - honk_bench_main_simple - main.simple.cpp -) - -target_link_libraries( - honk_bench_main_simple - PRIVATE - stdlib_sha256 - stdlib_keccak - stdlib_merkle_tree -) +endforeach() \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp deleted file mode 100644 index b5840959957..00000000000 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* Entry point for profiling with e.g. LLVM xray. - * This provides a simple entrypoint to bypass artifacts with - * TODO(AD): Consider if we can directly profile the bench executables. - */ -#include -#include -#include - -#include "barretenberg/honk/composer/ultra_composer.hpp" -#include "barretenberg/plonk/composer/ultra_composer.hpp" -#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp" -#include "barretenberg/proof_system/types/circuit_type.hpp" -#include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp" -#include "barretenberg/stdlib/hash/keccak/keccak.hpp" -#include "barretenberg/stdlib/hash/sha256/sha256.hpp" -#include "barretenberg/stdlib/merkle_tree/membership.hpp" -#include "barretenberg/stdlib/merkle_tree/memory_store.hpp" -#include "barretenberg/stdlib/merkle_tree/memory_tree.hpp" -#include "barretenberg/stdlib/merkle_tree/merkle_tree.hpp" -#include "barretenberg/stdlib/primitives/bool/bool.hpp" -#include "barretenberg/stdlib/primitives/curves/secp256k1.hpp" -#include "barretenberg/stdlib/primitives/field/field.hpp" -#include "barretenberg/stdlib/primitives/packed_byte_array/packed_byte_array.hpp" -#include "barretenberg/stdlib/primitives/witness/witness.hpp" - -using namespace proof_system; - -template void generate_keccak_test_circuit(Builder& builder, size_t num_iterations) -{ - std::string in = "abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz01"; - - proof_system::plonk::stdlib::byte_array input(&builder, in); - for (size_t i = 0; i < num_iterations; i++) { - input = proof_system::plonk::stdlib::keccak::hash(input); - } -} - -BBERG_INSTRUMENT BBERG_NOINLINE void prover_profiling(auto& ext_prover) -{ - for (size_t i = 0; i < 1; i++) { - ext_prover.construct_proof(); - } -} - -/** - * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function - */ -void construct_honk_proof_ultra() noexcept -{ - barretenberg::srs::init_crs_factory("../srs_db/ignition"); - // Constuct circuit and prover; don't include this part in measurement - - for (int i = 0; i < 10; i++) { - honk::UltraComposer::CircuitBuilder builder; - generate_keccak_test_circuit(builder, 1); - std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; - honk::UltraComposer composer; - std::shared_ptr instance = composer.create_instance(builder); - std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; - honk::UltraProver ext_prover = composer.create_prover(instance); - prover_profiling(ext_prover); - } -} - -void construct_plonk_proof_ultra() noexcept -{ - barretenberg::srs::init_crs_factory("../srs_db/ignition"); - for (int i = 0; i < 10; i++) { - plonk::UltraComposer::CircuitBuilder builder; - generate_keccak_test_circuit(builder, 1); - plonk::UltraComposer composer; - plonk::UltraProver ext_prover = composer.create_prover(builder); - std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; - prover_profiling(ext_prover); - } -} -int main() -{ - construct_honk_proof_ultra(); - // construct_plonk_proof_ultra(); -} diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp index 149361947f0..18a0832c4d7 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp @@ -51,7 +51,7 @@ BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass(State& state, size_t index } \ BENCHMARK(PASS_##pass)->Unit(::benchmark::kMillisecond) -// PASS_BENCHMARK(PREAMBLE); +PASS_BENCHMARK(PREAMBLE); PASS_BENCHMARK(WIRE_COMMITMENTS); PASS_BENCHMARK(SORTED_LIST_ACCUMULATOR); PASS_BENCHMARK(GRAND_PRODUCT_COMPUTATION); From cf193713b2aaaab9f7d061e6cbaf71e0e209ffbe Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 16:56:17 +0000 Subject: [PATCH 22/25] Fix profile script --- barretenberg/cpp/scripts/collect_profile_information.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index 1cd6a688f91..cd3b7cc1957 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -1,10 +1,9 @@ #!/bin/bash set -eu -PRESET=${1:-xray-1thread} # can also be 'xray' +PRESET=${1:-xray} # can also be 'xray-1thread' ONLY_PROCESS=${2:-} EXECUTABLE=${3:-ultra_honk_passes_bench} -shift 3 # any extra args go to executable # Move above script dir. cd $(dirname $0)/.. @@ -20,7 +19,7 @@ if [ -z "$ONLY_PROCESS" ]; then rm -f xray-log.$EXECUTABLE.* # Run benchmark with profiling. - XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE $@ + XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE fi function shorten_cpp_names() { From 1e65db34694e785fbac9c928556fa60b4697486e Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 17:06:20 +0000 Subject: [PATCH 23/25] Force fast passes to be one iteration --- .../benchmark/honk_bench/ultra_honk_passes.bench.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp index 18a0832c4d7..ba28fd2810c 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp @@ -51,9 +51,11 @@ BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass(State& state, size_t index } \ BENCHMARK(PASS_##pass)->Unit(::benchmark::kMillisecond) -PASS_BENCHMARK(PREAMBLE); -PASS_BENCHMARK(WIRE_COMMITMENTS); -PASS_BENCHMARK(SORTED_LIST_ACCUMULATOR); -PASS_BENCHMARK(GRAND_PRODUCT_COMPUTATION); +// Fast passes take a long time to benchmark because of how we compute statistical significance. +// Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part. +PASS_BENCHMARK(PREAMBLE)->Iterations(1); +PASS_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); +PASS_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); +PASS_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); PASS_BENCHMARK(RELATION_CHECK); PASS_BENCHMARK(ZEROMORPH); \ No newline at end of file From 799203de3509907b7f2e61cbbb113bf205e70a66 Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 17:40:37 +0000 Subject: [PATCH 24/25] Lighter weight stuffs --- barretenberg/cpp/CMakePresets.json | 6 +++--- .../benchmark/honk_bench/ultra_honk_passes.bench.cpp | 10 ++++------ .../cpp/src/barretenberg/common/compiler_hints.hpp | 12 ++++-------- .../barretenberg/honk/proof_system/ultra_prover.hpp | 12 ++++++------ 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 3a460d20af3..9b3dbbe6d80 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -202,9 +202,9 @@ "generator": "Unix Makefiles", "inherits": "clang16", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100" + "CFLAGS": "-fxray-instrument", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=500", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=500" }, "binaryDir": "build-xray" }, diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp index ba28fd2810c..c150fdb79b3 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp @@ -10,9 +10,7 @@ using namespace proof_system; enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, ZEROMORPH }; -BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass_inner(State& state, - honk::UltraProver& prover, - size_t index) noexcept +BBERG_PROFILE static void test_pass_inner(State& state, honk::UltraProver& prover, size_t index) noexcept { auto time_if_index = [&](size_t target_index, auto&& func) -> void { @@ -35,13 +33,13 @@ BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass_inner(State& state, state.ResumeTiming(); } } -BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass(State& state, size_t index) noexcept +BBERG_PROFILE static void test_pass(State& state, size_t index) noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); honk::UltraComposer composer; - honk::UltraProver prover = - bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit, 1); + honk::UltraProver prover = bench_utils::get_prover( + composer, &bench_utils::generate_ecdsa_verification_test_circuit, 10); test_pass_inner(state, prover, index); } #define PASS_BENCHMARK(pass) \ diff --git a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp index c3bdf3cf6b9..400ddf91d60 100644 --- a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp +++ b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp @@ -6,13 +6,9 @@ #define BBERG_INLINE __attribute__((always_inline)) inline #endif -// TODO(AD): Other compilers -#if defined(__clang__) -#define BBERG_INSTRUMENT [[clang::xray_always_instrument]] -#define BBERG_NO_INSTRUMENT [[clang::xray_never_instrument]] -#define BBERG_NOINLINE [[clang::noinline]] +// TODO(AD): Other instrumentation? +#ifdef XRAY +#define BBERG_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]] #else -#define BBERG_INSTRUMENT -#define BBERG_NO_INSTRUMENT -#define BBERG_NOINLINE +#define BBERG_PROFILE #endif \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp b/barretenberg/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp index 4be07e693a8..a592fde4755 100644 --- a/barretenberg/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp @@ -23,12 +23,12 @@ template class UltraProver_ { public: explicit UltraProver_(std::shared_ptr); - void execute_preamble_round(); - void execute_wire_commitments_round(); - void execute_sorted_list_accumulator_round(); - void execute_grand_product_computation_round(); - void execute_relation_check_rounds(); - void execute_zeromorph_rounds(); + BBERG_PROFILE void execute_preamble_round(); + BBERG_PROFILE void execute_wire_commitments_round(); + BBERG_PROFILE void execute_sorted_list_accumulator_round(); + BBERG_PROFILE void execute_grand_product_computation_round(); + BBERG_PROFILE void execute_relation_check_rounds(); + BBERG_PROFILE void execute_zeromorph_rounds(); plonk::proof& export_proof(); plonk::proof& construct_proof(); From cd42d168d0226b62748a3db15922aa853840703c Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 23 Oct 2023 18:58:22 +0000 Subject: [PATCH 25/25] review feedback --- barretenberg/cpp/CMakePresets.json | 44 +++++++++---------- .../scripts/collect_profile_information.sh | 2 +- .../benchmark/honk_bench/CMakeLists.txt | 2 +- ....bench.cpp => ultra_honk_rounds.bench.cpp} | 43 +++++++++++------- .../barretenberg/common/compiler_hints.hpp | 2 + .../common/parallel_for_mutex_pool.cpp | 2 +- .../cpp/src/barretenberg/common/thread.hpp | 20 --------- .../barretenberg/polynomials/polynomial.cpp | 27 ++++++++---- 8 files changed, 72 insertions(+), 70 deletions(-) rename barretenberg/cpp/src/barretenberg/benchmark/honk_bench/{ultra_honk_passes.bench.cpp => ultra_honk_rounds.bench.cpp} (59%) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 9b3dbbe6d80..7927b0ba494 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -54,18 +54,6 @@ "DISABLE_ASM": "ON" } }, - { - "name": "tsan", - "displayName": "Debugging build with address sanitizer on Clang-16", - "description": "Build with address sanitizer on clang16 with debugging information", - "inherits": "clang16-dbg", - "binaryDir": "build-tsan", - "environment": { - "CFLAGS": "-fsanitize=thread", - "CXXFLAGS": "-fsanitize=thread", - "LDFLAGS": "-fsanitize=thread" - } - }, { "name": "asan", "displayName": "Debugging build with address sanitizer on Clang-16", @@ -136,6 +124,18 @@ "SMT": "ON" } }, + { + "name": "tsan", + "displayName": "Debugging build with thread sanitizer on Clang-16", + "description": "Build with thread sanitizer on clang16 with debugging information", + "inherits": "clang16-dbg", + "binaryDir": "build-tsan", + "environment": { + "CFLAGS": "-fsanitize=thread", + "CXXFLAGS": "-fsanitize=thread", + "LDFLAGS": "-fsanitize=thread" + } + }, { "name": "coverage", "displayName": "Build with coverage", @@ -203,8 +203,8 @@ "inherits": "clang16", "environment": { "CFLAGS": "-fxray-instrument", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=500", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=500" + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=500 -DXRAY=1", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=500 -DXRAY=1" }, "binaryDir": "build-xray" }, @@ -214,9 +214,9 @@ "description": "Build with Clang and enable detailed LLVM XRay for profiling", "inherits": "xray", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150" + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1" }, "binaryDir": "build-xray-verbose" }, @@ -258,11 +258,6 @@ "inherits": "default", "configurePreset": "asan" }, - { - "name": "tsan", - "inherits": "default", - "configurePreset": "tsan" - }, { "name": "gcc", "inherits": "default", @@ -293,6 +288,11 @@ "inherits": "clang16", "configurePreset": "smt-verification" }, + { + "name": "tsan", + "inherits": "default", + "configurePreset": "tsan" + }, { "name": "coverage", "inherits": "default", diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index cd3b7cc1957..28ca73fe4d2 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -3,7 +3,7 @@ set -eu PRESET=${1:-xray} # can also be 'xray-1thread' ONLY_PROCESS=${2:-} -EXECUTABLE=${3:-ultra_honk_passes_bench} +EXECUTABLE=${3:-ultra_honk_rounds_bench} # Move above script dir. cd $(dirname $0)/.. diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt index c4adab0ade9..ed6122bb41e 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt @@ -2,7 +2,7 @@ set(BENCHMARK_SOURCES standard_plonk.bench.cpp ultra_honk.bench.cpp - ultra_honk_passes.bench.cpp + ultra_honk_rounds.bench.cpp ultra_plonk.bench.cpp ) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp similarity index 59% rename from barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp rename to barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp index c150fdb79b3..8b4e7145596 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp @@ -8,11 +8,19 @@ using namespace benchmark; using namespace proof_system; +// The rounds to measure enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, ZEROMORPH }; -BBERG_PROFILE static void test_pass_inner(State& state, honk::UltraProver& prover, size_t index) noexcept +/** + * @details Benchmark ultrahonk by performing all the rounds, but only measuring one. + * Note: As a result the very short rounds take a long time for statistical significance, so recommended to set their + * iterations to 1. + * @param state - The google benchmark state. + * @param prover - The ultrahonk prover. + * @param index - The pass to measure. + **/ +BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prover, size_t index) noexcept { - auto time_if_index = [&](size_t target_index, auto&& func) -> void { if (index == target_index) { state.ResumeTiming(); @@ -33,27 +41,28 @@ BBERG_PROFILE static void test_pass_inner(State& state, honk::UltraProver& prove state.ResumeTiming(); } } -BBERG_PROFILE static void test_pass(State& state, size_t index) noexcept +BBERG_PROFILE static void test_round(State& state, size_t index) noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); honk::UltraComposer composer; - honk::UltraProver prover = bench_utils::get_prover( - composer, &bench_utils::generate_ecdsa_verification_test_circuit, 10); - test_pass_inner(state, prover, index); + // TODO(AD) benchmark both sparse and dense circuits? + honk::UltraProver prover = + bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit, 1); + test_round_inner(state, prover, index); } -#define PASS_BENCHMARK(pass) \ - static void PASS_##pass(State& state) noexcept \ +#define ROUND_BENCHMARK(round) \ + static void ROUND_##round(State& state) noexcept \ { \ - test_pass(state, pass); \ + test_round(state, round); \ } \ - BENCHMARK(PASS_##pass)->Unit(::benchmark::kMillisecond) + BENCHMARK(ROUND_##round)->Unit(::benchmark::kMillisecond) -// Fast passes take a long time to benchmark because of how we compute statistical significance. +// Fast rounds take a long time to benchmark because of how we compute statistical significance. // Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part. -PASS_BENCHMARK(PREAMBLE)->Iterations(1); -PASS_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); -PASS_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); -PASS_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); -PASS_BENCHMARK(RELATION_CHECK); -PASS_BENCHMARK(ZEROMORPH); \ No newline at end of file +ROUND_BENCHMARK(PREAMBLE)->Iterations(1); +ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); +ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); +ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); +ROUND_BENCHMARK(RELATION_CHECK); +ROUND_BENCHMARK(ZEROMORPH); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp index 400ddf91d60..1815816a3c4 100644 --- a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp +++ b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp @@ -9,6 +9,8 @@ // TODO(AD): Other instrumentation? #ifdef XRAY #define BBERG_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]] +#define BBERG_NO_PROFILE [[clang::xray_never_instrument]] #else #define BBERG_PROFILE +#define BBERG_NO_PROFILE #endif \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp index 47e03b5ea85..d3a1afac509 100644 --- a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp +++ b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp @@ -52,7 +52,7 @@ class ThreadPool { std::condition_variable complete_condition_; bool stop = false; - BBERG_NO_INSTRUMENT void worker_loop(size_t thread_index); + BBERG_NO_PROFILE void worker_loop(size_t thread_index); void do_iterations() { diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index 45f11037c7a..96e3df74092 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -23,23 +23,3 @@ inline size_t get_num_cpus_pow2() } void parallel_for(size_t num_iterations, const std::function& func); - -/** - * A modified parallel_for optimized for work being done in batches. - * This is more appropriate for work with small granularity, to avoid thread caching issues and overhead. - */ -inline void parallel_for_batched(size_t num_iterations, auto&& func) -{ - size_t num_threads = get_num_cpus_pow2(); - size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division - // We will use parallel_for to dispatch the batches - parallel_for(num_threads, [&](size_t thread_idx) { - // Calculate start and end for this batch - size_t start = thread_idx * batch_size; - size_t end = std::min(start + batch_size, num_iterations); - - for (size_t i = start; i < end; ++i) { - func(i); - } - }); -} \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index b60f5dd85be..727575b7d56 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -18,6 +18,12 @@ namespace barretenberg { /** * Constructors / Destructors **/ + +/** + * @brief Initialize a Polynomial to size 'initial_size', zeroing memory. + * + * @param initial_size The initial size of the polynomial. + */ template Polynomial::Polynomial(size_t initial_size) : coefficients_(nullptr) @@ -29,6 +35,13 @@ Polynomial::Polynomial(size_t initial_size) memset(static_cast(coefficients_.get()), 0, sizeof(Fr) * capacity()); } +/** + * @brief Initialize a Polynomial to size 'initial_size'. + * Important: This does NOT zero memory. + * + * @param initial_size The initial size of the polynomial. + * @param flag Signals that we do not zero memory. + */ template Polynomial::Polynomial(size_t initial_size, DontZeroMemory flag) : coefficients_(nullptr) @@ -456,18 +469,16 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: } // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { - size_t new_n_l = 1 << (n - l - 1); - Fr new_u_l = evaluation_points[m - l - 1]; - for (size_t i = 0; i < new_n_l; i++) { - // Iterate on increasingly small portions of intermediate results. - intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); + n_l = 1 << (n - l - 1); + u_l = evaluation_points[m - l - 1]; + for (size_t i = 0; i < n_l; ++i) { + intermediate[i] += u_l * (intermediate[i + n_l] - intermediate[i]); } } - size_t final_n_l = 1 << (n - m); // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer - Polynomial result(final_n_l, DontZeroMemory::FLAG); - for (size_t idx = 0; idx < final_n_l; ++idx) { + Polynomial result(n_l, DontZeroMemory::FLAG); + for (size_t idx = 0; idx < n_l; ++idx) { result[idx] = intermediate[idx]; }