From ca523b556597bf52512d51eacce49c991217f782 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Thu, 14 Sep 2023 22:28:17 +0000 Subject: [PATCH 1/4] created thread_utils file, calc_num_threads function added parallelization update to polynomials --- .../src/barretenberg/common/thread_utils.cpp | 20 ++++++ .../src/barretenberg/common/thread_utils.hpp | 15 ++++ .../barretenberg/polynomials/polynomial.cpp | 72 ++++++++++++------- .../barretenberg/polynomials/polynomial.hpp | 2 +- 4 files changed, 83 insertions(+), 26 deletions(-) create mode 100644 barretenberg/cpp/src/barretenberg/common/thread_utils.cpp create mode 100644 barretenberg/cpp/src/barretenberg/common/thread_utils.hpp diff --git a/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp b/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp new file mode 100644 index 00000000000..56d43656924 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp @@ -0,0 +1,20 @@ +#include "thread_utils.hpp" + +/** + * @brief calculates number of threads to create based on minimum iterations per thread + * @details Finds the number of cpus with get_num_cpus(), and calculates `desired_num_threads` + * Returns the min of `desired_num_threads` and `max_num_theads`. + * Note that it will not calculate a power of 2 necessarily, use `calc_num_threads_pow2` instead + * + * @param num_iterations + * @param min_iterations_per_thread + * @return size_t + */ +size_t calc_num_threads(size_t num_iterations, size_t min_iterations_per_thread) +{ + size_t max_num_threads = get_num_cpus(); // number of available threads + size_t desired_num_threads = num_iterations / min_iterations_per_thread; + size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified + num_threads = num_threads > 0 ? num_threads : 1; + return num_threads; +} \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp b/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp new file mode 100644 index 00000000000..2db092936d1 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp @@ -0,0 +1,15 @@ +#include "thread.hpp" + +const size_t MIN_ITERS_PER_THREAD = 1 << 4; + +/** + * @brief calculates number of threads to create based on minimum iterations per thread + * @details Finds the number of cpus with get_num_cpus(), and calculates `desired_num_threads` + * Returns the min of `desired_num_threads` and `max_num_theads`. + * Note that it will not calculate a power of 2 necessarily, use `calc_num_threads_pow2` instead + * + * @param num_iterations + * @param min_iterations_per_thread + * @return size_t + */ +size_t calc_num_threads(size_t num_iterations, size_t min_iterations_per_thread); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 47d57d8b986..f7f70706be8 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -1,7 +1,3 @@ -#include "polynomial.hpp" -#include "barretenberg/common/assert.hpp" -#include "barretenberg/common/slab_allocator.hpp" -#include "polynomial_arithmetic.hpp" #include #include #include @@ -11,6 +7,13 @@ #include #include +#include "barretenberg/common/assert.hpp" +#include "barretenberg/common/slab_allocator.hpp" +#include "barretenberg/common/thread.hpp" +#include "barretenberg/common/thread_utils.hpp" +#include "polynomial.hpp" +#include "polynomial_arithmetic.hpp" + namespace barretenberg { /** * Constructors / Destructors @@ -272,12 +275,18 @@ template void Polynomial::add_scaled(std::span other const size_t other_size = other.size(); ASSERT(in_place_operation_viable(other_size)); - /** TODO parallelize using some kind of generic evaluation domain - * we really only need to know the thread size, but we don't need all the FFT roots + /** Calculates number of threads with calc_num_threads + * Possible improvements: standardize this parallelization code */ - for (size_t i = 0; i < other_size; ++i) { - coefficients_.get()[i] += scaling_factor * other[i]; - } + size_t num_threads = calc_num_threads(other_size, MIN_ITERS_PER_THREAD); + size_t range_per_thread = other_size / num_threads; + size_t leftovers = other_size - (range_per_thread * num_threads); + parallel_for(num_threads, [&](size_t j) { + size_t offset = j * range_per_thread; + size_t end = (j == num_threads - 1) ? offset + range_per_thread + leftovers : offset + range_per_thread; + for (size_t i = offset; i < end; ++i) + coefficients_.get()[i] += scaling_factor * other[i]; + }); } template Polynomial& Polynomial::operator+=(std::span other) @@ -285,12 +294,15 @@ template Polynomial& Polynomial::operator+=(std::span Polynomial& Polynomial::operator-=(std::span Polynomial& Polynomial::operator*=(const Fr scaling_facor) +template Polynomial& Polynomial::operator*=(const Fr scaling_factor) { ASSERT(in_place_operation_viable()); - for (size_t i = 0; i < size_; ++i) { - coefficients_.get()[i] *= scaling_facor; - } + size_t num_threads = calc_num_threads(size_, MIN_ITERS_PER_THREAD); + size_t range_per_thread = size_ / num_threads; + size_t leftovers = size_ - (range_per_thread * num_threads); + parallel_for(num_threads, [&](size_t j) { + size_t offset = j * range_per_thread; + size_t end = (j == num_threads - 1) ? offset + range_per_thread + leftovers : offset + range_per_thread; + for (size_t i = offset; i < end; ++i) + coefficients_.get()[i] *= scaling_factor; + }); + return *this; } diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp index 7b9be45c8e8..e1bc535842a 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp @@ -176,7 +176,7 @@ template class Polynomial { * * @param scaling_factor s */ - Polynomial& operator*=(const Fr scaling_facor); + Polynomial& operator*=(const Fr scaling_factor); /** * @brief evaluates p(X) = ∑ᵢ aᵢ⋅Xⁱ considered as multi-linear extension p(X₀,…,Xₘ₋₁) = ∑ᵢ aᵢ⋅Lᵢ(X₀,…,Xₘ₋₁) From 952369559f912139dfa9314f2f29c4e20ec60bc5 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Fri, 15 Sep 2023 19:27:04 +0000 Subject: [PATCH 2/4] added namespace added calculate_num_threads_pow2 function small other fixes --- .../src/barretenberg/common/thread_utils.cpp | 28 ++++++++++++-- .../src/barretenberg/common/thread_utils.hpp | 13 +++++-- .../barretenberg/polynomials/polynomial.cpp | 37 ++++++++++--------- 3 files changed, 53 insertions(+), 25 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp b/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp index 56d43656924..9c58b41e768 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp +++ b/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp @@ -1,20 +1,40 @@ #include "thread_utils.hpp" +namespace barretenberg::thread_utils { /** * @brief calculates number of threads to create based on minimum iterations per thread * @details Finds the number of cpus with get_num_cpus(), and calculates `desired_num_threads` - * Returns the min of `desired_num_threads` and `max_num_theads`. - * Note that it will not calculate a power of 2 necessarily, use `calc_num_threads_pow2` instead + * Returns the min of `desired_num_threads` and `max_num_threads`. + * Note that it will not calculate a power of 2 necessarily, use `calculate_num_threads_pow2` instead * * @param num_iterations * @param min_iterations_per_thread * @return size_t */ -size_t calc_num_threads(size_t num_iterations, size_t min_iterations_per_thread) +size_t calculate_num_threads(size_t num_iterations, size_t min_iterations_per_thread) { size_t max_num_threads = get_num_cpus(); // number of available threads size_t desired_num_threads = num_iterations / min_iterations_per_thread; size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified num_threads = num_threads > 0 ? num_threads : 1; return num_threads; -} \ No newline at end of file +} + +/** + * @brief calculates number of threads to create based on minimum iterations per thread, guaranteed power of 2 + * @details Same functionality as `calculate_num_threads` but guaranteed power of 2 + * @param num_iterations + * @param min_iterations_per_thread + * @return size_t + */ +size_t calculate_num_threads_pow2(size_t num_iterations, size_t min_iterations_per_thread) +{ + size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2) + size_t desired_num_threads = num_iterations / min_iterations_per_thread; + desired_num_threads = static_cast(1ULL << numeric::get_msb(desired_num_threads)); + size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified + num_threads = num_threads > 0 ? num_threads : 1; + return num_threads; +} + +} // namespace barretenberg::thread_utils \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp b/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp index 2db092936d1..4f91a4a25f8 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp @@ -1,15 +1,22 @@ #include "thread.hpp" -const size_t MIN_ITERS_PER_THREAD = 1 << 4; +namespace barretenberg::thread_utils { + +const size_t DEFAULT_MIN_ITERS_PER_THREAD = 1 << 4; /** * @brief calculates number of threads to create based on minimum iterations per thread * @details Finds the number of cpus with get_num_cpus(), and calculates `desired_num_threads` * Returns the min of `desired_num_threads` and `max_num_theads`. - * Note that it will not calculate a power of 2 necessarily, use `calc_num_threads_pow2` instead + * Note that it will not calculate a power of 2 necessarily, use `calculate_num_threads_pow2` instead * * @param num_iterations * @param min_iterations_per_thread * @return size_t */ -size_t calc_num_threads(size_t num_iterations, size_t min_iterations_per_thread); \ No newline at end of file +size_t calculate_num_threads(size_t num_iterations, size_t min_iterations_per_thread = DEFAULT_MIN_ITERS_PER_THREAD); + +size_t calculate_num_threads_pow2(size_t num_iterations, + size_t min_iterations_per_thread = DEFAULT_MIN_ITERS_PER_THREAD); + +} // namespace barretenberg::thread_utils \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index f7f70706be8..19d090b278a 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -1,3 +1,9 @@ +#include "polynomial.hpp" +#include "barretenberg/common/assert.hpp" +#include "barretenberg/common/slab_allocator.hpp" +#include "barretenberg/common/thread.hpp" +#include "barretenberg/common/thread_utils.hpp" +#include "polynomial_arithmetic.hpp" #include #include #include @@ -7,13 +13,6 @@ #include #include -#include "barretenberg/common/assert.hpp" -#include "barretenberg/common/slab_allocator.hpp" -#include "barretenberg/common/thread.hpp" -#include "barretenberg/common/thread_utils.hpp" -#include "polynomial.hpp" -#include "polynomial_arithmetic.hpp" - namespace barretenberg { /** * Constructors / Destructors @@ -275,17 +274,16 @@ template void Polynomial::add_scaled(std::span other const size_t other_size = other.size(); ASSERT(in_place_operation_viable(other_size)); - /** Calculates number of threads with calc_num_threads - * Possible improvements: standardize this parallelization code - */ - size_t num_threads = calc_num_threads(other_size, MIN_ITERS_PER_THREAD); + // Calculates number of threads with thread_utils::calculate_num_threads + size_t num_threads = thread_utils::calculate_num_threads(other_size); size_t range_per_thread = other_size / num_threads; size_t leftovers = other_size - (range_per_thread * num_threads); parallel_for(num_threads, [&](size_t j) { size_t offset = j * range_per_thread; size_t end = (j == num_threads - 1) ? offset + range_per_thread + leftovers : offset + range_per_thread; - for (size_t i = offset; i < end; ++i) + for (size_t i = offset; i < end; ++i) { coefficients_.get()[i] += scaling_factor * other[i]; + } }); } @@ -294,14 +292,15 @@ template Polynomial& Polynomial::operator+=(std::span Polynomial& Polynomial::operator-=(std::span Polynomial& Polynomial::operator*=(const Fr scali { ASSERT(in_place_operation_viable()); - size_t num_threads = calc_num_threads(size_, MIN_ITERS_PER_THREAD); + size_t num_threads = thread_utils::calculate_num_threads(size_); size_t range_per_thread = size_ / num_threads; size_t leftovers = size_ - (range_per_thread * num_threads); parallel_for(num_threads, [&](size_t j) { size_t offset = j * range_per_thread; size_t end = (j == num_threads - 1) ? offset + range_per_thread + leftovers : offset + range_per_thread; - for (size_t i = offset; i < end; ++i) + for (size_t i = offset; i < end; ++i) { coefficients_.get()[i] *= scaling_factor; + } }); return *this; From 13219e2de11861cc50d8acdae8157a443bab4679 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Fri, 15 Sep 2023 20:28:27 +0000 Subject: [PATCH 3/4] replaced sumcheck duplicated code with calculate_num_threads_pow2 --- .../cpp/src/barretenberg/common/thread_utils.cpp | 4 ++-- .../src/barretenberg/honk/sumcheck/sumcheck_round.hpp | 9 +++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp b/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp index 9c58b41e768..69549bbd7f0 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp +++ b/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp @@ -16,7 +16,7 @@ size_t calculate_num_threads(size_t num_iterations, size_t min_iterations_per_th size_t max_num_threads = get_num_cpus(); // number of available threads size_t desired_num_threads = num_iterations / min_iterations_per_thread; size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified - num_threads = num_threads > 0 ? num_threads : 1; + num_threads = num_threads > 0 ? num_threads : 1; // ensure num_threads is at least 1 return num_threads; } @@ -33,7 +33,7 @@ size_t calculate_num_threads_pow2(size_t num_iterations, size_t min_iterations_p size_t desired_num_threads = num_iterations / min_iterations_per_thread; desired_num_threads = static_cast(1ULL << numeric::get_msb(desired_num_threads)); size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified - num_threads = num_threads > 0 ? num_threads : 1; + num_threads = num_threads > 0 ? num_threads : 1; // ensure num_threads is at least 1 return num_threads; } diff --git a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index 1847771a49c..8cc4de52d41 100644 --- a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -1,6 +1,7 @@ #pragma once #include "barretenberg/common/log.hpp" #include "barretenberg/common/thread.hpp" +#include "barretenberg/common/thread_utils.hpp" #include "barretenberg/polynomials/barycentric.hpp" #include "barretenberg/polynomials/pow.hpp" #include "barretenberg/proof_system/flavor/flavor.hpp" @@ -140,12 +141,8 @@ template class SumcheckProverRound { // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based // on a specified minimum number of iterations per thread. This eventually leads to the use of a single thread. // For now we use a power of 2 number of threads simply to ensure the round size is evenly divided. - size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2) - size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread - size_t desired_num_threads = round_size / min_iterations_per_thread; - size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified - num_threads = num_threads > 0 ? num_threads : 1; // ensure num threads is >= 1 - size_t iterations_per_thread = round_size / num_threads; // actual iterations per thread + size_t num_threads = barretenberg::thread_utils::calculate_num_threads_pow2(round_size); + size_t iterations_per_thread = round_size / num_threads; // actual iterations per thread // Constuct univariate accumulator containers; one per thread std::vector thread_univariate_accumulators(num_threads); From c9e3da33b9eae00a55e29a89020df7e48f8bac58 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Thu, 21 Sep 2023 13:55:38 +0000 Subject: [PATCH 4/4] small fixes --- barretenberg/cpp/src/barretenberg/common/thread_utils.hpp | 7 +++++++ .../cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp b/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp index 4f91a4a25f8..55ee79ff1ab 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp @@ -16,6 +16,13 @@ const size_t DEFAULT_MIN_ITERS_PER_THREAD = 1 << 4; */ size_t calculate_num_threads(size_t num_iterations, size_t min_iterations_per_thread = DEFAULT_MIN_ITERS_PER_THREAD); +/** + * @brief calculates number of threads to create based on minimum iterations per thread, guaranteed power of 2 + * @details Same functionality as `calculate_num_threads` but guaranteed power of 2 + * @param num_iterations + * @param min_iterations_per_thread + * @return size_t + */ size_t calculate_num_threads_pow2(size_t num_iterations, size_t min_iterations_per_thread = DEFAULT_MIN_ITERS_PER_THREAD); diff --git a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index 8cc4de52d41..89c97a8b665 100644 --- a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -141,7 +141,9 @@ template class SumcheckProverRound { // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based // on a specified minimum number of iterations per thread. This eventually leads to the use of a single thread. // For now we use a power of 2 number of threads simply to ensure the round size is evenly divided. - size_t num_threads = barretenberg::thread_utils::calculate_num_threads_pow2(round_size); + size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread + size_t num_threads = + barretenberg::thread_utils::calculate_num_threads_pow2(round_size, min_iterations_per_thread); size_t iterations_per_thread = round_size / num_threads; // actual iterations per thread // Constuct univariate accumulator containers; one per thread