From 427cf594ec9ca4b472ec5d4a249c7b49805c78e2 Mon Sep 17 00:00:00 2001 From: maramihali Date: Thu, 5 Dec 2024 12:15:35 +0000 Subject: [PATCH] chore: parallelise inverse polynomial construction for lookup relations (#10413) Benchmark were showing that oink is the second most expensive round in PG after combiner. On top of that one component where we see discrepancies when increasing the ambient trace size is logderivative inverses construction. A step towards improving this is parallelising the construction of inverse polynomials (which is linear). Also the inverses can be committed to with `commit_sparse` which shows a slight improvement as well. BEFORE ``` CLIENT_IVC_BENCH_STRUCTURE(2^19) ClientIVCBench/Full/6 29146 ms 27299 ms ProtogalaxyProver::prove(t) 16265 58.29% ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key(t) 5624 34.58% EXAMPLE_20(2^20) ClientIVCBench/Full/6 37145 ms 34235 ms ProtogalaxyProver::prove(t) 21283 60.75% ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key(t) 8818 41.43% COMMIT::lookup_inverses(t) 406 9.82% ``` AFTER ``` CLIENT_IVC_BENCH_STRUCTURE(2^19) ClientIVCBench/Full/6 27351 ms 25477 ms ProtogalaxyProver::prove(t) 14627 55.72% ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key(t) 4030 27.55% EXAMPLE_20(2^20) ClientIVCBench/Full/6 33852 ms 30893 ms ProtogalaxyProver::prove(t) 18250 56.97% ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key(t) 5526 30.28% COMMIT::lookup_inverses(t) 301 7.43% ``` --- .../relations/databus_lookup_relation.hpp | 65 +++++++++++-------- .../relations/logderiv_lookup_relation.hpp | 28 +++++--- .../trace_to_polynomials.cpp | 2 +- .../barretenberg/ultra_honk/oink_prover.cpp | 4 +- 4 files changed, 60 insertions(+), 39 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp index bb29d9bc8a9..3b00b5bff69 100644 --- a/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp @@ -3,6 +3,7 @@ #include #include "barretenberg/common/constexpr_utils.hpp" +#include "barretenberg/common/thread.hpp" #include "barretenberg/polynomials/univariate.hpp" #include "barretenberg/relations/relation_types.hpp" @@ -231,32 +232,42 @@ template class DatabusLookupRelationImpl { const size_t circuit_size) { auto& inverse_polynomial = BusData::inverses(polynomials); - bool is_read = false; - bool nonzero_read_count = false; - for (size_t i = 0; i < circuit_size; ++i) { - // Determine if the present row contains a databus operation - auto q_busread = polynomials.q_busread[i]; - if constexpr (bus_idx == 0) { // calldata - is_read = q_busread == 1 && polynomials.q_l[i] == 1; - nonzero_read_count = polynomials.calldata_read_counts[i] > 0; - } - if constexpr (bus_idx == 1) { // secondary_calldata - is_read = q_busread == 1 && polynomials.q_r[i] == 1; - nonzero_read_count = polynomials.secondary_calldata_read_counts[i] > 0; - } - if constexpr (bus_idx == 2) { // return data - is_read = q_busread == 1 && polynomials.q_o[i] == 1; - nonzero_read_count = polynomials.return_data_read_counts[i] > 0; - } - // We only compute the inverse if this row contains a read gate or data that has been read - if (is_read || nonzero_read_count) { - // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): avoid get_row if possible. - auto row = polynomials.get_row(i); // Note: this is a copy. use sparingly! - auto value = compute_read_term(row, relation_parameters) * - compute_write_term(row, relation_parameters); - inverse_polynomial.at(i) = value; + + size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread + size_t num_threads = bb::calculate_num_threads_pow2(circuit_size, min_iterations_per_thread); + size_t iterations_per_thread = circuit_size / num_threads; // actual iterations per thread + + parallel_for(num_threads, [&](size_t thread_idx) { + size_t start = thread_idx * iterations_per_thread; + size_t end = (thread_idx + 1) * iterations_per_thread; + bool is_read = false; + bool nonzero_read_count = false; + for (size_t i = start; i < end; ++i) { + // Determine if the present row contains a databus operation + auto q_busread = polynomials.q_busread[i]; + if constexpr (bus_idx == 0) { // calldata + is_read = q_busread == 1 && polynomials.q_l[i] == 1; + nonzero_read_count = polynomials.calldata_read_counts[i] > 0; + } + if constexpr (bus_idx == 1) { // secondary_calldata + is_read = q_busread == 1 && polynomials.q_r[i] == 1; + nonzero_read_count = polynomials.secondary_calldata_read_counts[i] > 0; + } + if constexpr (bus_idx == 2) { // return data + is_read = q_busread == 1 && polynomials.q_o[i] == 1; + nonzero_read_count = polynomials.return_data_read_counts[i] > 0; + } + // We only compute the inverse if this row contains a read gate or data that has been read + if (is_read || nonzero_read_count) { + // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): avoid get_row if possible. + auto row = polynomials.get_row(i); // Note: this is a copy. use sparingly! + auto value = compute_read_term(row, relation_parameters) * + compute_write_term(row, relation_parameters); + inverse_polynomial.at(i) = value; + } } - } + }); + // Compute inverse polynomial I in place by inverting the product at each row // Note: zeroes are ignored as they are not used anyway FF::batch_invert(inverse_polynomial.coeffs()); @@ -299,8 +310,8 @@ template class DatabusLookupRelationImpl { constexpr size_t subrel_idx_1 = 2 * bus_idx; constexpr size_t subrel_idx_2 = 2 * bus_idx + 1; - // Establish the correctness of the polynomial of inverses I. Note: inverses is computed so that the value is 0 - // if !inverse_exists. Degree 3 (5) + // Establish the correctness of the polynomial of inverses I. Note: inverses is computed so that the value + // is 0 if !inverse_exists. Degree 3 (5) std::get(accumulator) += (read_term * write_term * inverses - inverse_exists) * scaling_factor; // Establish validity of the read. Note: no scaling factor here since this constraint is enforced across the diff --git a/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp index afd1326a91d..686b751903c 100644 --- a/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp @@ -3,6 +3,7 @@ #include #include "barretenberg/common/constexpr_utils.hpp" +#include "barretenberg/common/thread.hpp" #include "barretenberg/honk/proof_system/logderivative_library.hpp" #include "barretenberg/polynomials/univariate.hpp" #include "barretenberg/relations/relation_types.hpp" @@ -157,16 +158,25 @@ template class LogDerivLookupRelationImpl { { auto& inverse_polynomial = get_inverse_polynomial(polynomials); - for (size_t i = 0; i < circuit_size; ++i) { - // We only compute the inverse if this row contains a lookup gate or data that has been looked up - if (polynomials.q_lookup.get(i) == 1 || polynomials.lookup_read_tags.get(i) == 1) { - // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): avoid get_row if possible. - auto row = polynomials.get_row(i); // Note: this is a copy. use sparingly! - auto value = compute_read_term(row, relation_parameters) * - compute_write_term(row, relation_parameters); - inverse_polynomial.at(i) = value; + size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread + size_t num_threads = bb::calculate_num_threads_pow2(circuit_size, min_iterations_per_thread); + size_t iterations_per_thread = circuit_size / num_threads; // actual iterations per thread + + parallel_for(num_threads, [&](size_t thread_idx) { + size_t start = thread_idx * iterations_per_thread; + size_t end = (thread_idx + 1) * iterations_per_thread; + for (size_t i = start; i < end; ++i) { + // We only compute the inverse if this row contains a lookup gate or data that has been looked up + if (polynomials.q_lookup.get(i) == 1 || polynomials.lookup_read_tags.get(i) == 1) { + // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): avoid get_row if possible. + auto row = polynomials.get_row(i); // Note: this is a copy. use sparingly! + auto value = compute_read_term(row, relation_parameters) * + compute_write_term(row, relation_parameters); + inverse_polynomial.at(i) = value; + } } - } + }); + // Compute inverse polynomial I in place by inverting the product at each row FF::batch_invert(inverse_polynomial.coeffs()); }; diff --git a/barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp b/barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp index f778a03c503..549d42564fd 100644 --- a/barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp +++ b/barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp @@ -102,7 +102,7 @@ typename TraceToPolynomials::TraceData TraceToPolynomials::const auto block_size = static_cast(block.size()); // Save ranges over which the blocks are "active" for use in structured commitments - if constexpr (IsUltraFlavor) { + if constexpr (IsUltraFlavor) { // Mega and Ultra if (block.size() > 0) { proving_key.active_block_ranges.emplace_back(offset, offset + block.size()); } diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp index 111497e9175..c19a43f8f98 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp @@ -206,8 +206,8 @@ template void OinkProver::execute_log_derivative_ { PROFILE_THIS_NAME("COMMIT::lookup_inverses"); - witness_commitments.lookup_inverses = - proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.lookup_inverses); + witness_commitments.lookup_inverses = proving_key->proving_key.commitment_key->commit_sparse( + proving_key->proving_key.polynomials.lookup_inverses); } transcript->send_to_verifier(domain_separator + commitment_labels.lookup_inverses, witness_commitments.lookup_inverses);