feat: structured commit (#9027)

Adds two new methods `commit_structured` and `commit_structured_with_nonzero_complement` designed to commit to wires and the permutation grand product, respectively. The first handles polynomials with islands of non-zero values by simply copying the nonzero inputs into contiguous memory using the known endpoints and then applying the conventional `commit` method. The second assumes blocks of arbitrary values interspersed with blocks of constant values (with the constant differing between blocks), i.e. the form of z_perm in the structured trace setting. This method uses `commit_structured` to compute the contribution from the non-constant regions. The constant region contribution is computed by first summing all points sharing a scalar using batched affine addition (implemented in the new class `BatchedAffineAddition`), then performing the MSM on the reduced result with one mul per constant scalar. Note: The core affine addition logic used herein was adapted from my earlier work on the `MsmSorter`, which had additional logic for sorting polynomials to arrange them in sequences to be added (but was not multithreaded). There turns out not to be a use case for this, at least for now. I've created an issue to either refactor that method to use the new and improved logic in `BatchedAffineAddition` or to simply delete it. The relevant before and after numbers for ClientIvc (total savings ~1.7s): ``` ClientIVCBench/Full/6 33537 ms COMMIT::wires(t) 2217 43.65% COMMIT::z_perm(t) 2304 45.36% ``` ``` ClientIVCBench/Full/6 31802 ms COMMIT::wires(t) 1720 51.07% COMMIT::z_perm(t) 1090 32.37% ```
AztecProtocol · Oct 11, 2024 · 26f406b · 26f406b
1 parent 4c1163a
commit 26f406b
Show file tree

Hide file tree

Showing 13 changed files with 860 additions and 17 deletions.
diff --git a/barretenberg/cpp/src/barretenberg/commitment_schemes/commit.bench.cpp b/barretenberg/cpp/src/barretenberg/commitment_schemes/commit.bench.cpp
@@ -1,5 +1,6 @@
 
 #include "barretenberg/commitment_schemes/commitment_key.hpp"
+#include "barretenberg/ecc//batched_affine_addition/batched_affine_addition.hpp"
 #include "barretenberg/polynomials/polynomial.hpp"
 #include "barretenberg/srs/factories/mem_bn254_crs_factory.hpp"
 #include <benchmark/benchmark.h>
@@ -27,6 +28,59 @@ template <typename FF> Polynomial<FF> sparse_random_poly(const size_t size, cons
     return polynomial;
 }
 
+// Bundle handed back by the structured benchmark generators: a polynomial together with the index ranges of its
+// "active" blocks.
+template <typename FF> struct PolyData {
+    using RangeEndpoints = std::pair<size_t, size_t>; // (start, end) indices delimiting one active block
+
+    Polynomial<FF> polynomial;
+    std::vector<RangeEndpoints> active_range_endpoints;
+};
+
+// Produce a polynomial whose random coefficients are confined to isolated blocks, along with the endpoints of those
+// blocks. With the default argument the entries outside the blocks are left as constructed (mimicking a wire
+// polynomial in the structured trace setting); with non_zero_complement = true the gap that follows each block is
+// filled with its own random constant (mimicking z_perm).
+template <typename FF> PolyData<FF> structured_random_poly(bool non_zero_complement = false)
+{
+    // Space set aside for each block. An arbitrary but realistic layout taken from the actual structure of a wire in
+    // the client_ivc bench.
+    std::vector<uint32_t> fixed_sizes = {
+        1 << 10, 1 << 7, 201000, 90000, 9000, 137000, 72000, 1 << 7, 2500, 11500,
+    };
+    // Number of leading entries in each block that actually receive random values
+    std::vector<uint32_t> actual_sizes = {
+        10, 16, 48873, 18209, 4132, 23556, 35443, 3, 2, 2,
+    };
+
+    // Accumulate the space required by all of the blocks
+    uint32_t full_size = 0;
+    for (const uint32_t block_capacity : fixed_sizes) {
+        full_size += block_capacity;
+    }
+
+    // Polynomials have a power-of-2 size in practice, so round the total up to the next power of 2 if needed
+    const auto msb = static_cast<size_t>(numeric::get_msb(full_size));
+    const bool is_power_of_2 = ((1UL << msb) == full_size);
+    const size_t log2_n = is_power_of_2 ? msb : msb + 1;
+    full_size = 1 << log2_n;
+
+    // Fill in the blocks one at a time, recording where each active region starts and ends
+    Polynomial<FF> polynomial(full_size);
+    std::vector<std::pair<size_t, size_t>> active_range_endpoints;
+    uint32_t block_start = 0;
+    for (auto [block_size, actual_size] : zip_view(fixed_sizes, actual_sizes)) {
+        const uint32_t active_end = block_start + actual_size;
+        const uint32_t block_end = block_start + block_size;
+
+        // Active region: independent random coefficients
+        for (size_t idx = block_start; idx < active_end; ++idx) {
+            polynomial.at(idx) = FF::random_element();
+        }
+        active_range_endpoints.emplace_back(block_start, active_end);
+
+        // Remainder of the block: a single random constant, if requested (mimicking z_perm)
+        if (non_zero_complement) {
+            const FF gap_value = FF::random_element();
+            for (size_t idx = active_end; idx < block_end; ++idx) {
+                polynomial.at(idx) = gap_value;
+            }
+        }
+
+        block_start = block_end;
+    }
+
+    return { polynomial, active_range_endpoints };
+}
+
 constexpr size_t MIN_LOG_NUM_POINTS = 16;
 constexpr size_t MAX_LOG_NUM_POINTS = 20;
 constexpr size_t MAX_NUM_POINTS = 1 << MAX_LOG_NUM_POINTS;
@@ -126,6 +180,7 @@ template <typename Curve> void bench_commit_random(::benchmark::State& state)
         key->commit(polynomial);
     }
 }
+
 // Commit to a polynomial with dense random nonzero entries but NOT our happiest case of an exact power of 2
 // Note this used to be a 50% regression just subtracting a power of 2 by 1.
 template <typename Curve> void bench_commit_random_non_power_of_2(::benchmark::State& state)
@@ -139,6 +194,59 @@ template <typename Curve> void bench_commit_random_non_power_of_2(::benchmark::S
         key->commit(polynomial);
     }
 }
+
+// Baseline measurement: commit to a block structured random polynomial via the basic commit method
+template <typename Curve> void bench_commit_structured_random_poly(::benchmark::State& state)
+{
+    using ScalarField = typename Curve::ScalarField;
+
+    auto commitment_key = create_commitment_key<Curve>(MAX_NUM_POINTS);
+
+    // The input is generated once, outside of the timed loop
+    auto poly_data = structured_random_poly<ScalarField>();
+
+    for (auto _ : state) {
+        commitment_key->commit(poly_data.polynomial);
+    }
+}
+
+// Same input as the basic structured benchmark, but committed via commit_structured using the known block endpoints
+template <typename Curve> void bench_commit_structured_random_poly_preprocessed(::benchmark::State& state)
+{
+    using ScalarField = typename Curve::ScalarField;
+
+    auto commitment_key = create_commitment_key<Curve>(MAX_NUM_POINTS);
+
+    // The input is generated once, outside of the timed loop
+    auto poly_data = structured_random_poly<ScalarField>();
+
+    for (auto _ : state) {
+        commitment_key->commit_structured(poly_data.polynomial, poly_data.active_range_endpoints);
+    }
+}
+
+// Baseline measurement for a z_perm-like polynomial (random blocks separated by constant valued blocks), committed
+// via the basic commit method
+template <typename Curve> void bench_commit_mock_z_perm(::benchmark::State& state)
+{
+    using ScalarField = typename Curve::ScalarField;
+
+    auto commitment_key = create_commitment_key<Curve>(MAX_NUM_POINTS);
+
+    // The input is generated once, outside of the timed loop
+    auto poly_data = structured_random_poly<ScalarField>(/*non_zero_complement=*/true);
+
+    for (auto _ : state) {
+        commitment_key->commit(poly_data.polynomial);
+    }
+}
+
+// Same z_perm-like input as the mock z_perm baseline, but committed via the tailored method
+// commit_structured_with_nonzero_complement
+template <typename Curve> void bench_commit_mock_z_perm_preprocessed(::benchmark::State& state)
+{
+    using ScalarField = typename Curve::ScalarField;
+
+    auto commitment_key = create_commitment_key<Curve>(MAX_NUM_POINTS);
+
+    // The input is generated once, outside of the timed loop
+    auto poly_data = structured_random_poly<ScalarField>(/*non_zero_complement=*/true);
+
+    for (auto _ : state) {
+        commitment_key->commit_structured_with_nonzero_complement(poly_data.polynomial,
+                                                                  poly_data.active_range_endpoints);
+    }
+}
+
 BENCHMARK(bench_commit_zero<curve::BN254>)
     ->DenseRange(MIN_LOG_NUM_POINTS, MAX_LOG_NUM_POINTS)
     ->Unit(benchmark::kMillisecond);
@@ -160,6 +268,10 @@ BENCHMARK(bench_commit_random<curve::BN254>)
 BENCHMARK(bench_commit_random_non_power_of_2<curve::BN254>)
     ->DenseRange(MIN_LOG_NUM_POINTS, MAX_LOG_NUM_POINTS)
     ->Unit(benchmark::kMillisecond);
+BENCHMARK(bench_commit_structured_random_poly<curve::BN254>)->Unit(benchmark::kMillisecond);
+BENCHMARK(bench_commit_structured_random_poly_preprocessed<curve::BN254>)->Unit(benchmark::kMillisecond);
+BENCHMARK(bench_commit_mock_z_perm<curve::BN254>)->Unit(benchmark::kMillisecond);
+BENCHMARK(bench_commit_mock_z_perm_preprocessed<curve::BN254>)->Unit(benchmark::kMillisecond);
 
 } // namespace bb
 

diff --git a/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp b/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp
@@ -9,7 +9,9 @@
 
 #include "barretenberg/common/debug_log.hpp"
 #include "barretenberg/common/op_count.hpp"
+#include "barretenberg/ecc/batched_affine_addition/batched_affine_addition.hpp"
 #include "barretenberg/ecc/scalar_multiplication/scalar_multiplication.hpp"
+#include "barretenberg/ecc/scalar_multiplication/sorted_msm.hpp"
 #include "barretenberg/numeric/bitop/get_msb.hpp"
 #include "barretenberg/numeric/bitop/pow.hpp"
 #include "barretenberg/polynomials/polynomial.hpp"
@@ -164,7 +166,7 @@ template <class Curve> class CommitmentKey {
         std::vector<Fr> scalars;
         std::vector<G1> points;
         scalars.reserve(num_nonzero_scalars);
-        points.reserve(num_nonzero_scalars);
+        points.reserve(2 * num_nonzero_scalars); //  2x accounts for endomorphism points
         for (size_t idx = 0; idx < num_threads; ++idx) {
             scalars.insert(scalars.end(), thread_scalars[idx].begin(), thread_scalars[idx].end());
             points.insert(points.end(), thread_points[idx].begin(), thread_points[idx].end());
@@ -173,6 +175,148 @@ template <class Curve> class CommitmentKey {
         // Call the version of pippenger which assumes all points are distinct
         return scalar_multiplication::pippenger_unsafe<Curve>(scalars, points, pippenger_runtime_state);
     }
+
+    /**
+     * @brief Efficiently commit to a polynomial whose nonzero elements are arranged in discrete blocks
+     * @details Given a set of ranges where the polynomial takes non-zero values, copy the non-zero inputs (scalars,
+     * points) into contiguous memory and commit to them using the normal pippenger algorithm. Defaults to the
+     * conventional commit method if the number of non-zero entries is beyond a threshold relative to the full
+     * polynomial size, or if the polynomial is empty.
+     * @note The wire polynomials have the described form when a structured execution trace is in use.
+     * @warning Method makes a copy of all {point, scalar} pairs that comprise the reduced input. May not be efficient
+     * in terms of memory or computation for polynomials beyond a certain sparseness threshold.
+     *
+     * @param polynomial Polynomial to commit to. On the structured path only the coefficients inside the active
+     * ranges are read.
+     * @param active_ranges Half-open index ranges [first, second) over which the polynomial may be nonzero. An index
+     * i selects the coefficient polynomial[i] and the point table entries starting at position 2 * i.
+     * @return Commitment
+     */
+    Commitment commit_structured(PolynomialSpan<const Fr> polynomial,
+                                 const std::vector<std::pair<size_t, size_t>>& active_ranges)
+    {
+        BB_OP_COUNT_TIME();
+        ASSERT(polynomial.end_index() <= srs->get_monomial_size());
+
+        // Percentage of nonzero coefficients beyond which we resort to the conventional commit method
+        constexpr size_t NONZERO_THRESHOLD = 75;
+
+        size_t total_num_scalars = 0;
+        for (const auto& range : active_ranges) {
+            ASSERT(range.first <= range.second); // a reversed range would wrap the unsigned length below
+            total_num_scalars += range.second - range.first;
+        }
+
+        // Resort to the standard commit if the polynomial is empty (the percentage would divide by zero) or if the
+        // "active" percentage of the polynomial exceeds the threshold
+        const size_t polynomial_size = polynomial.size();
+        if (polynomial_size == 0 || (total_num_scalars * 100 / polynomial_size) > NONZERO_THRESHOLD) {
+            return commit(polynomial);
+        }
+
+        // Extract the precomputed point table (contains raw SRS points at even indices and the corresponding
+        // endomorphism point (\beta*x, -y) at odd indices).
+        std::span<G1> point_table = srs->get_monomial_points();
+
+        // Gather the active scalars and their points (raw point + endomorphism point per scalar) into contiguous
+        // memory
+        std::vector<Fr> scalars;
+        std::vector<G1> points;
+        scalars.reserve(total_num_scalars);
+        points.reserve(total_num_scalars * 2);
+        for (const auto& range : active_ranges) {
+            const size_t range_size = range.second - range.first;
+            if (range_size == 0) {
+                continue; // nothing to copy; also avoids indexing a position that may lie past the end
+            }
+            // The end of each copy is derived from its start and length rather than by indexing one past the last
+            // element, which would be out of bounds for a range that ends at the end of the polynomial / point table
+            const Fr* scalars_begin = &polynomial[range.first];
+            scalars.insert(scalars.end(), scalars_begin, scalars_begin + range_size);
+
+            const G1* points_begin = point_table.data() + 2 * range.first;
+            points.insert(points.end(), points_begin, points_begin + 2 * range_size);
+        }
+
+        // Call pippenger
+        return scalar_multiplication::pippenger_unsafe<Curve>(scalars, points, pippenger_runtime_state);
+    }
+
+    /**
+     * @brief Efficiently commit to a polynomial with discrete blocks of arbitrary elements and constant elements
+     * @details Similar to method commit_structured() except the complement to the "active" region contains non-zero
+     * constant values (which are assumed to differ between blocks). This is exactly the structure of the permutation
+     * grand product polynomial z_perm when a structured execution trace is in use. The active regions are committed
+     * via commit_structured(); for each constant block the raw SRS points are first summed using batched affine
+     * addition and the sum is then multiplied by the constant of that block. Falls back to the conventional commit
+     * method if active_ranges or the polynomial is empty, or if the constant blocks make up less than a threshold
+     * percentage of the polynomial.
+     * @warning Requires a copy of all {point, scalar} pairs (including endo points) corresponding to the primary blocks
+     * and a copy of all of the points (without endo points) corresponding to their complement.
+     * @note Only the gaps that follow an active range are treated as constant blocks. NOTE(review): coefficients
+     * located before the first active range do not contribute on the structured path; presumably they are zero or the
+     * first range starts at the beginning of the polynomial - confirm with callers.
+     *
+     * @param polynomial Polynomial to commit to
+     * @param active_ranges Half-open index ranges [first, second) of the non-constant blocks, in increasing order
+     * @return Commitment
+     */
+    Commitment commit_structured_with_nonzero_complement(PolynomialSpan<const Fr> polynomial,
+                                                         const std::vector<std::pair<size_t, size_t>>& active_ranges)
+    {
+        BB_OP_COUNT_TIME();
+        ASSERT(polynomial.end_index() <= srs->get_monomial_size());
+
+        using BatchedAddition = BatchedAffineAddition<Curve>;
+
+        // Percentage of constant coefficients below which we resort to the conventional commit method
+        constexpr size_t CONSTANT_THRESHOLD = 50;
+
+        // Without any active range the complement below cannot be formed (size() - 1 would wrap and back() would be
+        // invalid) and an empty polynomial would lead to a division by zero; use the conventional method instead
+        const size_t polynomial_size = polynomial.size();
+        if (active_ranges.empty() || polynomial_size == 0) {
+            return commit(polynomial);
+        }
+
+        // Compute the active range complement over which the polynomial is assumed to be constant within each range
+        std::vector<std::pair<size_t, size_t>> active_ranges_complement;
+        active_ranges_complement.reserve(active_ranges.size());
+        for (size_t i = 0; i + 1 < active_ranges.size(); ++i) {
+            const size_t start = active_ranges[i].second;
+            const size_t end = active_ranges[i + 1].first;
+            active_ranges_complement.emplace_back(start, end);
+        }
+        // Final complement range goes from end of last active range to the end of the polynomial
+        active_ranges_complement.emplace_back(active_ranges.back().second, polynomial.end_index());
+
+        // Compute the total number of scalars in the constant regions
+        size_t total_num_complement_scalars = 0;
+        for (const auto& range : active_ranges_complement) {
+            ASSERT(range.first <= range.second); // overlapping or unsorted active ranges would wrap the length
+            total_num_complement_scalars += range.second - range.first;
+        }
+
+        // Compute percentage of polynomial comprised of constant blocks; resort to standard commit if appropriate
+        const size_t percentage_constant = total_num_complement_scalars * 100 / polynomial_size;
+        if (percentage_constant < CONSTANT_THRESHOLD) {
+            return commit(polynomial);
+        }
+
+        // Extract the precomputed point table (contains raw SRS points at even indices and the corresponding
+        // endomorphism point (\beta*x, -y) at odd indices).
+        std::span<G1> point_table = srs->get_monomial_points();
+
+        // Copy the raw SRS points (no endo points) corresponding to the constant regions into contiguous memory
+        // TODO(https://github.com/AztecProtocol/barretenberg/issues/1131): Peak memory usage could be improved by
+        // performing this copy and the subsequent summation as a precomputation prior to constructing the point table.
+        std::vector<G1> points;
+        points.reserve(total_num_complement_scalars); // one raw point per constant coefficient; endo points are skipped
+        for (const auto& range : active_ranges_complement) {
+            const size_t start = 2 * range.first;
+            const size_t end = 2 * range.second;
+            for (size_t i = start; i < end; i += 2) {
+                points.emplace_back(point_table[i]);
+            }
+        }
+
+        // Populate the set of unique scalars with first coeff from each range (values assumed constant over each
+        // range). Also store the number of points in each sequence to be summed
+        std::vector<Fr> unique_scalars;
+        std::vector<size_t> sequence_counts;
+        unique_scalars.reserve(active_ranges_complement.size());
+        sequence_counts.reserve(active_ranges_complement.size());
+        for (const auto& range : active_ranges_complement) {
+            if (range.second - range.first > 0) { // only ranges with nonzero length
+                // Index through the polynomial itself (as commit_structured does for the same ranges) so that the
+                // scalar is taken at the same index as the points copied above
+                unique_scalars.emplace_back(polynomial[range.first]);
+                sequence_counts.emplace_back(range.second - range.first);
+            }
+        }
+
+        // Reduce each sequence to a single point
+        auto reduced_points = BatchedAddition::add_in_place(points, sequence_counts);
+
+        // Compute the full commitment as the sum of the "active" region commitment and the constant region contribution
+        Commitment result = commit_structured(polynomial, active_ranges);
+        for (auto [scalar, point] : zip_view(unique_scalars, reduced_points)) {
+            result = result + point * scalar;
+        }
+
+        return result;
+    }
 };
 
 } // namespace bb