Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: structured commit #9027

Merged
merged 43 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
a161c77
basic commit structured method working
ledwards2225 Oct 3, 2024
c0b4bf3
use commit structured on wires in oink
ledwards2225 Oct 3, 2024
83e7fd7
a realistic wire in the commit bench
ledwards2225 Oct 3, 2024
e41dc02
cleanup
ledwards2225 Oct 4, 2024
5c9a9a7
tidy
ledwards2225 Oct 4, 2024
34c716b
name change
ledwards2225 Oct 4, 2024
3e07e45
fix wasm build
ledwards2225 Oct 4, 2024
4c1cee3
fix comment block
ledwards2225 Oct 4, 2024
74aa071
delegate sub algos to reusable methods
ledwards2225 Oct 4, 2024
0a82bf3
wow the test passes with the new z perm method
ledwards2225 Oct 4, 2024
a2e2246
bug fix; realistic test passes for z_perm
ledwards2225 Oct 5, 2024
6cc6c70
batched addition isolated in new class
ledwards2225 Oct 5, 2024
c781afd
parallel batch add passes tests and bench is very promising
ledwards2225 Oct 6, 2024
d644404
outer class manages mem and inner class is static
ledwards2225 Oct 6, 2024
8029501
working method for distribution of points across threads
ledwards2225 Oct 7, 2024
beae61a
functioning method for constructing thread data
ledwards2225 Oct 7, 2024
a985f1a
quick update
ledwards2225 Oct 7, 2024
5b2023c
full batched add parallel method passes test
ledwards2225 Oct 7, 2024
4f23262
standalone test for batched addition
ledwards2225 Oct 8, 2024
2f7edea
realistic zperm commit test passing with CK method
ledwards2225 Oct 8, 2024
c242d29
make it build
ledwards2225 Oct 8, 2024
5854d13
bench cleanup
ledwards2225 Oct 8, 2024
d64c8d9
correctly dynamically determine number of threads
ledwards2225 Oct 8, 2024
3e0c5ac
cleanup; bench shows 50 pct reduction for zperm
ledwards2225 Oct 8, 2024
064058d
cleanup
ledwards2225 Oct 8, 2024
b150ed3
reorg batched affine add class
ledwards2225 Oct 8, 2024
a48eac8
test suite cleanup
ledwards2225 Oct 8, 2024
fedbdbb
cleanup comment and reorg
ledwards2225 Oct 8, 2024
6b7c824
Merge branch 'master' into lde/structured_commit
ledwards2225 Oct 9, 2024
7b3d0ea
fix build?
ledwards2225 Oct 9, 2024
c68db56
fix total num scalars bug
ledwards2225 Oct 9, 2024
2fa5aa4
extend final range to end of dyadic poly size
ledwards2225 Oct 9, 2024
1059564
WiP fix overindex bug
ledwards2225 Oct 10, 2024
5355753
simplify active range endpoints handling
ledwards2225 Oct 10, 2024
5cb568c
update commit bench
ledwards2225 Oct 10, 2024
ec25495
some clean
ledwards2225 Oct 10, 2024
551074c
clean
ledwards2225 Oct 10, 2024
77e7b5b
reuse poly construction method
ledwards2225 Oct 10, 2024
d6afdee
more clean
ledwards2225 Oct 10, 2024
4004a67
Merge branch 'master' into lde/structured_commit
ledwards2225 Oct 10, 2024
155a6ca
improve comments
ledwards2225 Oct 10, 2024
109f6d3
more comments
ledwards2225 Oct 11, 2024
dbc69f2
updates based on review
ledwards2225 Oct 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions barretenberg/cpp/src/barretenberg/commitment_schemes/commit.bench.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

#include "barretenberg/commitment_schemes/commitment_key.hpp"
#include "barretenberg/ecc//batched_affine_addition/batched_affine_addition.hpp"
#include "barretenberg/polynomials/polynomial.hpp"
#include "barretenberg/srs/factories/mem_bn254_crs_factory.hpp"
#include <benchmark/benchmark.h>
Expand Down Expand Up @@ -27,6 +28,59 @@ template <typename FF> Polynomial<FF> sparse_random_poly(const size_t size, cons
return polynomial;
}

// A polynomial bundled with the [start, end) index pairs of its "active" (populated) regions.
// Returned by structured_random_poly() and consumed by the structured commit benchmarks.
template <typename FF> struct PolyData {
    Polynomial<FF> polynomial;
    std::vector<std::pair<size_t, size_t>> active_range_endpoints;
};

// Generate a polynomial with random coefficients organized in isolated blocks. (Mimics the wire polynomials
// in the structured trace setting, or z_perm if non_zero_complement is set to true).
template <typename FF> PolyData<FF> structured_random_poly(bool non_zero_complement = false)
{
    // An arbitrary but realistic test case taken from the actual structure of a wire in the client_ivc bench:
    // each block has a fixed capacity of which only a prefix is actually used.
    std::vector<uint32_t> block_sizes = {
        1 << 10, 1 << 7, 201000, 90000, 9000, 137000, 72000, 1 << 7, 2500, 11500,
    };
    std::vector<uint32_t> used_sizes = {
        10, 16, 48873, 18209, 4132, 23556, 35443, 3, 2, 2,
    };

    // Sum the block capacities, then round up to the next power of two since in practice
    // the polynomials have a dyadic size
    uint32_t total_size = 0;
    for (auto block_size : block_sizes) {
        total_size += block_size;
    }
    auto log2_n = static_cast<size_t>(numeric::get_msb(total_size));
    if ((1UL << log2_n) != (total_size)) {
        ++log2_n;
    }
    total_size = 1 << log2_n;

    // Populate the used prefix of each block with random coefficients and record the
    // "active" [start, end) endpoints of each block
    auto polynomial = Polynomial<FF>(total_size);
    std::vector<std::pair<size_t, size_t>> active_range_endpoints;
    uint32_t block_start = 0;
    for (auto [block_size, used_size] : zip_view(block_sizes, used_sizes)) {
        const uint32_t active_end = block_start + used_size;
        for (size_t i = block_start; i < active_end; ++i) {
            polynomial.at(i) = FF::random_element();
        }
        active_range_endpoints.emplace_back(block_start, active_end);
        block_start += block_size;
        // If indicated, fill the unused remainder of the block with a random constant (mimicking z_perm)
        if (non_zero_complement) {
            const FF padding_value = FF::random_element();
            for (size_t i = active_end; i < block_start; ++i) {
                polynomial.at(i) = padding_value;
            }
        }
    }

    return { polynomial, active_range_endpoints };
}

constexpr size_t MIN_LOG_NUM_POINTS = 16;
constexpr size_t MAX_LOG_NUM_POINTS = 20;
constexpr size_t MAX_NUM_POINTS = 1 << MAX_LOG_NUM_POINTS;
Expand Down Expand Up @@ -126,6 +180,7 @@ template <typename Curve> void bench_commit_random(::benchmark::State& state)
key->commit(polynomial);
}
}

// Commit to a polynomial with dense random nonzero entries but NOT our happiest case of an exact power of 2
// Note this used to be a 50% regression just subtracting a power of 2 by 1.
template <typename Curve> void bench_commit_random_non_power_of_2(::benchmark::State& state)
Expand All @@ -139,6 +194,59 @@ template <typename Curve> void bench_commit_random_non_power_of_2(::benchmark::S
key->commit(polynomial);
}
}

// Commit to a polynomial with block structured random entries using the basic commit method.
// Serves as the baseline against which bench_commit_structured_random_poly_preprocessed is compared.
template <typename Curve> void bench_commit_structured_random_poly(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    // Only the polynomial is needed for the plain commit; taking the member directly (rather than a
    // structured binding) avoids an unused-variable warning for the active range endpoints.
    auto polynomial = structured_random_poly<Fr>().polynomial;

    for (auto _ : state) {
        key->commit(polynomial);
    }
}

// Commit to a polynomial with block structured random entries using commit_structured
template <typename Curve> void bench_commit_structured_random_poly_preprocessed(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto commitment_key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    // Construct the structured polynomial along with its active range endpoints
    auto poly_data = structured_random_poly<Fr>();

    for (auto _ : state) {
        commitment_key->commit_structured(poly_data.polynomial, poly_data.active_range_endpoints);
    }
}

// Commit to a polynomial with block structured random entries and constant valued complement.
// Serves as the baseline against which bench_commit_z_perm_preprocessed is compared.
template <typename Curve> void bench_commit_z_perm(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    // Only the polynomial is needed for the plain commit; taking the member directly (rather than a
    // structured binding) avoids an unused-variable warning for the active range endpoints.
    auto polynomial = structured_random_poly<Fr>(/*non_zero_complement=*/true).polynomial;

    for (auto _ : state) {
        key->commit(polynomial);
    }
}

// Commit to a polynomial with block structured random entries and constant valued complement using tailored method
template <typename Curve> void bench_commit_z_perm_preprocessed(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto commitment_key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    // Construct a z_perm-like polynomial: random active blocks, constant nonzero complement
    auto poly_data = structured_random_poly<Fr>(/*non_zero_complement=*/true);

    for (auto _ : state) {
        commitment_key->commit_structured_with_nonzero_complement(poly_data.polynomial,
                                                                  poly_data.active_range_endpoints);
    }
}

BENCHMARK(bench_commit_zero<curve::BN254>)
->DenseRange(MIN_LOG_NUM_POINTS, MAX_LOG_NUM_POINTS)
->Unit(benchmark::kMillisecond);
Expand All @@ -160,6 +268,10 @@ BENCHMARK(bench_commit_random<curve::BN254>)
BENCHMARK(bench_commit_random_non_power_of_2<curve::BN254>)
->DenseRange(MIN_LOG_NUM_POINTS, MAX_LOG_NUM_POINTS)
->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_structured_random_poly<curve::BN254>)->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_structured_random_poly_preprocessed<curve::BN254>)->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_z_perm<curve::BN254>)->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_z_perm_preprocessed<curve::BN254>)->Unit(benchmark::kMillisecond);

} // namespace bb

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

#include "barretenberg/common/debug_log.hpp"
#include "barretenberg/common/op_count.hpp"
#include "barretenberg/ecc/batched_affine_addition/batched_affine_addition.hpp"
#include "barretenberg/ecc/scalar_multiplication/scalar_multiplication.hpp"
#include "barretenberg/ecc/scalar_multiplication/sorted_msm.hpp"
#include "barretenberg/numeric/bitop/get_msb.hpp"
#include "barretenberg/numeric/bitop/pow.hpp"
#include "barretenberg/polynomials/polynomial.hpp"
Expand Down Expand Up @@ -164,7 +166,7 @@ template <class Curve> class CommitmentKey {
std::vector<Fr> scalars;
std::vector<G1> points;
scalars.reserve(num_nonzero_scalars);
points.reserve(num_nonzero_scalars);
points.reserve(2 * num_nonzero_scalars); // 2x accounts for endomorphism points
for (size_t idx = 0; idx < num_threads; ++idx) {
scalars.insert(scalars.end(), thread_scalars[idx].begin(), thread_scalars[idx].end());
points.insert(points.end(), thread_points[idx].begin(), thread_points[idx].end());
Expand All @@ -173,6 +175,149 @@ template <class Curve> class CommitmentKey {
// Call the version of pippenger which assumes all points are distinct
return scalar_multiplication::pippenger_unsafe<Curve>(scalars, points, pippenger_runtime_state);
}

/**
 * @brief Efficiently commit to a polynomial whose nonzero elements are arranged in discrete blocks
 * @details Given a set of ranges where the polynomial takes non-zero values, copy the non-zero inputs (scalars,
 * points) into contiguous memory and commit to them using the normal pippenger algorithm. Defaults to the
 * conventional commit method if the number of non-zero entries is beyond a threshold relative to the full
 * polynomial size.
 * @note The wire polynomials have the described form when a structured execution trace is in use.
 * @warning Method makes a copy of all {point, scalar} pairs that comprise the reduced input. May not be efficient
 * in terms of memory or computation for polynomials beyond a certain sparseness threshold.
 *
 * @param polynomial
 * @param active_ranges
 * @return Commitment
 */
Commitment commit_structured(PolynomialSpan<const Fr> polynomial,
                             const std::vector<std::pair<size_t, size_t>>& active_ranges)
{
    BB_OP_COUNT_TIME();
    ASSERT(polynomial.end_index() <= srs->get_monomial_size());

    // Percentage of nonzero coefficients beyond which we resort to the conventional commit method
    constexpr size_t DENSITY_THRESHOLD = 75;

    // Count the scalars contained in the active ranges
    size_t num_active_scalars = 0;
    for (const auto& [range_start, range_end] : active_ranges) {
        num_active_scalars += range_end - range_start;
    }

    // Compute "active" percentage of polynomial; resort to standard commit if appropriate
    const size_t active_percentage = num_active_scalars * 100 / polynomial.size();
    if (active_percentage > DENSITY_THRESHOLD) {
        return commit(polynomial);
    }

    // Extract the precomputed point table (contains raw SRS points at even indices and the corresponding
    // endomorphism point (\beta*x, -y) at odd indices).
    std::span<G1> point_table = srs->get_monomial_points();

    // Copy the active scalars and their corresponding point-table entries into contiguous memory
    std::vector<Fr> scalars;
    std::vector<G1> points;
    scalars.reserve(num_active_scalars);
    points.reserve(2 * num_active_scalars); // 2x accounts for the endomorphism points
    for (const auto& [range_start, range_end] : active_ranges) {
        scalars.insert(scalars.end(), &polynomial[range_start], &polynomial[range_end]);
        points.insert(points.end(), &point_table[2 * range_start], &point_table[2 * range_end]);
    }

    // Call pippenger
    return scalar_multiplication::pippenger_unsafe<Curve>(scalars, points, pippenger_runtime_state);
}

/**
 * @brief Efficiently commit to a polynomial with discrete blocks of arbitrary elements and constant elements
 * @details Similar to method commit_structured() except the complement blocks contain non-zero constant values
 * (which are assumed to differ between blocks). This is exactly the structure of the permutation grand product
 * polynomial z_perm when a structured execution trace is in use.
 * @warning Requires a copy of all {point, scalar} pairs (including endo points) corresponding to the primary blocks
 * and a copy of all of the points (without endo points) corresponding to their complement.
 *
 * @param polynomial
 * @param active_ranges
 * @return Commitment
 */
Commitment commit_structured_with_nonzero_complement(PolynomialSpan<const Fr> polynomial,
                                                     const std::vector<std::pair<size_t, size_t>>& active_ranges)
{
    BB_OP_COUNT_TIME();
    ASSERT(polynomial.end_index() <= srs->get_monomial_size());

    using BatchedAddition = BatchedAffineAddition<Curve>;

    // Percentage of constant coefficients beyond which we resort to the conventional commit method
    const size_t DENSITY_THRESHOLD = 50;

    // Compute the active range complement over which the polynomial is assumed to be constant within each range.
    // The final complement range extends to the end of the polynomial rather than to the start of a (nonexistent)
    // next active range, so active_ranges is deliberately never indexed at i + 1 on the last iteration (doing so
    // previously read out of bounds).
    std::vector<std::pair<size_t, size_t>> active_ranges_complement;
    for (size_t i = 0; i < active_ranges.size(); ++i) {
        const size_t start = active_ranges[i].second;
        const size_t end = (i + 1 < active_ranges.size()) ? active_ranges[i + 1].first : polynomial.end_index();
        active_ranges_complement.emplace_back(start, end);
    }

    size_t total_num_complement_scalars = 0;
    for (const auto& range : active_ranges_complement) {
        total_num_complement_scalars += range.second - range.first;
    }

    // Extract the precomputed point table (contains raw SRS points at even indices and the corresponding
    // endomorphism point (\beta*x, -y) at odd indices).
    std::span<G1> point_table = srs->get_monomial_points();

    // Compute complement percentage of polynomial; resort to standard commit if appropriate
    const size_t complement_percentage = total_num_complement_scalars * 100 / polynomial.size();
    if (complement_percentage < DENSITY_THRESHOLD) {
        return commit(polynomial);
    }

    // Commit to the active (non-constant) regions via the sparse structured method
    const Commitment active_region_contribution = commit_structured(polynomial, active_ranges);

    // Copy the raw SRS points corresponding to the constant regions into contiguous memory. Only the even-index
    // (non-endomorphism) table entries are needed here, hence the reserve of one point per scalar.
    // TODO(https://github.com/AztecProtocol/barretenberg/issues/1131): Peak memory usage could be improved by
    // performing this copy and the subsequent summation as a precomputation prior to constructing the point table.
    std::vector<G1> points;
    points.reserve(total_num_complement_scalars);
    for (const auto& range : active_ranges_complement) {
        const size_t start = 2 * range.first;
        const size_t end = 2 * range.second;
        for (size_t i = start; i < end; i += 2) { // stride 2 skips the endomorphism points
            points.emplace_back(point_table[i]);
        }
    }

    // Populate the set of unique scalars with first coeff from each range (values assumed constant over each range)
    // Compute the number of points in each sequence to be summed
    std::vector<Fr> unique_scalars;
    std::vector<size_t> sequence_counts;
    for (const auto& range : active_ranges_complement) {
        if (range.second - range.first > 0) { // ignore empty complement ranges
            unique_scalars.emplace_back(polynomial.span[range.first]);
            sequence_counts.emplace_back(range.second - range.first);
        }
    }

    // Reduce each sequence of points to a single point via batched affine addition
    auto reduced_points = BatchedAddition::add_in_place(points, sequence_counts);

    // Directly compute the full commitment given the reduced inputs
    Commitment result = active_region_contribution;
    for (auto [scalar, point] : zip_view(unique_scalars, reduced_points)) {
        result = result + point * scalar;
    }

    return result;
}
};

} // namespace bb
Loading
Loading