Skip to content

Commit

Permalink
refactor: Optimize polynomial operations with parallel scan (#330)
Browse files Browse the repository at this point in the history
- Optimized performance by introducing parallel scan methods across multiple components, focusing on the `rlc<T, F>` method and the `powers` function.
- Switched from `DoubleEndedIteratorExt` to `IndexedParallelIteratorExt` across several files for iterator processing in parallel, and modified corresponding imports accordingly.
- Left `DoubleEndedIteratorExt` as an option, since it's more efficient on small polynomials.
- Added `rayon-scan` to general dependencies
  • Loading branch information
huitseeker authored Feb 19, 2024
1 parent e1a69d4 commit 8d2bb89
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 33 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ rand = "0.8.5"
ref-cast = "1.0.20" # allocation-less conversion in multilinear polys
derive_more = "0.99.17" # lightens impl macros for pasta
static_assertions = "1.1.0"
rayon-scan = "0.1.0"

[target.'cfg(any(target_arch = "x86_64", target_arch = "aarch64"))'.dependencies]
# grumpkin-msm has been patched to support MSMs for the pasta curve cycle
Expand Down
10 changes: 4 additions & 6 deletions src/provider/hyperkzg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ use crate::{
kzg_commitment::{KZGCommitmentEngine, KZGProverKey, KZGVerifierKey, UniversalKZGParam},
pedersen::Commitment,
traits::DlogGroup,
util::iterators::DoubleEndedIteratorExt as _,
util::iterators::IndexedParallelIteratorExt as _,
},
spartan::polys::univariate::UniPoly,
spartan::{polys::univariate::UniPoly, powers},
traits::{
commitment::{CommitmentEngineTrait, Len},
evaluation::EvaluationEngineTrait,
Expand Down Expand Up @@ -91,9 +91,7 @@ where

/// Compute powers of q : (1, q, q^2, ..., q^(k-1))
pub fn batch_challenge_powers(q: E::Fr, k: usize) -> Vec<E::Fr> {
std::iter::successors(Some(E::Fr::ONE), |&x| Some(x * q))
.take(k)
.collect()
powers(&q, k)
}

/// TODO: write doc
Expand Down Expand Up @@ -181,7 +179,7 @@ where
-> (Vec<E::G1Affine>, Vec<Vec<E::Fr>>) {
let kzg_compute_batch_polynomial = |f: Vec<Vec<E::Fr>>, q: E::Fr| -> Vec<E::Fr> {
// Compute B(x) = f_0(x) + q * f_1(x) + ... + q^(k-1) * f_{k-1}(x)
let B: UniPoly<E::Fr> = f.into_iter().map(UniPoly::new).rlc(&q);
let B: UniPoly<E::Fr> = f.into_par_iter().map(UniPoly::new).rlc(&q);
B.coeffs
};
///////// END kzg_open_batch closure helpers
Expand Down
11 changes: 5 additions & 6 deletions src/provider/shplonk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::provider::kzg_commitment::KZGCommitmentEngine;
use crate::provider::kzg_commitment::{KZGProverKey, KZGVerifierKey, UniversalKZGParam};
use crate::provider::pedersen::Commitment;
use crate::provider::traits::DlogGroup;
use crate::provider::util::iterators::DoubleEndedIteratorExt;
use crate::provider::util::iterators::IndexedParallelIteratorExt as _;
use crate::spartan::polys::univariate::UniPoly;
use crate::traits::commitment::Len;
use crate::traits::evaluation::EvaluationEngineTrait;
Expand All @@ -12,9 +12,7 @@ use crate::{CommitmentEngineTrait, NovaError};
use ff::{Field, PrimeFieldBits};
use group::{Curve, Group as group_Group};
use pairing::{Engine, MillerLoopResult, MultiMillerLoop};
use rayon::iter::{
IndexedParallelIterator, IntoParallelIterator, IntoParallelRefMutIterator, ParallelIterator,
};
use rayon::prelude::*;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::marker::PhantomData;

Expand Down Expand Up @@ -191,7 +189,7 @@ where
// Phase 3
// Compute B(x) = f_0(x) + q * f_1(x) + ... + q^(k-1) * f_{k-1}(x)
let q = HyperKZG::<E, NE>::get_batch_challenge(&evals, transcript);
let batched_Pi: UniPoly<E::Fr> = polys.into_iter().map(UniPoly::new).rlc(&q);
let batched_Pi: UniPoly<E::Fr> = polys.into_par_iter().map(UniPoly::new).rlc(&q);

// Q(x), R(x) = P(x) / D(x), where D(x) = (x - r) * (x + r) * (x - r^2) = 1 * x^3 - r^2 * x^2 - r^2 * x + r^4
let D = UniPoly::new(vec![u[2] * u[2], -u[2], -u[2], E::Fr::from(1)]);
Expand Down Expand Up @@ -300,7 +298,7 @@ where
}
}

let C_P: E::G1 = pi.comms.iter().map(|comm| comm.to_curve()).rlc(&q);
let C_P: E::G1 = pi.comms.par_iter().map(|comm| comm.to_curve()).rlc(&q);
let C_Q = pi.C_Q;
let C_H = pi.C_H;
let r_squared = u[2];
Expand Down Expand Up @@ -339,6 +337,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::provider::util::iterators::DoubleEndedIteratorExt as _;
use crate::traits::TranscriptEngineTrait;
use crate::{provider::keccak::Keccak256Transcript, CommitmentEngineTrait, CommitmentKey};
use halo2curves::bn256::G1;
Expand Down
45 changes: 43 additions & 2 deletions src/provider/util/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@ pub mod msm {
}

pub mod iterators {
use std::borrow::Borrow;
use ff::Field;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use rayon_scan::ScanParallelIterator;
use std::iter::DoubleEndedIterator;
use std::ops::{AddAssign, MulAssign};
use std::{
borrow::Borrow,
ops::{AddAssign, MulAssign},
};

pub trait DoubleEndedIteratorExt: DoubleEndedIterator {
/// This function employs Horner's scheme and core traits to create a combination of an iterator input with the powers
Expand All @@ -38,6 +43,42 @@ pub mod iterators {
}

impl<I: DoubleEndedIterator> DoubleEndedIteratorExt for I {}

pub trait IndexedParallelIteratorExt: IndexedParallelIterator {
  /// Computes the random linear combination of the items of this iterator with the successive
  /// powers of `coefficient`, i.e. `x_0 + coefficient * x_1 + ... + coefficient^(n-1) * x_{n-1}`
  /// for an iterator of `n` items.
  ///
  /// The powers are generated with a parallel scan, every item is scaled by its matching power
  /// in parallel, and the scaled items are then summed with a parallel reduction.
  ///
  /// # Panics
  ///
  /// Panics if the iterator is empty, since there is then no value of `T` to return.
  fn rlc<T, F>(self, coefficient: &F) -> T
  where
    F: Field,
    Self::Item: Borrow<T>,
    T: Clone + for<'a> MulAssign<&'a F> + for<'r> AddAssign<&'r T> + Send + Sync,
  {
    // Checked in every build profile: an empty input would otherwise surface as an opaque
    // index-out-of-bounds panic on the seed vector below.
    let len = self.len();
    assert!(len > 0, "rlc requires a non-empty iterator");

    // Seed the scan with (1, c, c, ..., c) so that the scanned output is the powers of the
    // coefficient, one per input item.
    let seeds = {
      let mut seeds = vec![*coefficient; len];
      seeds[0] = F::ONE;
      seeds
    };
    // the collect is due to Scan being unindexed
    let powers: Vec<_> = seeds
      .into_par_iter()
      .scan(|a, b| *a * *b, F::ONE)
      .collect();

    self
      .zip_eq(powers.into_par_iter())
      .map(|(item, power)| {
        // Clone so that borrowed items can be scaled without mutating the caller's data.
        let mut scaled = item.borrow().clone();
        scaled *= &power;
        scaled
      })
      .reduce_with(|mut acc, term| {
        acc += &term;
        acc
      })
      .expect("a non-empty iterator always reduces to a value")
  }
}

impl<I: IndexedParallelIterator> IndexedParallelIteratorExt for I {}
}

#[cfg(test)]
Expand Down
8 changes: 4 additions & 4 deletions src/spartan/batched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ impl<E: Engine, EE: EvaluationEngineTrait<E>> BatchedRelaxedR1CSSNARKTrait<E>

// Sample challenge for random linear-combination of outer claims
let outer_r = transcript.squeeze(b"out_r")?;
let outer_r_powers = powers::<E>(&outer_r, num_instances);
let outer_r_powers = powers(&outer_r, num_instances);

// Verify outer sumcheck: Az * Bz - uCz_E for each instance
let (sc_proof_outer, r_x, claims_outer) = SumcheckProof::prove_cubic_with_additive_term_batch(
Expand Down Expand Up @@ -257,7 +257,7 @@ impl<E: Engine, EE: EvaluationEngineTrait<E>> BatchedRelaxedR1CSSNARKTrait<E>
let inner_r = transcript.squeeze(b"in_r")?;
let inner_r_square = inner_r.square();
let inner_r_cube = inner_r_square * inner_r;
let inner_r_powers = powers::<E>(&inner_r_cube, num_instances);
let inner_r_powers = powers(&inner_r_cube, num_instances);

let claims_inner_joint = evals_Az_Bz_Cz
.iter()
Expand Down Expand Up @@ -405,7 +405,7 @@ impl<E: Engine, EE: EvaluationEngineTrait<E>> BatchedRelaxedR1CSSNARKTrait<E>

// Sample challenge for random linear-combination of outer claims
let outer_r = transcript.squeeze(b"out_r")?;
let outer_r_powers = powers::<E>(&outer_r, num_instances);
let outer_r_powers = powers(&outer_r, num_instances);

let (claim_outer_final, r_x) = self.sc_proof_outer.verify_batch(
&vec![E::Scalar::ZERO; num_instances],
Expand Down Expand Up @@ -456,7 +456,7 @@ impl<E: Engine, EE: EvaluationEngineTrait<E>> BatchedRelaxedR1CSSNARKTrait<E>
let inner_r = transcript.squeeze(b"in_r")?;
let inner_r_square = inner_r.square();
let inner_r_cube = inner_r_square * inner_r;
let inner_r_powers = powers::<E>(&inner_r_cube, num_instances);
let inner_r_powers = powers(&inner_r_cube, num_instances);

// Compute inner claims Mzᵢ = (Azᵢ + r⋅Bzᵢ + r²⋅Czᵢ),
// which are batched by Sumcheck into one claim: ∑ᵢ r³ⁱ⋅Mzᵢ
Expand Down
4 changes: 2 additions & 2 deletions src/spartan/batched_ppsnark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ impl<E: Engine, EE: EvaluationEngineTrait<E>> BatchedRelaxedR1CSSNARKTrait<E>
let rho = transcript.squeeze(b"r")?;

let s = transcript.squeeze(b"r")?;
let s_powers = powers::<E>(&s, num_instances * num_claims_per_instance);
let s_powers = powers(&s, num_instances * num_claims_per_instance);

let (claim_sc_final, rand_sc) = {
// Gather all claims into a single vector
Expand Down Expand Up @@ -1176,7 +1176,7 @@ impl<E: Engine, EE: EvaluationEngineTrait<E>> BatchedRelaxedR1CSSNARK<E, EE> {

// Sample a challenge for the random linear combination of all scaled claims
let s = transcript.squeeze(b"r")?;
let coeffs = powers::<E>(&s, claims.len());
let coeffs = powers(&s, claims.len());

// At the start of each round, the running claim is equal to the random linear combination
// of the Sumcheck claims, evaluated over the bound polynomials.
Expand Down
19 changes: 10 additions & 9 deletions src/spartan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,17 @@ use crate::{
use ff::Field;
use itertools::Itertools as _;
use polys::multilinear::SparsePolynomial;

use rayon::{iter::IntoParallelRefIterator, prelude::*};
use rayon_scan::ScanParallelIterator as _;
use ref_cast::RefCast;

// Creates a vector of the first `n` powers of `s`.
fn powers<E: Engine>(s: &E::Scalar, n: usize) -> Vec<E::Scalar> {
/// Creates a vector of the first `n` powers of `s`.
pub fn powers<F: Field>(s: &F, n: usize) -> Vec<F> {
assert!(n >= 1);
std::iter::successors(Some(E::Scalar::ONE), |&x| Some(x * s))
.take(n)
.collect()
let mut v = vec![*s; n];
v[0] = F::ONE;
v.into_par_iter().scan(|a, b| *a * *b, F::ONE).collect()
}

/// A type that holds a witness to a polynomial evaluation instance
Expand All @@ -51,7 +52,7 @@ impl<E: Engine> PolyEvalWitness<E> {
/// We allow the input polynomials to have different sizes, and interpret smaller ones as
/// being padded with 0 to the maximum size of all polynomials.
fn batch_diff_size(W: &[&Self], s: E::Scalar) -> Self {
let powers = powers::<E>(&s, W.len());
let powers = powers(&s, W.len());

let size_max = W.iter().map(|w| w.p.len()).max().unwrap();
let p_vec = W.par_iter().map(|w| &w.p);
Expand Down Expand Up @@ -128,7 +129,7 @@ impl<E: Engine> PolyEvalInstance<E> {
assert_eq!(e_vec.len(), num_instances);

let num_vars_max = x.len();
let powers: Vec<E::Scalar> = powers::<E>(&s, num_instances);
let powers: Vec<E::Scalar> = powers(&s, num_instances);
// Rescale evaluations by the first Lagrange polynomial,
// so that we can check its evaluation against x
let evals_scaled = zip_with!(iter, (e_vec, num_vars), |eval, num_rounds| {
Expand Down Expand Up @@ -222,7 +223,7 @@ mod tests {
.skip(1)
.for_each(|p| assert_eq!(p.len(), p_vec[0].len()));

let powers_of_s = powers::<E>(s, p_vec.len());
let powers_of_s = powers(s, p_vec.len());

let p = zip_with!(par_iter, (p_vec, powers_of_s), |v, weight| {
// compute the weighted sum for each vector
Expand Down Expand Up @@ -250,7 +251,7 @@ mod tests {
let num_instances = c_vec.len();
assert_eq!(e_vec.len(), num_instances);

let powers_of_s = powers::<E>(s, num_instances);
let powers_of_s = powers(s, num_instances);
// Weighted sum of evaluations
let e = zip_with!(par_iter, (e_vec, powers_of_s), |e, p| *e * p).sum();
// Weighted sum of commitments
Expand Down
4 changes: 2 additions & 2 deletions src/spartan/ppsnark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ impl<E: Engine, EE: EvaluationEngineTrait<E>> RelaxedR1CSSNARK<E, EE> {
.collect::<Vec<E::Scalar>>();

let s = transcript.squeeze(b"r")?;
let coeffs = powers::<E>(&s, claims.len());
let coeffs = powers(&s, claims.len());

// compute the joint claim
let claim = zip_with!((claims.iter(), coeffs.iter()), |c_1, c_2| *c_1 * c_2).sum();
Expand Down Expand Up @@ -879,7 +879,7 @@ impl<E: Engine, EE: EvaluationEngineTrait<E>> RelaxedR1CSSNARKTrait<E> for Relax

let num_claims = 10;
let s = transcript.squeeze(b"r")?;
let coeffs = powers::<E>(&s, num_claims);
let coeffs = powers(&s, num_claims);
let claim = (coeffs[7] + coeffs[8]) * claim; // rest are zeros

// verify sc
Expand Down
4 changes: 2 additions & 2 deletions src/spartan/snark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ pub(in crate::spartan) fn batch_eval_prove<E: Engine>(

// generate a challenge, and powers of it for random linear combination
let rho = transcript.squeeze(b"r")?;
let powers_of_rho = powers::<E>(&rho, num_claims);
let powers_of_rho = powers(&rho, num_claims);

let (claims, u_xs, comms): (Vec<_>, Vec<_>, Vec<_>) =
u_vec.into_iter().map(|u| (u.e, u.x, u.c)).multiunzip();
Expand Down Expand Up @@ -511,7 +511,7 @@ pub(in crate::spartan) fn batch_eval_verify<E: Engine>(

// generate a challenge
let rho = transcript.squeeze(b"r")?;
let powers_of_rho = powers::<E>(&rho, num_claims);
let powers_of_rho = powers(&rho, num_claims);

// Compute nᵢ and n = maxᵢ{nᵢ}
let num_rounds = u_vec.iter().map(|u| u.x.len()).collect::<Vec<_>>();
Expand Down

1 comment on commit 8d2bb89

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmarks

Table of Contents

Overview

This benchmark report shows the Arecibo GPU benchmarks.
NVIDIA L4
Intel(R) Xeon(R) CPU @ 2.20GHz
32 vCPUs
125 GB RAM
Workflow run: https://github.com/lurk-lab/arecibo/actions/runs/7963498637

Benchmark Results

RecursiveSNARK-NIVC-2

ref=e1a69d4 ref=8d2bb89
Prove-NumCons-6540 45.31 ms (✅ 1.00x) 45.17 ms (✅ 1.00x faster)
Verify-NumCons-6540 36.01 ms (✅ 1.00x) 35.67 ms (✅ 1.01x faster)
Prove-NumCons-1028888 330.65 ms (✅ 1.00x) 322.86 ms (✅ 1.02x faster)
Verify-NumCons-1028888 253.50 ms (✅ 1.00x) 250.11 ms (✅ 1.01x faster)

CompressedSNARK-NIVC-Commitments-2

ref=e1a69d4 ref=8d2bb89
Prove-NumCons-6540 10.74 s (✅ 1.00x) 10.80 s (✅ 1.01x slower)
Verify-NumCons-6540 49.10 ms (✅ 1.00x) 52.83 ms (✅ 1.08x slower)
Prove-NumCons-1028888 54.09 s (✅ 1.00x) 53.07 s (✅ 1.02x faster)
Verify-NumCons-1028888 48.92 ms (✅ 1.00x) 52.19 ms (✅ 1.07x slower)

Made with criterion-table

Please sign in to comment.