chore: leaf aggregation tuning (#1150)

* chore: leaf aggregation tuning * chore: toggle leaf for ecrecover * chore: update metrics doc * chore: more scoping
openvm-org · Jan 1, 2025 · ec646a6 · ec646a6
1 parent 349e8da
commit ec646a6
Show file tree

Hide file tree

Showing 9 changed files with 116 additions and 60 deletions.
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -67,6 +67,19 @@ build with target set to your **host** machine, while running `bench_from_exe` i
 rust-objdump -d target/riscv32im-risc0-zkvm-elf/release/openvm-fibonacci-program
 ```
 
+## Running a Benchmark Locally
+
+Running a benchmark locally is simple. Just run the following command:
+
+```bash
+OUTPUT_PATH="metrics.json" cargo run --release --bin <benchmark_name>
+```
+
+where `<benchmark_name>.rs` is one of the files in [`src/bin`](./src/bin).
+The `OUTPUT_PATH` environment variable should be set to the file path where you want the collected metrics to be written. If it is unset, metrics are not written to a file.
+
+To run a benchmark with the leaf aggregation, add `--features aggregation` to the above command.
+
 ## Adding a Benchmark to CI
 
 To add the benchmark to CI, update the [ci/benchmark-config.json](../ci/benchmark-config.json) file and set its configuration parameters. To make the benchmark run on every PR, follow the existing format with `e2e_bench = false`. To make the benchmark run only when label `run_benchmark_e2e` is present, set `e2e_bench = true` and specify values for `root_log_blowup` and `internal_log_blowup`.

diff --git a/benchmarks/src/bin/ecrecover.rs b/benchmarks/src/bin/ecrecover.rs
@@ -159,6 +159,9 @@ fn main() -> Result<()> {
                 vm_config,
                 exe,
                 input_stream.into(),
+                #[cfg(feature = "aggregation")]
+                true,
+                #[cfg(not(feature = "aggregation"))]
                 false,
             )
         })?;

diff --git a/benchmarks/src/utils.rs b/benchmarks/src/utils.rs
@@ -2,7 +2,6 @@ use std::{fs::read, path::PathBuf};
 
 use clap::{command, Parser};
 use eyre::Result;
-use metrics::counter;
 use openvm_build::{build_guest_package, get_package, guest_methods, GuestOptions};
 use openvm_circuit::arch::{instructions::exe::VmExe, VirtualMachine, VmConfig};
 use openvm_sdk::{
@@ -21,6 +20,7 @@ use openvm_stark_sdk::{
 };
 use openvm_transpiler::{elf::Elf, openvm_platform::memory::MEM_SIZE};
 use tempfile::tempdir;
+use tracing::info_span;
 
 type F = BabyBear;
 type SC = BabyBearPoseidon2Config;
@@ -92,31 +92,34 @@ where
     VC::Executor: Chip<SC>,
     VC::Periphery: Chip<SC>,
 {
-    counter!("fri.log_blowup").absolute(app_config.app_fri_params.fri_params.log_blowup as u64);
+    let bench_name = bench_name.to_string();
     let engine = BabyBearPoseidon2Engine::new(app_config.app_fri_params.fri_params);
     let vm = VirtualMachine::new(engine, app_config.app_vm_config.clone());
     // 1. Generate proving key from config.
-    let app_pk = metrics_span("keygen_time_ms", || {
-        AppProvingKey::keygen(app_config.clone())
+    let app_pk = info_span!("keygen", group = &bench_name).in_scope(|| {
+        metrics_span("keygen_time_ms", || {
+            AppProvingKey::keygen(app_config.clone())
+        })
     });
     // 2. Commit to the exe by generating cached trace for program.
-    let committed_exe = metrics_span("commit_exe_time_ms", || {
-        commit_app_exe(app_config.app_fri_params.fri_params, exe)
+    let committed_exe = info_span!("commit_exe", group = &bench_name).in_scope(|| {
+        metrics_span("commit_exe_time_ms", || {
+            commit_app_exe(app_config.app_fri_params.fri_params, exe)
+        })
     });
     // 3. Executes runtime
     // 4. Generate trace
     // 5. Generate STARK proofs for each segment (segmentation is determined by `config`), with timer.
     let vk = app_pk.app_vm_pk.vm_pk.get_vk();
-    let prover =
-        AppProver::new(app_pk.app_vm_pk, committed_exe).with_program_name(bench_name.to_string());
-    let app_proofs = prover.generate_app_proof(input_stream);
-    // 6. Verify STARK proofs.
-    vm.verify(&vk, app_proofs.per_segment.clone())
+    let prover = AppProver::new(app_pk.app_vm_pk, committed_exe).with_program_name(bench_name);
+    let app_proof = prover.generate_app_proof(input_stream);
+    // 6. Verify STARK proofs, including boundary conditions.
+    vm.verify(&vk, app_proof.per_segment.clone())
         .expect("Verification failed");
     if bench_leaf {
         let leaf_vm_pk = leaf_keygen(app_config.leaf_fri_params.fri_params);
         let leaf_prover = LeafProver::new(leaf_vm_pk, app_pk.leaf_committed_exe);
-        leaf_prover.generate_proof(&app_proofs);
+        leaf_prover.generate_proof(&app_proof);
     }
     Ok(())
 }
diff --git a/crates/circuits/sha256-air/src/trace.rs b/crates/circuits/sha256-air/src/trace.rs
@@ -12,7 +12,7 @@ use sha2::{compress256, digest::generic_array::GenericArray};
 use super::{
     air::Sha256Air, big_sig0_field, big_sig1_field, ch_field, columns::Sha256RoundCols, compose,
     get_flag_pt_array, maj_field, small_sig0_field, small_sig1_field, SHA256_BLOCK_WORDS,
-    SHA256_DIGEST_WIDTH, SHA256_HASH_WORDS, SHA256_ROUND_WIDTH, SHA256_WIDTH,
+    SHA256_DIGEST_WIDTH, SHA256_HASH_WORDS, SHA256_ROUND_WIDTH,
 };
 use crate::{
     big_sig0, big_sig1, ch, columns::Sha256DigestCols, limbs_into_u32, maj, small_sig0, small_sig1,
@@ -60,7 +60,7 @@ impl Sha256Air {
         #[cfg(debug_assertions)]
         {
             assert!(trace.len() == trace_width * SHA256_ROWS_PER_BLOCK);
-            assert!(trace_start_col + SHA256_WIDTH <= trace_width);
+            assert!(trace_start_col + super::SHA256_WIDTH <= trace_width);
             assert!(self.bitwise_lookup_bus == bitwise_lookup_chip.bus());
             if local_block_idx == 0 {
                 assert!(*prev_hash == SHA256_H);

diff --git a/crates/sdk/src/keygen/mod.rs b/crates/sdk/src/keygen/mod.rs
@@ -26,6 +26,7 @@ use openvm_stark_sdk::{
     p3_bn254_fr::Bn254Fr,
 };
 use serde::{Deserialize, Serialize};
+use tracing::info_span;
 
 use crate::{
     commit::babybear_digest_to_bn254,
@@ -329,7 +330,8 @@ pub fn leaf_keygen(fri_params: FriParameters) -> Arc<VmProvingKey<SC, NativeConf
     };
     let vm_config = agg_config.leaf_vm_config();
     let leaf_engine = BabyBearPoseidon2Engine::new(fri_params);
-    let leaf_vm_pk = VirtualMachine::new(leaf_engine, vm_config.clone()).keygen();
+    let leaf_vm_pk = info_span!("keygen", group = "leaf")
+        .in_scope(|| VirtualMachine::new(leaf_engine, vm_config.clone()).keygen());
     Arc::new(VmProvingKey {
         fri_params,
         vm_config,

diff --git a/crates/sdk/src/prover/agg.rs b/crates/sdk/src/prover/agg.rs
@@ -3,7 +3,7 @@ use std::sync::Arc;
 use openvm_native_circuit::NativeConfig;
 use openvm_native_recursion::hints::Hintable;
 use openvm_stark_sdk::{
-    config::baby_bear_poseidon2::BabyBearPoseidon2Engine,
+    config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
     openvm_stark_backend::prover::types::Proof,
 };
 use tracing::info_span;
@@ -23,7 +23,7 @@ use crate::{
     NonRootCommittedExe, RootSC, F, SC,
 };
 
-const DEFAULT_NUM_CHILDREN_LEAF: usize = 2;
+const DEFAULT_NUM_CHILDREN_LEAF: usize = 1;
 const DEFAULT_NUM_CHILDREN_INTERNAL: usize = 2;
 const DEFAULT_MAX_INTERNAL_WRAPPER_LAYERS: usize = 4;
 
@@ -37,7 +37,8 @@ pub struct AggStarkProver {
 }
 pub struct LeafProver {
     prover: VmLocalProver<SC, NativeConfig, BabyBearPoseidon2Engine>,
-    pub num_children_leaf: usize,
+    /// Each leaf proof aggregates `<= num_children` App VM proofs
+    pub num_children: usize,
 }
 
 impl AggStarkProver {
@@ -61,7 +62,7 @@ impl AggStarkProver {
     }
 
     pub fn with_num_children_leaf(mut self, num_children_leaf: usize) -> Self {
-        self.leaf_prover.num_children_leaf = num_children_leaf;
+        self.leaf_prover.num_children = num_children_leaf;
         self
     }
 
@@ -125,41 +126,42 @@ impl AggStarkProver {
                 &proofs,
                 self.num_children_internal,
             );
-            proofs = info_span!("internal verifier", group = "internal").in_scope(|| {
-                #[cfg(feature = "bench-metrics")]
-                metrics::counter!("fri.log_blowup")
-                    .absolute(self.internal_prover.pk.fri_params.log_blowup as u64);
-                internal_inputs
-                    .into_iter()
-                    .map(|input| {
-                        internal_node_idx += 1;
-                        info_span!(
-                            "Internal verifier proof",
-                            idx = internal_node_idx,
-                            hgt = internal_node_height
-                        )
-                        .in_scope(|| {
-                            SingleSegmentVmProver::prove(&self.internal_prover, input.write())
+            proofs = info_span!("agg_layer", group = "internal.{}", internal_node_height).in_scope(
+                || {
+                    #[cfg(feature = "bench-metrics")]
+                    {
+                        metrics::counter!("fri.log_blowup")
+                            .absolute(self.internal_prover.fri_params().log_blowup as u64);
+                        metrics::counter!("num_children")
+                            .absolute(self.num_children_internal as u64);
+                    }
+                    internal_inputs
+                        .into_iter()
+                        .map(|input| {
+                            internal_node_idx += 1;
+                            info_span!("single_internal_agg", idx = internal_node_idx,).in_scope(
+                                || {
+                                    SingleSegmentVmProver::prove(
+                                        &self.internal_prover,
+                                        input.write(),
+                                    )
+                                },
+                            )
                         })
-                    })
-                    .collect()
-            });
+                        .collect()
+                },
+            );
             internal_node_height += 1;
         }
         proofs.pop().unwrap()
     }
 
     fn generate_root_proof_impl(&self, root_input: RootVmVerifierInput<SC>) -> Proof<RootSC> {
-        info_span!("root verifier", group = "root").in_scope(|| {
+        info_span!("agg_layer", group = "root", idx = 0).in_scope(|| {
             let input = root_input.write();
             #[cfg(feature = "bench-metrics")]
-            metrics::counter!("fri.log_blowup").absolute(
-                self.root_prover
-                    .root_verifier_pk
-                    .vm_pk
-                    .fri_params
-                    .log_blowup as u64,
-            );
+            metrics::counter!("fri.log_blowup")
+                .absolute(self.root_prover.fri_params().log_blowup as u64);
             SingleSegmentVmProver::prove(&self.root_prover, input)
         })
     }
@@ -176,33 +178,38 @@ impl LeafProver {
         );
         Self {
             prover,
-            num_children_leaf: DEFAULT_NUM_CHILDREN_LEAF,
+            num_children: DEFAULT_NUM_CHILDREN_LEAF,
         }
     }
-    pub fn with_num_children_leaf(mut self, num_children_leaf: usize) -> Self {
-        self.num_children_leaf = num_children_leaf;
+    pub fn with_num_children(mut self, num_children_leaf: usize) -> Self {
+        self.num_children = num_children_leaf;
         self
     }
     pub fn generate_proof(&self, app_proofs: &ContinuationVmProof<SC>) -> Vec<Proof<SC>> {
-        info_span!("leaf verifier", group = "leaf").in_scope(|| {
+        info_span!("agg_layer", group = "leaf").in_scope(|| {
             #[cfg(feature = "bench-metrics")]
-            metrics::counter!("fri.log_blowup")
-                .absolute(self.prover.pk.fri_params.log_blowup as u64);
-            let leaf_inputs = LeafVmVerifierInput::chunk_continuation_vm_proof(
-                app_proofs,
-                self.num_children_leaf,
-            );
+            {
+                metrics::counter!("fri.log_blowup").absolute(self.fri_params().log_blowup as u64);
+                metrics::counter!("num_children").absolute(self.num_children as u64);
+            }
+            let leaf_inputs =
+                LeafVmVerifierInput::chunk_continuation_vm_proof(app_proofs, self.num_children);
+            tracing::info!("num_leaf_proofs={}", leaf_inputs.len());
             leaf_inputs
                 .into_iter()
                 .enumerate()
                 .map(|(leaf_node_idx, input)| {
-                    info_span!("leaf verifier proof", idx = leaf_node_idx).in_scope(|| {
+                    info_span!("single_leaf_agg", idx = leaf_node_idx).in_scope(|| {
                         SingleSegmentVmProver::prove(&self.prover, input.write_to_stream())
                     })
                 })
                 .collect::<Vec<_>>()
         })
     }
+    #[allow(dead_code)]
+    pub(crate) fn fri_params(&self) -> &FriParameters {
+        &self.prover.pk.fri_params
+    }
 }
 
 fn heights_le(a: &[usize], b: &[usize]) -> bool {

diff --git a/crates/sdk/src/prover/root.rs b/crates/sdk/src/prover/root.rs
@@ -3,7 +3,7 @@ use openvm_circuit::arch::{SingleSegmentVmExecutor, Streams};
 use openvm_native_circuit::NativeConfig;
 use openvm_native_recursion::hints::Hintable;
 use openvm_stark_sdk::{
-    config::baby_bear_poseidon2_root::BabyBearPoseidon2RootEngine,
+    config::{baby_bear_poseidon2_root::BabyBearPoseidon2RootEngine, FriParameters},
     engine::{StarkEngine, StarkFriEngine},
     openvm_stark_backend::prover::types::Proof,
 };
@@ -40,12 +40,19 @@ impl RootVerifierLocalProver {
             .unwrap();
         result.air_heights
     }
+    pub fn vm_config(&self) -> &NativeConfig {
+        &self.root_verifier_pk.vm_pk.vm_config
+    }
+    #[allow(dead_code)]
+    pub(crate) fn fri_params(&self) -> &FriParameters {
+        &self.root_verifier_pk.vm_pk.fri_params
+    }
 }
 
 impl SingleSegmentVmProver<RootSC> for RootVerifierLocalProver {
     fn prove(&self, input: impl Into<Streams<F>>) -> Proof<RootSC> {
         let input = input.into();
-        let vm = SingleSegmentVmExecutor::new(self.root_verifier_pk.vm_pk.vm_config.clone());
+        let vm = SingleSegmentVmExecutor::new(self.vm_config().clone());
         let mut proof_input = vm
             .execute_and_generate(self.root_verifier_pk.root_committed_exe.clone(), input)
             .unwrap();
@@ -68,7 +75,7 @@ impl SingleSegmentVmProver<RootSC> for RootVerifierLocalProver {
             // Overwrite the AIR ID.
             proof_input.per_air[i].0 = i;
         }
-        let e = BabyBearPoseidon2RootEngine::new(self.root_verifier_pk.vm_pk.fri_params);
+        let e = BabyBearPoseidon2RootEngine::new(*self.fri_params());
         e.prove(&self.root_verifier_pk.vm_pk.vm_pk, proof_input)
     }
 }

diff --git a/crates/sdk/src/prover/vm/local.rs b/crates/sdk/src/prover/vm/local.rs
@@ -14,7 +14,7 @@ use openvm_stark_backend::{
     prover::types::Proof,
     Chip,
 };
-use openvm_stark_sdk::engine::StarkFriEngine;
+use openvm_stark_sdk::{config::FriParameters, engine::StarkFriEngine};
 
 use crate::prover::vm::{
     types::VmProvingKey, AsyncContinuationVmProver, AsyncSingleSegmentVmProver,
@@ -58,6 +58,10 @@ impl<SC: StarkGenericConfig, VC, E: StarkFriEngine<SC>> VmLocalProver<SC, VC, E>
     pub fn vm_config(&self) -> &VC {
         &self.pk.vm_config
     }
+    #[allow(dead_code)]
+    pub(crate) fn fri_params(&self) -> &FriParameters {
+        &self.pk.fri_params
+    }
 }
 
 impl<SC: StarkGenericConfig, VC: VmConfig<Val<SC>>, E: StarkFriEngine<SC>> ContinuationVmProver<SC>

diff --git a/docs/crates/metrics.md b/docs/crates/metrics.md
@@ -17,3 +17,20 @@ For a single segment proof, the following metrics are collected:
   - The total proving time of the proof is the sum of `execute_time_ms + trace_gen_time_ms + stark_prove_excluding_trace_time_ms`.
 - `total_cycles` (counter): The total number of cycles in the segment.
 - `total_cells_used` (counter): The total number of main trace cells used by all chips in the segment. This does not include cells needed to pad rows to power-of-two matrix heights. Only main trace cells, not preprocessed or permutation trace cells, are counted.
+
+## Scoping
+
+As mentioned above, different proofs must be scoped for metrics post-processing. We currently use labels which are added within a scoped span using the [`metrics_tracing_context`](https://docs.rs/metrics-tracing-context/latest/metrics_tracing_context/) crate. To make post-processing easier, we have the following conventions:
+
+- The `group` label should be the top level scope for all proofs which can be proven in parallel in an aggregation tree.
+
+The `openvm-sdk` crate applies the following additional labeling conventions:
+
+- For App proofs, the `group` label is set to `app_proof` or the `program_name: String` set in the `AppProver`.
+  - App proofs are distinguished by the `segment` label, which is set to the segment index.
+- The leaf aggregation layer has `group = leaf`.
+  - Leaf proofs (each without continuations) are distinguished by the `idx` label, which is set to the leaf node index.
+- The internal aggregation layers have `group = internal.{hgt}` where `hgt` is the height within the aggregation tree (`hgt = 0` is the furthest from the root).
+  - Internal proofs (each without continuations) are distinguished by the `idx` label, which is set to the internal node index. The internal node index is not reset across internal layers, but it is separate from the leaf node index.
+- The root aggregation layer has `group = root`.
+  - There is only a single root proof, but we add `idx = 0` for uniformity.