From 7395b95672e94318de695dc0fc71863ef31b2e30 Mon Sep 17 00:00:00 2001
From: Cody Gunton <codygunton@gmail.com>
Date: Mon, 26 Aug 2024 13:35:00 -0400
Subject: [PATCH] refactor(Protogalaxy): Isolate some state and clarify skipped
 zero computation (#8173)

Some steps toward clarifying state during Protogalaxy proof
construction:
- Move accumulators into the class that contains state.
- Reduce size of Prover header. Move internal functions into a purely
  static class. This accounts for most of the diff.
- Clarify the handling of the known zero value (the skipped zero
  computation) while removing loose coupling of template parameters.

The next step will be to reduce the amount of state in ProverInstances.
---
 .../relations_bench/relations.bench.cpp       |   6 +-
 .../protogalaxy/combiner.test.cpp             |  36 +-
 .../protogalaxy/protogalaxy.test.cpp          |  19 +-
 .../protogalaxy/protogalaxy_prover.hpp        | 325 +-----------
 .../protogalaxy/protogalaxy_prover_impl.hpp   | 256 +--------
 .../protogalaxy_prover_internal.hpp           | 501 ++++++++++++++++++
 .../protogalaxy/protogalaxy_prover_mega.cpp   |   1 +
 .../protogalaxy/protogalaxy_prover_ultra.cpp  |   1 +
 .../sumcheck/instance/instances.hpp           |   1 +
 9 files changed, 559 insertions(+), 587 deletions(-)
 create mode 100644 barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
index f735d2cfb19..b6545d90245 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
@@ -1,7 +1,8 @@
 #include "barretenberg/eccvm/eccvm_flavor.hpp"
-#include "barretenberg/protogalaxy/protogalaxy_prover.hpp"
+#include "barretenberg/protogalaxy/protogalaxy_prover_internal.hpp" // just for an alias; should perhaps move to prover
 #include "barretenberg/stdlib_circuit_builders/mega_flavor.hpp"
 #include "barretenberg/stdlib_circuit_builders/ultra_flavor.hpp"
+#include "barretenberg/sumcheck/instance/instances.hpp"
 #include "barretenberg/translator_vm/translator_flavor.hpp"
 #include <benchmark/benchmark.h>
 
@@ -53,8 +54,7 @@ template <typename Flavor, typename Relation> void execute_relation_for_univaria
 template <typename Flavor, typename Relation> void execute_relation_for_pg_univariates(::benchmark::State& state)
 {
     using ProverInstances = ProverInstances_<Flavor>;
-    using ProtoGalaxyProver = ProtoGalaxyProver_<ProverInstances>;
-    using Input = ProtoGalaxyProver::ExtendedUnivariates;
+    using Input = ProtogalaxyProverInternal<ProverInstances>::ExtendedUnivariates;
     using Accumulator = typename Relation::template ProtogalaxyTupleOfUnivariatesOverSubrelations<ProverInstances::NUM>;
 
     execute_relation<Flavor, Relation, Input, Accumulator>(state);
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
index 179e81b519e..6d29ac6a4dc 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
@@ -1,7 +1,6 @@
 #include "barretenberg/honk/utils/testing.hpp"
-#include "barretenberg/polynomials/pow.hpp"
 #include "barretenberg/protogalaxy/protogalaxy_prover.hpp"
-#include "barretenberg/relations/relation_parameters.hpp"
+#include "barretenberg/protogalaxy/protogalaxy_prover_internal.hpp"
 #include "barretenberg/relations/ultra_arithmetic_relation.hpp"
 #include "barretenberg/stdlib_circuit_builders/ultra_flavor.hpp"
 #include "barretenberg/sumcheck/instance/instances.hpp"
@@ -21,6 +20,7 @@ TEST(Protogalaxy, CombinerOn2Instances)
     using ProverInstance = ProverInstance_<Flavor>;
     using ProverInstances = ProverInstances_<Flavor, NUM_INSTANCES>;
     using ProtoGalaxyProver = ProtoGalaxyProver_<ProverInstances>;
+    using Fun = ProtogalaxyProverInternal<ProverInstances>;
 
     const auto restrict_to_standard_arithmetic_relation = [](auto& polys) {
         std::fill(polys.q_arith.begin(), polys.q_arith.end(), 1);
@@ -56,7 +56,7 @@ TEST(Protogalaxy, CombinerOn2Instances)
             ProverInstances instances{ instance_data };
             instances.alphas.fill(bb::Univariate<FF, 12>(FF(0))); // focus on the arithmetic relation only
             auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-            auto result = prover.compute_combiner</*OptimisationEnabled=*/false>(instances, pow_polynomial);
+            auto result = Fun::compute_combiner(instances, pow_polynomial, prover.state.univariate_accumulators);
             // The expected_result values are computed by running the python script combiner_example_gen.py
             auto expected_result = Univariate<FF, 12>(std::array<FF, 12>{ 9704UL,
                                                                           13245288UL,
@@ -134,8 +134,9 @@ TEST(Protogalaxy, CombinerOn2Instances)
                       0    0    0    0    0    0    0              0    0    6   18   36   60   90      */
 
             auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-            auto result = prover.compute_combiner</*OptimisationEnabled=*/false>(instances, pow_polynomial);
-            auto optimised_result = prover.compute_combiner(instances, pow_polynomial);
+            auto result = Fun::compute_combiner(instances, pow_polynomial, prover.state.univariate_accumulators);
+            auto optimised_result =
+                Fun::compute_combiner(instances, pow_polynomial, prover.state.optimised_univariate_accumulators);
             auto expected_result =
                 Univariate<FF, 12>(std::array<FF, 12>{ 0, 0, 12, 36, 72, 120, 180, 252, 336, 432, 540, 660 });
 
@@ -154,6 +155,7 @@ TEST(Protogalaxy, CombinerOptimizationConsistency)
     using ProverInstance = ProverInstance_<Flavor>;
     using ProverInstances = ProverInstances_<Flavor, NUM_INSTANCES>;
     using ProtoGalaxyProver = ProtoGalaxyProver_<ProverInstances>;
+    using Fun = ProtogalaxyProverInternal<ProverInstances>;
     using UltraArithmeticRelation = UltraArithmeticRelation<FF>;
 
     constexpr size_t UNIVARIATE_LENGTH = 12;
@@ -252,8 +254,9 @@ TEST(Protogalaxy, CombinerOptimizationConsistency)
                 precomputed_result[idx] = std::get<0>(accumulator)[0];
             }
             auto expected_result = Univariate<FF, UNIVARIATE_LENGTH>(precomputed_result);
-            auto result = prover.compute_combiner</*OptimisationEnabled=*/false>(instances, pow_polynomial);
-            auto optimised_result = prover.compute_combiner(instances, pow_polynomial);
+            auto result = Fun::compute_combiner(instances, pow_polynomial, prover.state.univariate_accumulators);
+            auto optimised_result =
+                Fun::compute_combiner(instances, pow_polynomial, prover.state.optimised_univariate_accumulators);
 
             EXPECT_EQ(result, expected_result);
             EXPECT_EQ(optimised_result, expected_result);
@@ -320,8 +323,9 @@ TEST(Protogalaxy, CombinerOptimizationConsistency)
                       0    0    0    0    0    0    0              0    0    6   18   36   60   90      */
 
             auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-            auto result = prover.compute_combiner</*OptimisationEnabled=*/false>(instances, pow_polynomial);
-            auto optimised_result = prover.compute_combiner(instances, pow_polynomial);
+            auto result = Fun::compute_combiner(instances, pow_polynomial, prover.state.univariate_accumulators);
+            auto optimised_result =
+                Fun::compute_combiner(instances, pow_polynomial, prover.state.optimised_univariate_accumulators);
             auto expected_result =
                 Univariate<FF, 12>(std::array<FF, 12>{ 0, 0, 12, 36, 72, 120, 180, 252, 336, 432, 540, 660 });
 
@@ -333,15 +337,16 @@ TEST(Protogalaxy, CombinerOptimizationConsistency)
     run_test(false);
 };
 
-// Tests a combiner on 4 instances, note currently we don't plan
-// to fold with num instances > 2, this would require an additional explicit instantiation in
-// protogalaxy_prover_ultra.cpp. Currently, we rather save the compile time.
-// TEST(Protogalaxy, CombinerOn4Instances)
+// // Tests a combiner on 4 instances, note currently we don't plan
+// // to fold with num instances > 2, this would require an additional explicit instantiation in
+// // protogalaxy_prover_ultra.cpp. Currently, we rather save the compile time.
+// TEST(Protogalaxy, DISABLED_CombinerOn4Instances)
 // {
 //     constexpr size_t NUM_INSTANCES = 4;
 //     using ProverInstance = ProverInstance_<Flavor>;
 //     using ProverInstances = ProverInstances_<Flavor, NUM_INSTANCES>;
 //     using ProtoGalaxyProver = ProtoGalaxyProver_<ProverInstances>;
+//     using Fun = ProtogalaxyProverInternal<ProverInstances>;
 
 //     const auto zero_all_selectors = [](auto& polys) {
 //         std::fill(polys.q_arith.begin(), polys.q_arith.end(), 0);
@@ -376,8 +381,9 @@ TEST(Protogalaxy, CombinerOptimizationConsistency)
 //         zero_all_selectors(instances[3]->proving_key.polynomials);
 
 //         auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-//         auto result = prover.compute_combiner</*OptimisationEnabled=*/false>(instances, pow_polynomial);
-//         auto optimised_result = prover.compute_combiner(instances, pow_polynomial);
+//         auto result = Fun::compute_combiner(instances, pow_polynomial, prover.state.univariate_accumulators);
+//         auto optimised_result =
+//             Fun::compute_combiner(instances, pow_polynomial, prover.state.optimised_univariate_accumulators);
 //         std::array<FF, 40> zeroes;
 //         std::fill(zeroes.begin(), zeroes.end(), 0);
 //         auto expected_result = Univariate<FF, 40>(zeroes);
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp
index 4a700d6bd16..a8d33f910bd 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp
@@ -1,6 +1,7 @@
 #include "barretenberg/goblin/mock_circuits.hpp"
 #include "barretenberg/polynomials/pow.hpp"
 #include "barretenberg/protogalaxy/protogalaxy_prover.hpp"
+#include "barretenberg/protogalaxy/protogalaxy_prover_internal.hpp"
 #include "barretenberg/protogalaxy/protogalaxy_verifier.hpp"
 #include "barretenberg/protogalaxy/prover_verifier_shared.hpp"
 #include "barretenberg/stdlib_circuit_builders/mock_circuits.hpp"
@@ -37,6 +38,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
     using DeciderVerifier = DeciderVerifier_<Flavor>;
     using FoldingProver = ProtoGalaxyProver_<ProverInstances>;
     using FoldingVerifier = ProtoGalaxyVerifier_<VerifierInstances>;
+    using Fun = ProtogalaxyProverInternal<ProverInstances>;
 
     using TupleOfInstances =
         std::tuple<std::vector<std::shared_ptr<ProverInstance>>, std::vector<std::shared_ptr<VerifierInstance>>>;
@@ -93,7 +95,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
     static void check_accumulator_target_sum_manual(std::shared_ptr<ProverInstance>& accumulator, bool expected_result)
     {
         auto instance_size = accumulator->proving_key.circuit_size;
-        auto expected_honk_evals = ProtoGalaxyProver::compute_full_honk_evaluations(
+        auto expected_honk_evals = Fun::compute_full_honk_evaluations(
             accumulator->proving_key.polynomials, accumulator->alphas, accumulator->relation_parameters);
         // Construct pow(\vec{betas*}) as in the paper
         auto expected_pows = PowPolynomial(accumulator->gate_challenges);
@@ -146,7 +148,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
         for (auto& alpha : instance->alphas) {
             alpha = FF::random_element();
         }
-        auto full_honk_evals = ProtoGalaxyProver::compute_full_honk_evaluations(
+        auto full_honk_evals = Fun::compute_full_honk_evaluations(
             instance->proving_key.polynomials, instance->alphas, instance->relation_parameters);
 
         // Evaluations should be 0 for valid circuit
@@ -165,7 +167,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
         std::vector<FF> betas = { FF(5), FF(8), FF(11) };
         std::vector<FF> deltas = { FF(2), FF(4), FF(8) };
         std::vector<FF> full_honk_evaluations = { FF(1), FF(1), FF(1), FF(1), FF(1), FF(1), FF(1), FF(1) };
-        auto perturbator = ProtoGalaxyProver::construct_perturbator_coefficients(betas, deltas, full_honk_evaluations);
+        auto perturbator = Fun::construct_perturbator_coefficients(betas, deltas, full_honk_evaluations);
         std::vector<FF> expected_values = { FF(648), FF(936), FF(432), FF(64) };
         EXPECT_EQ(perturbator.size(), 4); // log(instance_size) + 1
         for (size_t i = 0; i < perturbator.size(); i++) {
@@ -195,8 +197,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
             alpha = FF::random_element();
         }
 
-        auto full_honk_evals =
-            ProtoGalaxyProver::compute_full_honk_evaluations(full_polynomials, alphas, relation_parameters);
+        auto full_honk_evals = Fun::compute_full_honk_evaluations(full_polynomials, alphas, relation_parameters);
         std::vector<FF> betas(log_instance_size);
         for (size_t idx = 0; idx < log_instance_size; idx++) {
             betas[idx] = FF::random_element();
@@ -220,7 +221,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
         accumulator->alphas = alphas;
 
         auto deltas = compute_round_challenge_pows(log_instance_size, FF::random_element());
-        auto perturbator = ProtoGalaxyProver::compute_perturbator(accumulator, deltas);
+        auto perturbator = Fun::compute_perturbator(accumulator, deltas);
 
         // Ensure the constant coefficient of the perturbator is equal to the target sum as indicated by the paper
         EXPECT_EQ(perturbator[0], target_sum);
@@ -235,7 +236,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
     {
         auto compressed_perturbator = FF(2); // F(\alpha) in the paper
         auto combiner = bb::Univariate<FF, 12>(std::array<FF, 12>{ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 });
-        auto combiner_quotient = ProtoGalaxyProver::compute_combiner_quotient(compressed_perturbator, combiner);
+        auto combiner_quotient = Fun::compute_combiner_quotient(compressed_perturbator, combiner);
 
         // K(i) = (G(i) - ( L_0(i) * F(\alpha)) / Z(i), i = {2,.., 13} for ProverInstances::NUM = 2
         // K(i) = (G(i) - (1 - i) * F(\alpha)) / i * (i - 1)
@@ -274,7 +275,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
         instance2->relation_parameters.eta = 3;
 
         ProverInstances instances{ { instance1, instance2 } };
-        ProtoGalaxyProver::combine_relation_parameters(instances);
+        Fun::combine_relation_parameters(instances);
 
         bb::Univariate<FF, 11> expected_eta{ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21 } };
         EXPECT_EQ(instances.relation_parameters.eta, expected_eta);
@@ -301,7 +302,7 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
         instance2->alphas.fill(4);
 
         ProverInstances instances{ { instance1, instance2 } };
-        ProtoGalaxyProver::combine_alpha(instances);
+        Fun::combine_alpha(instances);
 
         bb::Univariate<FF, 12> expected_alpha{ { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24 } };
         for (const auto& alpha : instances.alphas) {
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index 56d020074a0..f8e43112e16 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -1,26 +1,24 @@
 #pragma once
-#include "barretenberg/common/op_count.hpp"
-#include "barretenberg/common/thread.hpp"
-#include "barretenberg/flavor/flavor.hpp"
-#include "barretenberg/polynomials/pow.hpp"
 #include "barretenberg/polynomials/univariate.hpp"
 #include "barretenberg/protogalaxy/folding_result.hpp"
-#include "barretenberg/relations/relation_parameters.hpp"
-#include "barretenberg/relations/utils.hpp"
-#include "barretenberg/stdlib_circuit_builders/mega_flavor.hpp"
-#include "barretenberg/stdlib_circuit_builders/ultra_flavor.hpp"
-#include "barretenberg/sumcheck/instance/instances.hpp"
 
 namespace bb {
 template <class ProverInstances_> struct ProtogalaxyProofConstructionState {
     using FF = typename ProverInstances_::FF;
     using ProverInstance = typename ProverInstances_::Instance;
+    using Flavor = typename ProverInstances_::Flavor;
+    using TupleOfTuplesOfUnivariates =
+        typename Flavor::template ProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances_::NUM>;
+    using OptimisedTupleOfTuplesOfUnivariates =
+        typename Flavor::template OptimisedProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances_::NUM>;
 
     std::shared_ptr<ProverInstance> accumulator;
     LegacyPolynomial<FF> perturbator;
     std::vector<FF> deltas;
     Univariate<FF, ProverInstances_::BATCHED_EXTENDED_LENGTH, ProverInstances_::NUM> combiner_quotient;
     FF compressed_perturbator;
+    OptimisedTupleOfTuplesOfUnivariates optimised_univariate_accumulators;
+    TupleOfTuplesOfUnivariates univariate_accumulators;
     FoldingResult<typename ProverInstances_::Flavor> result;
 };
 
@@ -31,45 +29,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     using Transcript = typename Flavor::Transcript;
     using FF = typename Flavor::FF;
     using Instance = typename ProverInstances::Instance;
-    using Utils = bb::RelationUtils<Flavor>;
-    using RowEvaluations = typename Flavor::AllValues;
-    using ProvingKey = typename Flavor::ProvingKey;
-    using ProverPolynomials = typename Flavor::ProverPolynomials;
-    using Relations = typename Flavor::Relations;
-    using RelationSeparator = typename Flavor::RelationSeparator;
-    using CombinedRelationSeparator = typename ProverInstances::RelationSeparator;
-    using VerificationKey = typename Flavor::VerificationKey;
     using CommitmentKey = typename Flavor::CommitmentKey;
-    using WitnessCommitments = typename Flavor::WitnessCommitments;
-    using CommitmentLabels = typename Flavor::CommitmentLabels;
-    using Commitment = typename Flavor::Commitment;
-
-    using BaseUnivariate = Univariate<FF, ProverInstances::NUM>;
-    // The length of ExtendedUnivariate is the largest length (==max_relation_degree + 1) of a univariate polynomial
-    // obtained by composing a relation with folded instance + relation parameters .
-    using ExtendedUnivariate = Univariate<FF, (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (ProverInstances::NUM - 1) + 1>;
-    // Same as ExtendedUnivariate, but uses optimised univariates which skip redundant computation in optimistic cases
-    // (when we know that the evaluation of all relations is 0 on a particular index, for example)
-    using OptimisedExtendedUnivariate =
-        Univariate<FF,
-                   (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (ProverInstances::NUM - 1) + 1,
-                   0,
-                   ProverInstances::NUM - 1>;
-    // Represents the total length of the combiner univariate, obtained by combining the already folded relations with
-    // the folded relation batching challenge.
-    using ExtendedUnivariateWithRandomization =
-        Univariate<FF,
-                   (Flavor::MAX_TOTAL_RELATION_LENGTH - 1 + ProverInstances::NUM - 1) * (ProverInstances::NUM - 1) + 1>;
-    using ExtendedUnivariates = typename Flavor::template ProverUnivariates<ExtendedUnivariate::LENGTH>;
-    using OptimisedExtendedUnivariates =
-        typename Flavor::template OptimisedProverUnivariates<ExtendedUnivariate::LENGTH,
-                                                             /* SKIP_COUNT= */ ProverInstances::NUM - 1>;
-
-    using TupleOfTuplesOfUnivariates =
-        typename Flavor::template ProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances::NUM>;
-    using OptimisedTupleOfTuplesOfUnivariates =
-        typename Flavor::template OptimisedProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances::NUM>;
-    using RelationEvaluations = typename Flavor::TupleOfArraysOfValues;
 
     static constexpr size_t NUM_SUBRELATIONS = ProverInstances::NUM_SUBRELATIONS;
 
@@ -114,277 +74,6 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     // FoldingParameters set and be the result of a previous round of folding.
     std::shared_ptr<Instance> get_accumulator() { return instances[0]; }
 
-    /**
-     * @brief Compute the values of the full Honk relation at each row in the execution trace, representing f_i(ω) in
-     * the ProtoGalaxy paper, given the evaluations of all the prover polynomials and \vec{α} (the batching challenges
-     * that help establishing each subrelation is independently valid in Honk - from the Plonk paper, DO NOT confuse
-     * with α in ProtoGalaxy).
-     *
-     * @details When folding Mega instances, one of the relations is linearly dependent. We define such relations
-     * as acting on the entire execution trace and hence requiring to be accumulated separately as we iterate over each
-     * row. At the end of the function, the linearly dependent contribution is accumulated at index 0 representing the
-     * sum f_0(ω) + α_j*g(ω) where f_0 represents the full honk evaluation at row 0, g(ω) is the linearly dependent
-     * subrelation and α_j is its corresponding batching challenge.
-     */
-    static std::vector<FF> compute_full_honk_evaluations(const ProverPolynomials& instance_polynomials,
-                                                         const RelationSeparator& alpha,
-                                                         const RelationParameters<FF>& relation_parameters);
-
-    /**
-     * @brief  Recursively compute the parent nodes of each level in the tree, starting from the leaves. Note that at
-     * each level, the resulting parent nodes will be polynomials of degree (level+1) because we multiply by an
-     * additional factor of X.
-     */
-    static std::vector<FF> construct_coefficients_tree(const std::vector<FF>& betas,
-                                                       const std::vector<FF>& deltas,
-                                                       const std::vector<std::vector<FF>>& prev_level_coeffs,
-                                                       size_t level = 1);
-
-    /**
-     * @brief We construct the coefficients of the perturbator polynomial in O(n) time following the technique in
-     * Claim 4.4. Consider a binary tree whose leaves are the evaluations of the full Honk relation at each row in the
-     * execution trace. The subsequent levels in the tree are constructed using the following technique: At level i in
-     * the tree, label the branch connecting the left node n_l to its parent by 1 and for the right node n_r by β_i +
-     * δ_i X. The value of the parent node n will be constructed as n = n_l + n_r * (β_i + δ_i X). Recurse over each
-     * layer until the root is reached which will correspond to the perturbator polynomial F(X).
-     * TODO(https://github.com/AztecProtocol/barretenberg/issues/745): make computation of perturbator more memory
-     * efficient, operate in-place and use std::resize; add multithreading
-     */
-    static std::vector<FF> construct_perturbator_coefficients(const std::vector<FF>& betas,
-                                                              const std::vector<FF>& deltas,
-                                                              const std::vector<FF>& full_honk_evaluations);
-
-    /**
-     * @brief Construct the power perturbator polynomial F(X) in coefficient form from the accumulator, representing the
-     * relaxed instance.
-     *
-     *
-     */
-    static LegacyPolynomial<FF> compute_perturbator(std::shared_ptr<Instance> accumulator,
-                                                    const std::vector<FF>& deltas);
-
-    OptimisedTupleOfTuplesOfUnivariates optimised_univariate_accumulators;
-    TupleOfTuplesOfUnivariates univariate_accumulators;
-
-    /**
-     * @brief Prepare a univariate polynomial for relation execution in one step of the main loop in folded instance
-     * construction.
-     * @details For a fixed prover polynomial index, extract that polynomial from each instance in Instances. From
-     *each polynomial, extract the value at row_idx. Use these values to create a univariate polynomial, and then
-     *extend (i.e., compute additional evaluations at adjacent domain values) as needed.
-     * @todo TODO(https://github.com/AztecProtocol/barretenberg/issues/751) Optimize memory
-     *
-     *
-     */
-
-    template <size_t skip_count = 0>
-    void extend_univariates(
-        std::conditional_t<skip_count != 0, OptimisedExtendedUnivariates, ExtendedUnivariates>& extended_univariates,
-        const ProverInstances& instances,
-        const size_t row_idx)
-    {
-        auto base_univariates = instances.template row_to_univariates<skip_count>(row_idx);
-        for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) {
-            extended_univariate = base_univariate.template extend_to<ExtendedUnivariate::LENGTH, skip_count>();
-        }
-    }
-
-    /**
-     * @brief Add the value of each relation over univariates to an appropriate accumulator
-     *
-     * @tparam TupleOfTuplesOfUnivariates_ A tuple of univariate accumulators, where the univariates may be optimized to
-     * avoid computation on some indices.
-     * @tparam ExtendedUnivariates_ T
-     * @tparam Parameters relation parameters type
-     * @tparam relation_idx The index of the relation
-     * @param univariate_accumulators
-     * @param extended_univariates
-     * @param relation_parameters
-     * @param scaling_factor
-     */
-    template <typename TupleOfTuplesOfUnivariates_,
-              typename ExtendedUnivariates_,
-              typename Parameters,
-              size_t relation_idx = 0>
-    void accumulate_relation_univariates(TupleOfTuplesOfUnivariates_& univariate_accumulators,
-                                         const ExtendedUnivariates_& extended_univariates,
-                                         const Parameters& relation_parameters,
-                                         const FF& scaling_factor)
-    {
-        using Relation = std::tuple_element_t<relation_idx, Relations>;
-
-        //  Check if the relation is skippable to speed up accumulation
-        if constexpr (!isSkippable<Relation, decltype(extended_univariates)>) {
-            // If not, accumulate normally
-            Relation::accumulate(std::get<relation_idx>(univariate_accumulators),
-                                 extended_univariates,
-                                 relation_parameters,
-                                 scaling_factor);
-        } else {
-            // If so, only compute the contribution if the relation is active
-            if (!Relation::skip(extended_univariates)) {
-                Relation::accumulate(std::get<relation_idx>(univariate_accumulators),
-                                     extended_univariates,
-                                     relation_parameters,
-                                     scaling_factor);
-            }
-        }
-
-        // Repeat for the next relation.
-        if constexpr (relation_idx + 1 < Flavor::NUM_RELATIONS) {
-            accumulate_relation_univariates<TupleOfTuplesOfUnivariates_,
-                                            ExtendedUnivariates_,
-                                            Parameters,
-                                            relation_idx + 1>(
-                univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
-        }
-    }
-
-    /**
-     * @brief Compute the combiner polynomial $G$ in the Protogalaxy paper
-     * @details We have implemented an optimization that (eg in the case where we fold one instance-witness pair at a
-     * time) assumes the value G(1) is 0, which is true in the case where the witness to be folded is valid.
-     * @todo (https://github.com/AztecProtocol/barretenberg/issues/968) Make combiner tests better
-     *
-     * @tparam skip_zero_computations whether to use the the optimization that skips computing zero.
-     * @param instances
-     * @param pow_betas
-     * @return ExtendedUnivariateWithRandomization
-     */
-    template <bool skip_zero_computations = true>
-    ExtendedUnivariateWithRandomization compute_combiner(const ProverInstances& instances, PowPolynomial<FF>& pow_betas)
-    {
-        BB_OP_COUNT_TIME();
-        size_t common_instance_size = instances[0]->proving_key.circuit_size;
-        pow_betas.compute_values(instances[0]->proving_key.log_circuit_size);
-        // Determine number of threads for multithreading.
-        // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based
-        // on a specified minimum number of iterations per thread. This eventually leads to the use of a
-        // single thread. For now we use a power of 2 number of threads simply to ensure the round size is evenly
-        // divided.
-        size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2)
-        size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
-        size_t desired_num_threads = common_instance_size / min_iterations_per_thread;
-        size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified
-        num_threads = num_threads > 0 ? num_threads : 1;                     // ensure num threads is >= 1
-        size_t iterations_per_thread = common_instance_size / num_threads;   // actual iterations per thread
-
-        // Univariates are optimised for usual PG, but we need the unoptimised version for tests (it's a version that
-        // doesn't skip computation), so we need to define types depending on the template instantiation
-        using ThreadAccumulators =
-            std::conditional_t<skip_zero_computations, OptimisedTupleOfTuplesOfUnivariates, TupleOfTuplesOfUnivariates>;
-        using ExtendedUnivatiatesType =
-            std::conditional_t<skip_zero_computations, OptimisedExtendedUnivariates, ExtendedUnivariates>;
-
-        // Construct univariate accumulator containers; one per thread
-        std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
-        for (auto& accum : thread_univariate_accumulators) {
-            // just normal relation lengths
-            Utils::zero_univariates(accum);
-        }
-
-        // Construct extended univariates containers; one per thread
-        std::vector<ExtendedUnivatiatesType> extended_univariates;
-        extended_univariates.resize(num_threads);
-
-        // Accumulate the contribution from each sub-relation
-        parallel_for(num_threads, [&](size_t thread_idx) {
-            size_t start = thread_idx * iterations_per_thread;
-            size_t end = (thread_idx + 1) * iterations_per_thread;
-
-            for (size_t idx = start; idx < end; idx++) {
-                // Instantiate univariates, possibly with skipping toto ignore computation in those indices (they are
-                // still available for skipping relations, but all derived univariate will ignore those evaluations)
-                // No need to initialise extended_univariates to 0, as it's assigned to.
-                constexpr size_t skip_count = skip_zero_computations ? ProverInstances::NUM - 1 : 0;
-                extend_univariates<skip_count>(extended_univariates[thread_idx], instances, idx);
-
-                FF pow_challenge = pow_betas[idx];
-
-                // Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
-                // this function have already been folded. Moreover, linear-dependent relations that act over the
-                // entire execution trace rather than on rows, will not be multiplied by the pow challenge.
-                if constexpr (skip_zero_computations) {
-                    accumulate_relation_univariates(
-                        thread_univariate_accumulators[thread_idx],
-                        extended_univariates[thread_idx],
-                        instances.optimised_relation_parameters, // these parameters have already been folded
-                        pow_challenge);
-                } else {
-                    accumulate_relation_univariates(
-                        thread_univariate_accumulators[thread_idx],
-                        extended_univariates[thread_idx],
-                        instances.relation_parameters, // these parameters have already been folded
-                        pow_challenge);
-                }
-            }
-        });
-        const auto batch_univariates = [&](auto& possibly_optimised_univariate_accumulators) {
-            Utils::zero_univariates(possibly_optimised_univariate_accumulators);
-            // Accumulate the per-thread univariate accumulators into a single set of accumulators
-            for (auto& accumulators : thread_univariate_accumulators) {
-                Utils::add_nested_tuples(possibly_optimised_univariate_accumulators, accumulators);
-            }
-
-            if constexpr (skip_zero_computations) { // Convert from optimised version to non-optimised
-                deoptimise_univariates(possibly_optimised_univariate_accumulators, univariate_accumulators);
-            };
-            //  Batch the univariate contributions from each sub-relation to obtain the round univariate
-            return batch_over_relations(univariate_accumulators, instances.alphas);
-        };
-
-        if constexpr (skip_zero_computations) { // Convert from optimised version to non-optimised
-            return batch_univariates(optimised_univariate_accumulators);
-        } else {
-            return batch_univariates(univariate_accumulators);
-        }
-    }
-
-    /**
-     * @brief Convert univariates from optimised form to regular
-     *
-     * @details We need to convert before we batch relations, since optimised versions don't have enough information to
-     * extend the univariates to maximum length
-     *
-     * @param optimised_univariate_accumulators
-     * @param new_univariate_accumulators
-     */
-    static void deoptimise_univariates(const OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators,
-                                       TupleOfTuplesOfUnivariates& new_univariate_accumulators
-
-    );
-
-    static ExtendedUnivariateWithRandomization batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
-                                                                    const CombinedRelationSeparator& alpha);
-
-    static std::pair<typename ProverInstances::FF, std::array<typename ProverInstances::FF, ProverInstances::NUM>>
-    _compute_vanishing_polynomial_and_lagranges(const FF& challenge);
-    /**
-     * @brief Compute the combiner quotient defined as $K$ polynomial in the paper.
-     *
-     * TODO(https://github.com/AztecProtocol/barretenberg/issues/764): generalize the computation of vanishing
-     * polynomials and Lagrange basis and use batch_invert.
-     *
-     */
-    static Univariate<FF, ProverInstances::BATCHED_EXTENDED_LENGTH, ProverInstances::NUM> compute_combiner_quotient(
-        FF compressed_perturbator, ExtendedUnivariateWithRandomization combiner);
-
-    /**
-     * @brief Combine each relation parameter, in part, from all the instances into univariates, used in the
-     * computation of combiner.
-     * @details For a given relation parameter type, extract that parameter from each instance, place the values in
-     * a univariate (i.e., sum them against an appropriate univariate Lagrange basis) and then extended as needed
-     * during the constuction of the combiner.
-     */
-    static void combine_relation_parameters(ProverInstances& instances);
-
-    /**
-     * @brief Combine the relation batching parameters (alphas) from each instance into a univariate, used in the
-     * computation of combiner.
-     *
-     */
-    static void combine_alpha(ProverInstances& instances);
-
     /**
      * @brief Compute the next accumulator (ϕ*, ω*, \vec{\beta*}, e*), send the public data ϕ*  and the folding
      * parameters
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp
index d8c55c76aab..89fccacc250 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp
@@ -1,218 +1,12 @@
 #pragma once
-#include "barretenberg/common/container.hpp"
 #include "barretenberg/common/op_count.hpp"
-#include "barretenberg/common/thread.hpp"
-#include "barretenberg/flavor/flavor.hpp"
+#include "barretenberg/protogalaxy/protogalaxy_prover_internal.hpp"
 #include "barretenberg/protogalaxy/prover_verifier_shared.hpp"
+#include "barretenberg/relations/relation_parameters.hpp"
 #include "barretenberg/ultra_honk/oink_prover.hpp"
 #include "protogalaxy_prover.hpp"
 
 namespace bb {
-// See protogalaxy_prover.hpp for details
-template <class ProverInstances_>
-std::vector<typename ProtoGalaxyProver_<ProverInstances_>::FF> ProtoGalaxyProver_<
-    ProverInstances_>::compute_full_honk_evaluations(const ProverPolynomials& instance_polynomials,
-                                                     const RelationSeparator& alpha,
-                                                     const RelationParameters<FF>& relation_parameters)
-{
-    BB_OP_COUNT_TIME_NAME("ProtoGalaxyProver_::compute_full_honk_evaluations");
-    auto instance_size = instance_polynomials.get_polynomial_size();
-    std::vector<FF> full_honk_evaluations(instance_size);
-    std::vector<FF> linearly_dependent_contribution_accumulators = parallel_for_heuristic(
-        instance_size,
-        /*accumulator default*/ FF(0),
-        [&](size_t row, FF& linearly_dependent_contribution_accumulator) {
-            auto row_evaluations = instance_polynomials.get_row(row);
-            RelationEvaluations relation_evaluations;
-            Utils::zero_elements(relation_evaluations);
-
-            Utils::template accumulate_relation_evaluations<>(
-                row_evaluations, relation_evaluations, relation_parameters, FF(1));
-
-            auto output = FF(0);
-            auto running_challenge = FF(1);
-            Utils::scale_and_batch_elements(
-                relation_evaluations, alpha, running_challenge, output, linearly_dependent_contribution_accumulator);
-
-            full_honk_evaluations[row] = output;
-        },
-        thread_heuristics::ALWAYS_MULTITHREAD);
-    full_honk_evaluations[0] += sum(linearly_dependent_contribution_accumulators);
-    return full_honk_evaluations;
-}
-
-// See protogalaxy_prover.hpp for details
-template <class ProverInstances_>
-std::vector<typename ProtoGalaxyProver_<ProverInstances_>::FF> ProtoGalaxyProver_<
-    ProverInstances_>::construct_coefficients_tree(const std::vector<FF>& betas,
-                                                   const std::vector<FF>& deltas,
-                                                   const std::vector<std::vector<FF>>& prev_level_coeffs,
-                                                   size_t level)
-{
-    if (level == betas.size()) {
-        return prev_level_coeffs[0];
-    }
-
-    auto degree = level + 1;
-    auto prev_level_width = prev_level_coeffs.size();
-    std::vector<std::vector<FF>> level_coeffs(prev_level_width / 2, std::vector<FF>(degree + 1, 0));
-    parallel_for_heuristic(
-        prev_level_width / 2,
-        [&](size_t parent) {
-            size_t node = parent * 2;
-            std::copy(prev_level_coeffs[node].begin(), prev_level_coeffs[node].end(), level_coeffs[parent].begin());
-            for (size_t d = 0; d < degree; d++) {
-                level_coeffs[parent][d] += prev_level_coeffs[node + 1][d] * betas[level];
-                level_coeffs[parent][d + 1] += prev_level_coeffs[node + 1][d] * deltas[level];
-            }
-        },
-        /* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * degree * 3);
-    return construct_coefficients_tree(betas, deltas, level_coeffs, level + 1);
-}
-
-// See protogalaxy_prover.hpp for details
-template <class ProverInstances_>
-std::vector<typename ProtoGalaxyProver_<ProverInstances_>::FF> ProtoGalaxyProver_<
-    ProverInstances_>::construct_perturbator_coefficients(const std::vector<FF>& betas,
-                                                          const std::vector<FF>& deltas,
-                                                          const std::vector<FF>& full_honk_evaluations)
-{
-    auto width = full_honk_evaluations.size();
-    std::vector<std::vector<FF>> first_level_coeffs(width / 2, std::vector<FF>(2, 0));
-    parallel_for_heuristic(
-        width / 2,
-        [&](size_t parent) {
-            size_t node = parent * 2;
-            first_level_coeffs[parent][0] = full_honk_evaluations[node] + full_honk_evaluations[node + 1] * betas[0];
-            first_level_coeffs[parent][1] = full_honk_evaluations[node + 1] * deltas[0];
-        },
-        /* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * 3);
-    return construct_coefficients_tree(betas, deltas, first_level_coeffs);
-}
-
-// See protogalaxy_prover.hpp for details
-template <class ProverInstances_>
-LegacyPolynomial<typename ProtoGalaxyProver_<ProverInstances_>::FF> ProtoGalaxyProver_<
-    ProverInstances_>::compute_perturbator(const std::shared_ptr<Instance> accumulator, const std::vector<FF>& deltas)
-{
-    BB_OP_COUNT_TIME();
-    auto full_honk_evaluations = compute_full_honk_evaluations(
-        accumulator->proving_key.polynomials, accumulator->alphas, accumulator->relation_parameters);
-    const auto betas = accumulator->gate_challenges;
-    assert(betas.size() == deltas.size());
-    auto coeffs = construct_perturbator_coefficients(betas, deltas, full_honk_evaluations);
-    return LegacyPolynomial<FF>(coeffs);
-}
-
-// See protogalaxy_prover.hpp for details
-template <class ProverInstances_>
-void ProtoGalaxyProver_<ProverInstances_>::deoptimise_univariates(
-    const OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators,
-    TupleOfTuplesOfUnivariates& new_univariate_accumulators)
-{
-    auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
-        auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
-        element = optimised_element.convert();
-    };
-
-    Utils::template apply_to_tuple_of_tuples<0, 0>(new_univariate_accumulators, deoptimise);
-}
-
-template <class ProverInstances_>
-ProtoGalaxyProver_<ProverInstances_>::ExtendedUnivariateWithRandomization ProtoGalaxyProver_<
-    ProverInstances_>::batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
-                                            const CombinedRelationSeparator& alpha)
-{
-    auto result = std::get<0>(std::get<0>(univariate_accumulators))
-                      .template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();
-    size_t idx = 0;
-    auto scale_and_sum = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
-        auto extended = element.template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();
-        extended *= alpha[idx];
-        result += extended;
-        idx++;
-    };
-
-    Utils::template apply_to_tuple_of_tuples<0, 1>(univariate_accumulators, scale_and_sum);
-    Utils::zero_univariates(univariate_accumulators);
-
-    return result;
-}
-
-// See protogalaxy_prover.hpp for details
-template <class ProverInstances_>
-Univariate<typename ProtoGalaxyProver_<ProverInstances_>::FF,
-           ProverInstances_::BATCHED_EXTENDED_LENGTH,
-           ProverInstances_::NUM>
-ProtoGalaxyProver_<ProverInstances_>::compute_combiner_quotient(const FF compressed_perturbator,
-                                                                ExtendedUnivariateWithRandomization combiner)
-{
-    std::array<FF, ProverInstances::BATCHED_EXTENDED_LENGTH - ProverInstances::NUM> combiner_quotient_evals = {};
-
-    constexpr FF inverse_two = FF(2).invert();
-    constexpr FF inverse_six = FF(6).invert();
-    for (size_t point = ProverInstances::NUM; point < combiner.size(); point++) {
-        auto idx = point - ProverInstances::NUM;
-        FF lagrange_0;
-        FF vanishing_polynomial;
-        if constexpr (ProverInstances::NUM == 2) {
-            lagrange_0 = FF(1) - FF(point);
-            vanishing_polynomial = FF(point) * (FF(point) - 1);
-        } else if constexpr (ProverInstances::NUM == 3) {
-            lagrange_0 = (FF(1) - FF(point)) * (FF(2) - FF(point)) * inverse_two;
-            vanishing_polynomial = FF(point) * (FF(point) - 1) * (FF(point) - 2);
-        } else if constexpr (ProverInstances::NUM == 4) {
-            lagrange_0 = (FF(1) - FF(point)) * (FF(2) - FF(point)) * (FF(3) - FF(point)) * inverse_six;
-            vanishing_polynomial = FF(point) * (FF(point) - 1) * (FF(point) - 2) * (FF(point) - 3);
-        }
-        static_assert(ProverInstances::NUM < 5);
-
-        combiner_quotient_evals[idx] =
-            (combiner.value_at(point) - compressed_perturbator * lagrange_0) * vanishing_polynomial.invert();
-    }
-
-    Univariate<FF, ProverInstances::BATCHED_EXTENDED_LENGTH, ProverInstances::NUM> combiner_quotient(
-        combiner_quotient_evals);
-    return combiner_quotient;
-}
-
-// See protogalaxy_prover.hpp for details
-template <class ProverInstances_>
-void ProtoGalaxyProver_<ProverInstances_>::combine_relation_parameters(ProverInstances& instances)
-{
-    size_t param_idx = 0;
-    auto to_fold = instances.relation_parameters.get_to_fold();
-    auto to_fold_optimised = instances.optimised_relation_parameters.get_to_fold();
-    for (auto [folded_parameter, optimised_folded_parameter] : zip_view(to_fold, to_fold_optimised)) {
-        Univariate<FF, ProverInstances::NUM> tmp(0);
-        size_t instance_idx = 0;
-        for (auto& instance : instances) {
-            tmp.value_at(instance_idx) = instance->relation_parameters.get_to_fold()[param_idx];
-            instance_idx++;
-        }
-        folded_parameter = tmp.template extend_to<ProverInstances::EXTENDED_LENGTH>();
-        optimised_folded_parameter =
-            tmp.template extend_to<ProverInstances::EXTENDED_LENGTH, ProverInstances::NUM - 1>();
-        param_idx++;
-    }
-}
-
-// See protogalaxy_prover.hpp for details
-template <class ProverInstances_> void ProtoGalaxyProver_<ProverInstances_>::combine_alpha(ProverInstances& instances)
-{
-    size_t alpha_idx = 0;
-    for (auto& alpha : instances.alphas) {
-        Univariate<FF, ProverInstances::NUM> tmp;
-        size_t instance_idx = 0;
-        for (auto& instance : instances) {
-            tmp.value_at(instance_idx) = instance->alphas[alpha_idx];
-            instance_idx++;
-        }
-        alpha = tmp.template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();
-        alpha_idx++;
-    }
-}
-
 template <class ProverInstances>
 void ProtoGalaxyProver_<ProverInstances>::finalise_and_send_instance(std::shared_ptr<Instance> instance,
                                                                      const std::string& domain_separator)
@@ -248,34 +42,6 @@ template <class ProverInstances> void ProtoGalaxyProver_<ProverInstances>::prepa
  * TODO(https://github.com/AztecProtocol/barretenberg/issues/764): Generalize the vanishing polynomial formula
  * and the computation of Lagrange basis for k instances
  */
-template <class ProverInstances>
-std::pair<typename ProverInstances::FF, std::array<typename ProverInstances::FF, ProverInstances::NUM>>
-ProtoGalaxyProver_<ProverInstances>::_compute_vanishing_polynomial_and_lagranges(const FF& challenge)
-{
-    FF vanishing_polynomial_at_challenge;
-    std::array<FF, ProverInstances::NUM> lagranges;
-    constexpr FF inverse_two = FF(2).invert();
-
-    if constexpr (ProverInstances::NUM == 2) {
-        vanishing_polynomial_at_challenge = challenge * (challenge - FF(1));
-        lagranges = { FF(1) - challenge, challenge };
-    } else if constexpr (ProverInstances::NUM == 3) {
-        vanishing_polynomial_at_challenge = challenge * (challenge - FF(1)) * (challenge - FF(2));
-        lagranges = { (FF(1) - challenge) * (FF(2) - challenge) * inverse_two,
-                      challenge * (FF(2) - challenge),
-                      challenge * (challenge - FF(1)) / FF(2) };
-    } else if constexpr (ProverInstances::NUM == 4) {
-        constexpr FF inverse_six = FF(6).invert();
-        vanishing_polynomial_at_challenge = challenge * (challenge - FF(1)) * (challenge - FF(2)) * (challenge - FF(3));
-        lagranges = { (FF(1) - challenge) * (FF(2) - challenge) * (FF(3) - challenge) * inverse_six,
-                      challenge * (FF(2) - challenge) * (FF(3) - challenge) * inverse_two,
-                      challenge * (challenge - FF(1)) * (FF(3) - challenge) * inverse_two,
-                      challenge * (challenge - FF(1)) * (challenge - FF(2)) * inverse_six };
-    }
-    static_assert(ProverInstances::NUM < 5);
-
-    return { vanishing_polynomial_at_challenge, lagranges };
-}
 
 template <class ProverInstances>
 std::shared_ptr<typename ProverInstances::Instance> ProtoGalaxyProver_<ProverInstances>::compute_next_accumulator(
@@ -284,8 +50,10 @@ std::shared_ptr<typename ProverInstances::Instance> ProtoGalaxyProver_<ProverIns
     FF& challenge,
     const FF& compressed_perturbator)
 {
+    using Fun = ProtogalaxyProverInternal<ProverInstances>;
+
     auto combiner_quotient_at_challenge = combiner_quotient.evaluate(challenge);
-    auto [vanishing_polynomial_at_challenge, lagranges] = _compute_vanishing_polynomial_and_lagranges(challenge);
+    auto [vanishing_polynomial_at_challenge, lagranges] = Fun::compute_vanishing_polynomial_and_lagranges(challenge);
 
     // TODO(https://github.com/AztecProtocol/barretenberg/issues/881): bad pattern
     auto next_accumulator = std::move(instances[0]);
@@ -345,6 +113,8 @@ template <class ProverInstances> void ProtoGalaxyProver_<ProverInstances>::prepa
 template <class ProverInstances> void ProtoGalaxyProver_<ProverInstances>::perturbator_round()
 {
     BB_OP_COUNT_TIME_NAME("ProtoGalaxyProver_::perturbator_round");
+
+    using Fun = ProtogalaxyProverInternal<ProverInstances>;
     state.accumulator = get_accumulator();
     FF delta = transcript->template get_challenge<FF>("delta");
     state.deltas = compute_round_challenge_pows(state.accumulator->proving_key.log_circuit_size, delta);
@@ -352,7 +122,7 @@ template <class ProverInstances> void ProtoGalaxyProver_<ProverInstances>::pertu
         LegacyPolynomial<FF>(state.accumulator->proving_key.log_circuit_size + 1); // initialize to all zeros
     // compute perturbator only if this is not the first round and has an accumulator
     if (state.accumulator->is_accumulator) {
-        state.perturbator = compute_perturbator(state.accumulator, state.deltas);
+        state.perturbator = Fun::compute_perturbator(state.accumulator, state.deltas);
         // Prover doesn't send the constant coefficient of F because this is supposed to be equal to the target sum of
         // the accumulator which the folding verifier has from the previous iteration.
         for (size_t idx = 1; idx <= state.accumulator->proving_key.log_circuit_size; idx++) {
@@ -364,16 +134,18 @@ template <class ProverInstances> void ProtoGalaxyProver_<ProverInstances>::pertu
 template <class ProverInstances> void ProtoGalaxyProver_<ProverInstances>::combiner_quotient_round()
 {
     BB_OP_COUNT_TIME_NAME("ProtoGalaxyProver_::combiner_quotient_round");
+
+    using Fun = ProtogalaxyProverInternal<ProverInstances>;
     auto perturbator_challenge = transcript->template get_challenge<FF>("perturbator_challenge");
     instances.next_gate_challenges =
         update_gate_challenges(perturbator_challenge, state.accumulator->gate_challenges, state.deltas);
-    combine_relation_parameters(instances);
-    combine_alpha(instances);
+    Fun::combine_relation_parameters(instances);
+    Fun::combine_alpha(instances);
     auto pow_polynomial = PowPolynomial<FF>(instances.next_gate_challenges);
-    auto combiner = compute_combiner(instances, pow_polynomial);
+    auto combiner = Fun::compute_combiner(instances, pow_polynomial, state.optimised_univariate_accumulators);
 
     state.compressed_perturbator = state.perturbator.evaluate(perturbator_challenge);
-    state.combiner_quotient = compute_combiner_quotient(state.compressed_perturbator, combiner);
+    state.combiner_quotient = Fun::compute_combiner_quotient(state.compressed_perturbator, combiner);
 
     for (size_t idx = ProverInstances::NUM; idx < ProverInstances::BATCHED_EXTENDED_LENGTH; idx++) {
         transcript->send_to_verifier("combiner_quotient_" + std::to_string(idx), state.combiner_quotient.value_at(idx));
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp
new file mode 100644
index 00000000000..ecddd061331
--- /dev/null
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp
@@ -0,0 +1,501 @@
+#pragma once
+#include "barretenberg/common/container.hpp"
+#include "barretenberg/common/op_count.hpp"
+#include "barretenberg/common/thread.hpp"
+#include "barretenberg/protogalaxy/prover_verifier_shared.hpp"
+#include "barretenberg/relations/relation_parameters.hpp"
+#include "barretenberg/relations/relation_types.hpp"
+#include "barretenberg/relations/utils.hpp"
+#include "barretenberg/ultra_honk/oink_prover.hpp"
+
+namespace bb {
+
+/**
+ * @brief A purely static class (never add state to this!) consisting of functions used by the Protogalaxy prover.
+ *
+ * @tparam ProverInstances_
+ */
+template <class ProverInstances_> class ProtogalaxyProverInternal {
+  public:
+    using ProverInstances = ProverInstances_;
+    using Flavor = typename ProverInstances::Flavor;
+    using FF = typename Flavor::FF;
+    using Instance = typename ProverInstances::Instance;
+    using RelationUtils = bb::RelationUtils<Flavor>;
+    using ProverPolynomials = typename Flavor::ProverPolynomials;
+    using Relations = typename Flavor::Relations;
+    using RelationSeparator = typename Flavor::RelationSeparator;
+    using CombinedRelationSeparator = typename ProverInstances::RelationSeparator;
+
+    // The length of ExtendedUnivariate is the largest length (==max_relation_degree + 1) of a univariate polynomial
+    // obtained by composing a relation with folded instance + relation parameters.
+    using ExtendedUnivariate = Univariate<FF, (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (ProverInstances::NUM - 1) + 1>;
+    // Represents the total length of the combiner univariate, obtained by combining the already folded relations with
+    // the folded relation batching challenge.
+    using ExtendedUnivariateWithRandomization =
+        Univariate<FF,
+                   (Flavor::MAX_TOTAL_RELATION_LENGTH - 1 + ProverInstances::NUM - 1) * (ProverInstances::NUM - 1) + 1>;
+    using ExtendedUnivariates = typename Flavor::template ProverUnivariates<ExtendedUnivariate::LENGTH>;
+    using OptimisedExtendedUnivariates =
+        typename Flavor::template OptimisedProverUnivariates<ExtendedUnivariate::LENGTH,
+                                                             /* SKIP_COUNT= */ ProverInstances::NUM - 1>;
+
+    using TupleOfTuplesOfUnivariates =
+        typename Flavor::template ProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances::NUM>;
+    using OptimisedTupleOfTuplesOfUnivariates =
+        typename Flavor::template OptimisedProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances::NUM>;
+    using RelationEvaluations = typename Flavor::TupleOfArraysOfValues;
+
+    static constexpr size_t NUM_SUBRELATIONS = ProverInstances::NUM_SUBRELATIONS;
+
+    /**
+     * @brief Compute the values of the full Honk relation at each row in the execution trace, representing f_i(ω) in
+     * the ProtoGalaxy paper, given the evaluations of all the prover polynomials and \vec{α} (the batching challenges
+     * that help establish that each subrelation is independently valid in Honk - from the Plonk paper, DO NOT confuse
+     * with α in ProtoGalaxy).
+     *
+     * @details When folding Mega instances, one of the relations is linearly dependent. We define such relations
+     * as acting on the entire execution trace and hence needing to be accumulated separately as we iterate over each
+     * row. At the end of the function, the linearly dependent contribution is accumulated at index 0 representing the
+     * sum f_0(ω) + α_j*g(ω) where f_0 represents the full honk evaluation at row 0, g(ω) is the linearly dependent
+     * subrelation and α_j is its corresponding batching challenge.
+     */
+    static std::vector<FF> compute_full_honk_evaluations(const ProverPolynomials& instance_polynomials,
+                                                         const RelationSeparator& alpha,
+                                                         const RelationParameters<FF>& relation_parameters)
+
+    {
+        BB_OP_COUNT_TIME_NAME("ProtoGalaxyProver_::compute_full_honk_evaluations");
+        auto instance_size = instance_polynomials.get_polynomial_size();
+        std::vector<FF> full_honk_evaluations(instance_size);
+        std::vector<FF> linearly_dependent_contribution_accumulators = parallel_for_heuristic(
+            instance_size,
+            /*accumulator default*/ FF(0),
+            [&](size_t row, FF& linearly_dependent_contribution_accumulator) {
+                auto row_evaluations = instance_polynomials.get_row(row);
+                RelationEvaluations relation_evaluations;
+                RelationUtils::zero_elements(relation_evaluations);
+                // Accumulate this row's relation evaluations into the freshly zeroed relation_evaluations.
+                RelationUtils::template accumulate_relation_evaluations<>(
+                    row_evaluations, relation_evaluations, relation_parameters, FF(1));
+                // Batch with alpha into output; the linearly dependent part goes to its own accumulator.
+                auto output = FF(0);
+                auto running_challenge = FF(1);
+                RelationUtils::scale_and_batch_elements(relation_evaluations,
+                                                        alpha,
+                                                        running_challenge,
+                                                        output,
+                                                        linearly_dependent_contribution_accumulator);
+
+                full_honk_evaluations[row] = output;
+            },
+            thread_heuristics::ALWAYS_MULTITHREAD);
+        full_honk_evaluations[0] += sum(linearly_dependent_contribution_accumulators);
+        return full_honk_evaluations;
+    }
+
+    /**
+     * @brief Recursively compute the parent nodes of each level in the tree, starting from the leaves. At each level
+     * the resulting parent nodes are polynomials of degree (level+1) because we multiply by an additional factor of X.
+     * Recursion stops, returning prev_level_coeffs[0], once level == betas.size().
+     */
+    static std::vector<FF> construct_coefficients_tree(const std::vector<FF>& betas,
+                                                       const std::vector<FF>& deltas,
+                                                       const std::vector<std::vector<FF>>& prev_level_coeffs,
+                                                       size_t level = 1)
+    {
+        if (level == betas.size()) {
+            return prev_level_coeffs[0];
+        }
+        // Each parent is n_l + n_r * (betas[level] + deltas[level] * X): one degree higher than its children.
+        auto degree = level + 1;
+        auto prev_level_width = prev_level_coeffs.size();
+        std::vector<std::vector<FF>> level_coeffs(prev_level_width / 2, std::vector<FF>(degree + 1, 0));
+        parallel_for_heuristic(
+            prev_level_width / 2,
+            [&](size_t parent) {
+                size_t node = parent * 2;
+                std::copy(prev_level_coeffs[node].begin(), prev_level_coeffs[node].end(), level_coeffs[parent].begin());
+                for (size_t d = 0; d < degree; d++) {
+                    level_coeffs[parent][d] += prev_level_coeffs[node + 1][d] * betas[level];
+                    level_coeffs[parent][d + 1] += prev_level_coeffs[node + 1][d] * deltas[level];
+                }
+            },
+            /* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * degree * 3);
+        return construct_coefficients_tree(betas, deltas, level_coeffs, level + 1);
+    }
+
+    /**
+     * @brief We construct the coefficients of the perturbator polynomial in O(n) time following the technique in
+     * Claim 4.4. Consider a binary tree whose leaves are the evaluations of the full Honk relation at each row in the
+     * execution trace. At level i in the tree, label the branch connecting the left node n_l to its parent by 1 and
+     * the branch for the right node n_r by β_i + δ_i X, so the parent is n = n_l + n_r * (β_i + δ_i X). Recursing over
+     * each layer until the root is reached yields the perturbator polynomial F(X). This function builds the first
+     * level from the leaves with betas[0] and deltas[0]; construct_coefficients_tree handles the remaining levels.
+     * TODO(https://github.com/AztecProtocol/barretenberg/issues/745): make computation of perturbator more memory
+     * efficient, operate in-place and use std::resize; add multithreading
+     */
+    static std::vector<FF> construct_perturbator_coefficients(const std::vector<FF>& betas,
+                                                              const std::vector<FF>& deltas,
+                                                              const std::vector<FF>& full_honk_evaluations)
+    {
+        auto width = full_honk_evaluations.size();
+        std::vector<std::vector<FF>> first_level_coeffs(width / 2, std::vector<FF>(2, 0));
+        parallel_for_heuristic(
+            width / 2,
+            [&](size_t parent) {
+                size_t node = parent * 2;
+                first_level_coeffs[parent][0] =
+                    full_honk_evaluations[node] + full_honk_evaluations[node + 1] * betas[0];
+                first_level_coeffs[parent][1] = full_honk_evaluations[node + 1] * deltas[0];
+            },
+            /* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * 3);
+        return construct_coefficients_tree(betas, deltas, first_level_coeffs);
+    }
+
+    /**
+     * @brief Construct the power perturbator polynomial F(X) in coefficient form from the accumulator, representing the
+     * relaxed instance.
+     * @param accumulator instance supplying the prover polynomials, alphas, relation parameters and gate challenges
+     * @param deltas per-level challenges; must have the same length as the accumulator's gate challenges (asserted)
+     */
+    static LegacyPolynomial<FF> compute_perturbator(std::shared_ptr<Instance> accumulator,
+                                                    const std::vector<FF>& deltas)
+    {
+        BB_OP_COUNT_TIME();
+        auto full_honk_evaluations = compute_full_honk_evaluations(
+            accumulator->proving_key.polynomials, accumulator->alphas, accumulator->relation_parameters);
+        const auto& betas = accumulator->gate_challenges; // bind by reference: no need to copy the challenge vector
+        assert(betas.size() == deltas.size());
+        auto coeffs = construct_perturbator_coefficients(betas, deltas, full_honk_evaluations);
+        return LegacyPolynomial<FF>(coeffs);
+    }
+
+    /**
+     * @brief Prepare a univariate polynomial for relation execution in one step of the main loop in folded instance
+     * construction.
+     * @details For a fixed prover polynomial index, extract that polynomial from each instance in Instances. From
+     * each polynomial, extract the value at row_idx. Use these values to create a univariate polynomial, and then
+     * extend (i.e., compute additional evaluations at adjacent domain values) as needed.
+     * @todo TODO(https://github.com/AztecProtocol/barretenberg/issues/751) Optimize memory
+     */
+    template <size_t skip_count = 0>
+    static void extend_univariates(
+        std::conditional_t<skip_count != 0, OptimisedExtendedUnivariates, ExtendedUnivariates>& extended_univariates,
+        const ProverInstances& instances,
+        const size_t row_idx)
+    {
+        // Univariates built from row row_idx of the instances; each one is extended into the matching output slot.
+        auto row_univariates = instances.template row_to_univariates<skip_count>(row_idx);
+        for (auto [extended, base] : zip_view(extended_univariates.get_all(), row_univariates)) {
+            extended = base.template extend_to<ExtendedUnivariate::LENGTH, skip_count>();
+        }
+    }
+
+    /**
+     * @brief Add the value of each relation over univariates to an appropriate accumulator
+     *
+     * @tparam TupleOfTuplesOfUnivariates_ A tuple of univariate accumulators, where the univariates may be optimized to
+     * avoid computation on some indices.
+     * @tparam ExtendedUnivariates_ type of the container of extended univariates the relations are evaluated on
+     * @tparam Parameters relation parameters type
+     * @tparam relation_idx The index of the relation
+     * @param univariate_accumulators accumulators, indexed by relation, that receive the contributions
+     * @param extended_univariates inputs on which each relation is evaluated
+     * @param relation_parameters parameters forwarded to each relation
+     * @param scaling_factor factor forwarded to each relation
+     */
+    template <typename TupleOfTuplesOfUnivariates_,
+              typename ExtendedUnivariates_,
+              typename Parameters,
+              size_t relation_idx = 0>
+    static void accumulate_relation_univariates(TupleOfTuplesOfUnivariates_& univariate_accumulators,
+                                                const ExtendedUnivariates_& extended_univariates,
+                                                const Parameters& relation_parameters,
+                                                const FF& scaling_factor)
+    {
+        using Relation = std::tuple_element_t<relation_idx, Relations>;
+
+        // Single place where the contribution of the current relation is added to its accumulator.
+        const auto accumulate_current_relation = [&]() {
+            Relation::accumulate(std::get<relation_idx>(univariate_accumulators),
+                                 extended_univariates,
+                                 relation_parameters,
+                                 scaling_factor);
+        };
+        if constexpr (isSkippable<Relation, decltype(extended_univariates)>) {
+            // Skippable relation: only pay for the accumulation when the relation is active on these inputs.
+            if (!Relation::skip(extended_univariates)) {
+                accumulate_current_relation();
+            }
+        } else {
+            // No skip condition is available, so always accumulate.
+            accumulate_current_relation();
+        }
+
+        // Recurse at compile time into the next relation, if there is one.
+        if constexpr (relation_idx + 1 < Flavor::NUM_RELATIONS) {
+            accumulate_relation_univariates<TupleOfTuplesOfUnivariates_,
+                                            ExtendedUnivariates_,
+                                            Parameters,
+                                            relation_idx + 1>(
+                univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
+        }
+    }
+
+    /**
+     * @brief Compute the combiner polynomial $G$ in the Protogalaxy paper
+     * @details We have implemented an optimization that (eg in the case where we fold one instance-witness pair at a
+     * time) assumes the value G(1) is 0, which is true in the case where the witness to be folded is valid.
+     * @todo (https://github.com/AztecProtocol/barretenberg/issues/968) Make combiner tests better
+     * @tparam TupleOfTuples accumulator type; OptimisedTupleOfTuplesOfUnivariates enables skipping zero computations
+     * @param instances instances to fold; sizes are read from instances[0] and the folded parameters/alphas are used
+     * @param pow_betas pow polynomial; its values are (re)computed here and used as per-row scaling factors
+     * @param univariate_accumulators output storage; zeroed, then filled with the sum of the per-thread accumulators
+     * @return ExtendedUnivariateWithRandomization the relation contributions batched using instances.alphas
+     */
+    template <typename TupleOfTuples>
+    static ExtendedUnivariateWithRandomization compute_combiner(const ProverInstances& instances,
+                                                                PowPolynomial<FF>& pow_betas,
+                                                                TupleOfTuples& univariate_accumulators)
+    {
+        BB_OP_COUNT_TIME();
+
+        // Whether to use univariates whose operators ignore some values which an honest prover would compute to be zero
+        constexpr bool skip_zero_computations = std::same_as<TupleOfTuples, OptimisedTupleOfTuplesOfUnivariates>;
+
+        size_t common_instance_size = instances[0]->proving_key.circuit_size;
+        pow_betas.compute_values(instances[0]->proving_key.log_circuit_size);
+        // Determine number of threads for multithreading.
+        // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based
+        // on a specified minimum number of iterations per thread. This eventually leads to the use of a
+        // single thread. For now we use a power of 2 number of threads simply to ensure the round size is evenly
+        // divided.
+        size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2)
+        size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
+        size_t desired_num_threads = common_instance_size / min_iterations_per_thread;
+        size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified
+        num_threads = num_threads > 0 ? num_threads : 1;                     // ensure num threads is >= 1
+        size_t iterations_per_thread = common_instance_size / num_threads;   // actual iterations per thread
+
+        // Univariates are optimised for usual PG, but we need the unoptimised version for tests (it's a version that
+        // doesn't skip computation), so we need to define types depending on the template instantiation
+        using ThreadAccumulators = TupleOfTuples;
+        using ExtendedUnivatiatesType =
+            std::conditional_t<skip_zero_computations, OptimisedExtendedUnivariates, ExtendedUnivariates>;
+
+        // Construct univariate accumulator containers; one per thread
+        std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
+        for (auto& accum : thread_univariate_accumulators) {
+            // just normal relation lengths
+            RelationUtils::zero_univariates(accum);
+        }
+
+        // Construct extended univariates containers; one per thread
+        std::vector<ExtendedUnivatiatesType> extended_univariates;
+        extended_univariates.resize(num_threads);
+
+        // Accumulate the contribution from each sub-relation
+        parallel_for(num_threads, [&](size_t thread_idx) {
+            size_t start = thread_idx * iterations_per_thread;
+            size_t end = (thread_idx + 1) * iterations_per_thread;
+
+            for (size_t idx = start; idx < end; idx++) {
+                // Instantiate univariates, possibly with skipping to ignore computation in those indices (they are
+                // still available for skipping relations, but all derived univariate will ignore those evaluations)
+                // No need to initialise extended_univariates to 0, as it's assigned to.
+                constexpr size_t skip_count = skip_zero_computations ? ProverInstances::NUM - 1 : 0;
+                extend_univariates<skip_count>(extended_univariates[thread_idx], instances, idx);
+
+                FF pow_challenge = pow_betas[idx];
+
+                // Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
+                // this function have already been folded. Moreover, linear-dependent relations that act over the
+                // entire execution trace rather than on rows, will not be multiplied by the pow challenge.
+                if constexpr (skip_zero_computations) {
+                    accumulate_relation_univariates(
+                        thread_univariate_accumulators[thread_idx],
+                        extended_univariates[thread_idx],
+                        instances.optimised_relation_parameters, // these parameters have already been folded
+                        pow_challenge);
+                } else {
+                    accumulate_relation_univariates(
+                        thread_univariate_accumulators[thread_idx],
+                        extended_univariates[thread_idx],
+                        instances.relation_parameters, // these parameters have already been folded
+                        pow_challenge);
+                }
+            }
+        });
+
+        RelationUtils::zero_univariates(univariate_accumulators);
+        // Accumulate the per-thread univariate accumulators into a single set of accumulators
+        for (auto& accumulators : thread_univariate_accumulators) {
+            RelationUtils::add_nested_tuples(univariate_accumulators, accumulators);
+        }
+        // This does nothing if TupleOfTuples is TupleOfTuplesOfUnivariates
+        TupleOfTuplesOfUnivariates deoptimized_univariates = deoptimise_univariates(univariate_accumulators);
+        //  Batch the univariate contributions from each sub-relation to obtain the round univariate
+        return batch_over_relations(deoptimized_univariates, instances.alphas);
+    }
+
+    /**
+     * @brief Convert univariates from optimised form to regular
+     *
+     * @details We need to convert before we batch relations, since optimised versions don't have enough information to
+     * extend the univariates to maximum length
+     *
+     * @param tup accumulators that may or may not be in optimised form
+     * @return TupleOfTuplesOfUnivariates a copy of tup if it is already regular, otherwise its converted elements
+     */
+    template <typename PossiblyOptimisedTupleOfTuplesOfUnivariates>
+    static TupleOfTuplesOfUnivariates deoptimise_univariates(const PossiblyOptimisedTupleOfTuplesOfUnivariates& tup)
+    {
+        if constexpr (std::same_as<PossiblyOptimisedTupleOfTuplesOfUnivariates, TupleOfTuplesOfUnivariates>) {
+            // Input does not have optimized operators: return the input
+            return tup;
+        } else {
+            // Kept in the else branch so the conversion is only instantiated for optimised inputs
+            auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
+                auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(tup));
+                element = optimised_element.convert();
+            };
+            TupleOfTuplesOfUnivariates result;
+            RelationUtils::template apply_to_tuple_of_tuples<0, 0>(result, deoptimise);
+            return result;
+        }
+    }
+
+    static ExtendedUnivariateWithRandomization batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
+                                                                    const CombinedRelationSeparator& alpha)
+    {
+        // Seed the sum with the very first accumulator, which enters the batch unscaled.
+        auto batched = std::get<0>(std::get<0>(univariate_accumulators))
+                           .template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();
+        size_t alpha_idx = 0;
+        auto add_scaled = [&]<size_t outer_idx, size_t inner_idx>(auto& accumulator) {
+            auto term = accumulator.template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();
+            term *= alpha[alpha_idx++];
+            batched += term;
+        };
+        // Presumably visits the remaining accumulators (start indices <0, 1>); one alpha is consumed per visit.
+        RelationUtils::template apply_to_tuple_of_tuples<0, 1>(univariate_accumulators, add_scaled);
+        // The inputs are zeroed once they have been folded into the batched univariate.
+        RelationUtils::zero_univariates(univariate_accumulators);
+        return batched;
+    }
+
+    /** @brief Evaluate at `challenge` the vanishing polynomial of {0, ..., NUM - 1} and its Lagrange basis. */
+    static std::pair<typename ProverInstances::FF, std::array<typename ProverInstances::FF, ProverInstances::NUM>>
+    compute_vanishing_polynomial_and_lagranges(const FF& challenge)
+    {
+        FF vanishing_polynomial_at_challenge;
+        std::array<FF, ProverInstances::NUM> lagranges;
+        constexpr FF inverse_two = FF(2).invert();
+        // Denominators are applied as constexpr inverses, so no field inversion is performed at runtime.
+        if constexpr (ProverInstances::NUM == 2) {
+            vanishing_polynomial_at_challenge = challenge * (challenge - FF(1));
+            lagranges = { FF(1) - challenge, challenge };
+        } else if constexpr (ProverInstances::NUM == 3) {
+            vanishing_polynomial_at_challenge = challenge * (challenge - FF(1)) * (challenge - FF(2));
+            lagranges = { (FF(1) - challenge) * (FF(2) - challenge) * inverse_two,
+                          challenge * (FF(2) - challenge),
+                          challenge * (challenge - FF(1)) * inverse_two };
+        } else if constexpr (ProverInstances::NUM == 4) {
+            constexpr FF inverse_six = FF(6).invert();
+            vanishing_polynomial_at_challenge =
+                challenge * (challenge - FF(1)) * (challenge - FF(2)) * (challenge - FF(3));
+            lagranges = { (FF(1) - challenge) * (FF(2) - challenge) * (FF(3) - challenge) * inverse_six,
+                          challenge * (FF(2) - challenge) * (FF(3) - challenge) * inverse_two,
+                          challenge * (challenge - FF(1)) * (FF(3) - challenge) * inverse_two,
+                          challenge * (challenge - FF(1)) * (challenge - FF(2)) * inverse_six };
+        }
+        static_assert(ProverInstances::NUM < 5);
+        return { vanishing_polynomial_at_challenge, lagranges };
+    }
+
+    /**
+     * @brief Compute the combiner quotient, i.e. the polynomial $K$ of the paper, from the combiner.
+     * @param compressed_perturbator value multiplied by the 0-th Lagrange polynomial and subtracted from the combiner
+     * @param combiner combiner whose evaluations at points NUM, ..., size() - 1 are used
+     * TODO(https://github.com/AztecProtocol/barretenberg/issues/764): generalize the computation of vanishing
+     * polynomials and Lagrange basis and use batch_invert.
+     */
+    static Univariate<FF, ProverInstances::BATCHED_EXTENDED_LENGTH, ProverInstances::NUM> compute_combiner_quotient(
+        FF compressed_perturbator, ExtendedUnivariateWithRandomization combiner)
+    {
+        using Quotient = Univariate<FF, ProverInstances::BATCHED_EXTENDED_LENGTH, ProverInstances::NUM>;
+        static_assert(ProverInstances::NUM < 5);
+        constexpr FF inverse_two = FF(2).invert();
+        constexpr FF inverse_six = FF(6).invert();
+
+        std::array<FF, ProverInstances::BATCHED_EXTENDED_LENGTH - ProverInstances::NUM> quotient_evals = {};
+        // Only points from NUM onwards are evaluated; the first NUM points are the roots of the vanishing polynomial.
+        for (size_t point = ProverInstances::NUM; point < combiner.size(); point++) {
+            const FF x(point);
+            FF lagrange_0;
+            FF vanishing_polynomial;
+            if constexpr (ProverInstances::NUM == 2) {
+                lagrange_0 = FF(1) - x;
+                vanishing_polynomial = x * (x - FF(1));
+            } else if constexpr (ProverInstances::NUM == 3) {
+                lagrange_0 = (FF(1) - x) * (FF(2) - x) * inverse_two;
+                vanishing_polynomial = x * (x - FF(1)) * (x - FF(2));
+            } else if constexpr (ProverInstances::NUM == 4) {
+                lagrange_0 = (FF(1) - x) * (FF(2) - x) * (FF(3) - x) * inverse_six;
+                vanishing_polynomial = x * (x - FF(1)) * (x - FF(2)) * (x - FF(3));
+            }
+            // quotient(x) = (combiner(x) - compressed_perturbator * L_0(x)) / Z(x)
+            const FF numerator = combiner.value_at(point) - compressed_perturbator * lagrange_0;
+            quotient_evals[point - ProverInstances::NUM] = numerator * vanishing_polynomial.invert();
+        }
+
+        return Quotient(quotient_evals);
+    }
+
+    /**
+     * @brief Combine each relation parameter, in part, from all the instances into univariates, used in the
+     * computation of combiner.
+     * @details For a given relation parameter type, extract that parameter from each instance, place the values in
+     * a univariate (i.e., sum them against an appropriate univariate Lagrange basis) and then extend it as needed
+     * during the construction of the combiner.
+     */
+    static void combine_relation_parameters(ProverInstances& instances)
+    {
+        auto folded_params = instances.relation_parameters.get_to_fold();
+        auto folded_params_optimised = instances.optimised_relation_parameters.get_to_fold();
+        size_t param_idx = 0;
+        for (auto [folded, folded_optimised] : zip_view(folded_params, folded_params_optimised)) {
+            // Gather the param_idx-th foldable parameter of every instance as the evaluations of one univariate.
+            Univariate<FF, ProverInstances::NUM> parameter_values(0);
+            size_t instance_idx = 0;
+            for (auto& instance : instances) {
+                parameter_values.value_at(instance_idx++) = instance->relation_parameters.get_to_fold()[param_idx];
+            }
+            folded = parameter_values.template extend_to<ProverInstances::EXTENDED_LENGTH>();
+            folded_optimised =
+                parameter_values.template extend_to<ProverInstances::EXTENDED_LENGTH, ProverInstances::NUM - 1>();
+            ++param_idx;
+        }
+    }
+
+    /**
+     * @brief For each alpha index, place the alphas of all instances into a univariate, extend it to
+     * BATCHED_EXTENDED_LENGTH and store it in instances.alphas, for use in the computation of combiner.
+     */
+    static void combine_alpha(ProverInstances& instances)
+    {
+        size_t alpha_idx = 0;
+        for (auto& folded_alpha : instances.alphas) {
+            // Left uninitialised: entries are assigned from the instances right below.
+            Univariate<FF, ProverInstances::NUM> instance_alphas;
+            size_t instance_idx = 0;
+            for (auto& instance : instances) {
+                instance_alphas.value_at(instance_idx++) = instance->alphas[alpha_idx];
+            }
+            folded_alpha = instance_alphas.template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();
+            ++alpha_idx;
+        }
+    }
+};
+} // namespace bb
\ No newline at end of file
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_mega.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_mega.cpp
index 287ea4861d7..2d96969cf8d 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_mega.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_mega.cpp
@@ -1,5 +1,6 @@
 // Note: this is split up from protogalaxy_prover_impl.hpp for compile performance reasons
 #include "barretenberg/flavor/flavor.hpp"
+#include "barretenberg/sumcheck/instance/instances.hpp"
 #include "barretenberg/ultra_honk/oink_prover.hpp"
 #include "protogalaxy_prover_impl.hpp"
 namespace bb {
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_ultra.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_ultra.cpp
index 5084ec8824c..8cc9c4f768f 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_ultra.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_ultra.cpp
@@ -1,4 +1,5 @@
 // Note: this is split up from protogalaxy_prover_impl.hpp for compile performance reasons
+#include "barretenberg/sumcheck/instance/instances.hpp"
 #include "protogalaxy_prover_impl.hpp"
 
 // TODO(https://github.com/AztecProtocol/barretenberg/issues/1076) Remove this instantiation.
diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
index fdffcc15a4d..a6e4c324c1e 100644
--- a/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
+++ b/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
@@ -20,6 +20,7 @@ template <typename Flavor_, size_t NUM_ = 2> struct ProverInstances_ {
     using RelationParameters = bb::RelationParameters<Univariate<FF, EXTENDED_LENGTH>>;
     using OptimisedRelationParameters = bb::RelationParameters<Univariate<FF, EXTENDED_LENGTH, 0, NUM_ - 1>>;
     using RelationSeparator = std::array<Univariate<FF, BATCHED_EXTENDED_LENGTH>, NUM_SUBRELATIONS - 1>;
+
     ArrayType _data;
     RelationParameters relation_parameters;
     OptimisedRelationParameters optimised_relation_parameters;