From dea0b420ec7745612a0ac943c31914221a89556c Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Wed, 17 Apr 2024 13:22:00 +0000
Subject: [PATCH 01/13] Parallelise perturbator

---
 .../protogalaxy/protogalaxy_prover.hpp        | 49 +++++++++++--------
 1 file changed, 29 insertions(+), 20 deletions(-)
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index 470d8f110b5..ee3fd0c08d0 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -209,14 +209,20 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         auto prev_level_width = prev_level_coeffs.size();
         // we need degree + 1 terms to represent the intermediate polynomials
         std::vector<std::vector<FF>> level_coeffs(prev_level_width >> 1, std::vector<FF>(degree + 1, 0));
-        for (size_t node = 0; node < prev_level_width; node += 2) {
-            auto parent = node >> 1;
-            std::copy(prev_level_coeffs[node].begin(), prev_level_coeffs[node].end(), level_coeffs[parent].begin());
-            for (size_t d = 0; d < degree; d++) {
-                level_coeffs[parent][d] += prev_level_coeffs[node + 1][d] * betas[level];
-                level_coeffs[parent][d + 1] += prev_level_coeffs[node + 1][d] * deltas[level];
-            }
-        }
+        run_loop_in_parallel(
+            prev_level_width >> 1,
+            [&](size_t start, size_t end) {
+                for (size_t node = start << 1; node < end << 1; node += 2) {
+                    auto parent = node >> 1;
+                    std::copy(
+                        prev_level_coeffs[node].begin(), prev_level_coeffs[node].end(), level_coeffs[parent].begin());
+                    for (size_t d = 0; d < degree; d++) {
+                        level_coeffs[parent][d] += prev_level_coeffs[node + 1][d] * betas[level];
+                        level_coeffs[parent][d + 1] += prev_level_coeffs[node + 1][d] * deltas[level];
+                    }
+                }
+            },
+            8);
         return construct_coefficients_tree(betas, deltas, level_coeffs, level + 1);
     }
 
@@ -236,11 +242,14 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     {
         auto width = full_honk_evaluations.size();
         std::vector<std::vector<FF>> first_level_coeffs(width >> 1, std::vector<FF>(2, 0));
-        for (size_t node = 0; node < width; node += 2) {
-            auto parent = node >> 1;
-            first_level_coeffs[parent][0] = full_honk_evaluations[node] + full_honk_evaluations[node + 1] * betas[0];
-            first_level_coeffs[parent][1] = full_honk_evaluations[node + 1] * deltas[0];
-        }
+        run_loop_in_parallel(width >> 1, [&](size_t start, size_t end) {
+            for (size_t node = start << 1; node < end << 1; node += 2) {
+                auto parent = node >> 1;
+                first_level_coeffs[parent][0] =
+                    full_honk_evaluations[node] + full_honk_evaluations[node + 1] * betas[0];
+                first_level_coeffs[parent][1] = full_honk_evaluations[node + 1] * deltas[0];
+            }
+        });
         return construct_coefficients_tree(betas, deltas, first_level_coeffs);
     }
 
@@ -309,9 +318,10 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         size_t common_instance_size = instances[0]->proving_key.circuit_size;
         pow_betas.compute_values();
         // Determine number of threads for multithreading.
-        // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based
-        // on a specified minimum number of iterations per thread. This eventually leads to the use of a single thread.
-        // For now we use a power of 2 number of threads simply to ensure the round size is evenly divided.
+        // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available
+        // based on a specified minimum number of iterations per thread. This eventually leads to the use of a
+        // single thread. For now we use a power of 2 number of threads simply to ensure the round size is evenly
+        // divided.
         size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2)
         size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
         size_t desired_num_threads = common_instance_size / min_iterations_per_thread;
@@ -340,9 +350,9 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
                 FF pow_challenge = pow_betas[idx];
 
-                // Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to this
-                // function have already been folded. Moreover, linear-dependent relations that act over the entire
-                // execution trace rather than on rows, will not be multiplied by the pow challenge.
+                // Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
+                // this function have already been folded. Moreover, linear-dependent relations that act over the
+                // entire execution trace rather than on rows, will not be multiplied by the pow challenge.
                 accumulate_relation_univariates(
                     thread_univariate_accumulators[thread_idx],
                     extended_univariates[thread_idx],
@@ -362,7 +372,6 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     static ExtendedUnivariateWithRandomization batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
                                                                     const CombinedRelationSeparator& alpha)
     {
-
         // First relation does not get multiplied by a batching challenge
         auto result = std::get<0>(std::get<0>(univariate_accumulators))
                           .template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();

From 9e89bfd437b1957269014070bd9851a8088743e3 Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Wed, 17 Apr 2024 17:31:37 +0000
Subject: [PATCH 02/13] poc

---
 .../cpp/src/barretenberg/flavor/flavor.hpp    | 20 +++++++
 .../barretenberg/polynomials/univariate.hpp   | 15 ++++-
 .../protogalaxy/protogalaxy_prover.hpp        | 60 ++++++++++++++++---
 .../barretenberg/relations/relation_types.hpp | 27 +++++++++
 .../goblin_ultra_flavor.hpp                   |  4 ++
 .../stdlib_circuit_builders/ultra_flavor.hpp  |  3 +
 .../barretenberg/vm/generated/avm_flavor.hpp  |  3 +
 7 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
index d2a847a0dbc..6be12a498ff 100644
--- a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
@@ -241,6 +241,26 @@ static constexpr auto create_protogalaxy_tuple_of_tuples_of_univariates()
     }
 }
 
+/**
+ * @brief Recursive utility to construct a container for the optimised subrelation accumulators of Protogalaxy folding.
+ * @details The size of the outer tuple is equal to the number of relations. Each relation contributes an inner tuple of
+ * univariates whose size is equal to the number of subrelations of the relation. The length of a univariate in an inner
+ * tuple is determined by the corresponding subrelation length and the number of instances to be folded.
+ */
+template <typename Tuple, size_t NUM_INSTANCES, size_t Index = 0>
+static constexpr auto create_optimised_protogalaxy_tuple_of_tuples_of_univariates()
+{
+    if constexpr (Index >= std::tuple_size<Tuple>::value) {
+        return std::tuple<>{}; // Return empty when reach end of the tuple
+    } else {
+        using UnivariateTuple = typename std::tuple_element_t<Index, Tuple>::
+            template OptimisedProtogalaxyTupleOfUnivariatesOverSubrelations<NUM_INSTANCES>;
+        return std::tuple_cat(
+            std::tuple<UnivariateTuple>{},
+            create_optimised_protogalaxy_tuple_of_tuples_of_univariates<Tuple, NUM_INSTANCES, Index + 1>());
+    }
+}
+
 /**
  * @brief Recursive utility function to construct a container for the subrelation accumulators of sumcheck proving.
  * @details The size of the outer tuple is equal to the number of relations. Each relation contributes an inner tuple of
diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
index aedc1353787..943f9be4201 100644
--- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
+++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
@@ -271,7 +271,7 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
      * subtraction: setting Δ = v1-v0, the values of f(X) are f(0)=v0, f(1)= v0 + Δ, v2 = f(1) + Δ, v3 = f(2) + Δ...
      *
      */
-    template <size_t EXTENDED_DOMAIN_END> Univariate<Fr, EXTENDED_DOMAIN_END> extend_to() const
+    template <size_t EXTENDED_DOMAIN_END, bool optimised = false> Univariate<Fr, EXTENDED_DOMAIN_END> extend_to() const
     {
         const size_t EXTENDED_LENGTH = EXTENDED_DOMAIN_END - domain_start;
         using Data = BarycentricData<Fr, LENGTH, EXTENDED_LENGTH>;
@@ -282,11 +282,20 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
         std::copy(evaluations.begin(), evaluations.end(), result.evaluations.begin());
 
         static constexpr Fr inverse_two = Fr(2).invert();
+        // static_assert(!optimised || (LENGTH <= 2));
         if constexpr (LENGTH == 2) {
             Fr delta = value_at(1) - value_at(0);
             static_assert(EXTENDED_LENGTH != 0);
-            for (size_t idx = domain_end - 1; idx < EXTENDED_DOMAIN_END - 1; idx++) {
-                result.value_at(idx + 1) = result.value_at(idx) + delta;
+            if constexpr (optimised) {
+                Fr current = result.value_at(1);
+                for (size_t idx = domain_end - 2; idx < EXTENDED_DOMAIN_END - 1; idx++) {
+                    current += delta;
+                    result.value_at(idx + 1) = current;
+                }
+            } else {
+                for (size_t idx = domain_end - 1; idx < EXTENDED_DOMAIN_END - 1; idx++) {
+                    result.value_at(idx + 1) = result.value_at(idx) + delta;
+                }
             }
             return result;
         } else if constexpr (LENGTH == 3) {
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index ee3fd0c08d0..423cd67afda 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -49,15 +49,21 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     // The length of ExtendedUnivariate is the largest length (==max_relation_degree + 1) of a univariate polynomial
     // obtained by composing a relation with folded instance + relation parameters .
     using ExtendedUnivariate = Univariate<FF, (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (ProverInstances::NUM - 1) + 1>;
+    using OptimisedExtendedUnivariate =
+        Univariate<FF, (Flavor::MAX_TOTAL_RELATION_LENGTH - 2) * (ProverInstances::NUM - 1) + 1>;
     // Represents the total length of the combiner univariate, obtained by combining the already folded relations with
     // the folded relation batching challenge.
     using ExtendedUnivariateWithRandomization =
         Univariate<FF,
                    (Flavor::MAX_TOTAL_RELATION_LENGTH - 1 + ProverInstances::NUM - 1) * (ProverInstances::NUM - 1) + 1>;
     using ExtendedUnivariates = typename Flavor::template ProverUnivariates<ExtendedUnivariate::LENGTH>;
+    using OptimisedExtendedUnivariates =
+        typename Flavor::template ProverUnivariates<ExtendedUnivariate::LENGTH - (1 * (ProverInstances::NUM - 1))>;
 
     using TupleOfTuplesOfUnivariates =
         typename Flavor::template ProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances::NUM>;
+    using OptimisedTupleOfTuplesOfUnivariates =
+        typename Flavor::template OptimisedProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances::NUM>;
     using RelationEvaluations = typename Flavor::TupleOfArraysOfValues;
 
     static constexpr size_t NUM_SUBRELATIONS = ProverInstances::NUM_SUBRELATIONS;
@@ -291,9 +297,27 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         }
     }
 
+    /**
+     * @brief Prepare a univariate polynomial for relation execution in one step of the main loop in folded instance
+     * construction.
+     * @details For a fixed prover polynomial index, extract that polynomial from each instance in Instances. From each
+     * polynomial, extract the value at row_idx. Use these values to create a univariate polynomial, and then extend
+     * (i.e., compute additional evaluations at adjacent domain values) as needed.
+     * @todo TODO(https://github.com/AztecProtocol/barretenberg/issues/751) Optimize memory
+     */
+    void optimised_extend_univariates(OptimisedExtendedUnivariates& extended_univariates,
+                                      const ProverInstances& instances,
+                                      const size_t row_idx)
+    {
+        auto base_univariates = instances.row_to_univariates(row_idx);
+        for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) {
+            extended_univariate = base_univariate.template extend_to<OptimisedExtendedUnivariate::LENGTH, true>();
+        }
+    }
+
     template <typename Parameters, size_t relation_idx = 0>
-    void accumulate_relation_univariates(TupleOfTuplesOfUnivariates& univariate_accumulators,
-                                         const ExtendedUnivariates& extended_univariates,
+    void accumulate_relation_univariates(OptimisedTupleOfTuplesOfUnivariates& univariate_accumulators,
+                                         const OptimisedExtendedUnivariates& extended_univariates,
                                          const Parameters& relation_parameters,
                                          const FF& scaling_factor)
     {
@@ -329,14 +353,14 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         num_threads = num_threads > 0 ? num_threads : 1;                     // ensure num threads is >= 1
         size_t iterations_per_thread = common_instance_size / num_threads;   // actual iterations per thread
         // Construct univariate accumulator containers; one per thread
-        std::vector<TupleOfTuplesOfUnivariates> thread_univariate_accumulators(num_threads);
+        std::vector<OptimisedTupleOfTuplesOfUnivariates> thread_univariate_accumulators(num_threads);
         for (auto& accum : thread_univariate_accumulators) {
             // just normal relation lengths
             Utils::zero_univariates(accum);
         }
 
         // Construct extended univariates containers; one per thread
-        std::vector<ExtendedUnivariates> extended_univariates;
+        std::vector<OptimisedExtendedUnivariates> extended_univariates;
         extended_univariates.resize(num_threads);
 
         // Accumulate the contribution from each sub-relation
@@ -346,7 +370,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
             for (size_t idx = start; idx < end; idx++) {
                 // No need to initialise extended_univariates to 0, it's assigned to
-                extend_univariates(extended_univariates[thread_idx], instances, idx);
+                optimised_extend_univariates(extended_univariates[thread_idx], instances, idx);
 
                 FF pow_challenge = pow_betas[idx];
 
@@ -360,15 +384,36 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                     pow_challenge);
             }
         });
-
+        OptimisedTupleOfTuplesOfUnivariates optimised_univariate_accumulators;
+        Utils::zero_univariates(optimised_univariate_accumulators);
         // Accumulate the per-thread univariate accumulators into a single set of accumulators
         for (auto& accumulators : thread_univariate_accumulators) {
-            Utils::add_nested_tuples(univariate_accumulators, accumulators);
+            Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
         }
+        deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
         // Batch the univariate contributions from each sub-relation to obtain the round univariate
         return batch_over_relations(univariate_accumulators, instances.alphas);
     }
 
+    static void deoptimise_univariates(OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators,
+                                       TupleOfTuplesOfUnivariates& univariate_accumulators
+
+    )
+    {
+        auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
+            auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
+            static_assert(std::remove_reference_t<decltype(optimised_element)>::LENGTH + (ProverInstances::NUM - 1) ==
+                          std::remove_reference_t<decltype(element)>::LENGTH);
+            element.evaluations[0] = optimised_element.evaluations[0];
+            element.evaluations[1] = FF(0);
+            for (size_t i = 1; i < std::remove_reference_t<decltype(optimised_element)>::LENGTH; i++) {
+                element.evaluations[i + 1] = optimised_element.evaluations[i];
+            }
+        };
+
+        Utils::template apply_to_tuple_of_tuples<0, 0>(univariate_accumulators, deoptimise);
+    }
+
     static ExtendedUnivariateWithRandomization batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
                                                                     const CombinedRelationSeparator& alpha)
     {
@@ -378,6 +423,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         size_t idx = 0;
         auto scale_and_sum = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
             auto extended = element.template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();
+            // info("Relation ", outer_idx, ".", inner_idx, "[", 0, "] = ", extended.value_at(0));
             extended *= alpha[idx];
             result += extended;
             idx++;
diff --git a/barretenberg/cpp/src/barretenberg/relations/relation_types.hpp b/barretenberg/cpp/src/barretenberg/relations/relation_types.hpp
index aa4ba8820b7..87f1cfdd0d0 100644
--- a/barretenberg/cpp/src/barretenberg/relations/relation_types.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/relation_types.hpp
@@ -89,6 +89,28 @@ consteval std::array<size_t, NUM_SUBRELATIONS> compute_composed_subrelation_part
     return SUBRELATION_PARTIAL_LENGTHS;
 };
 
+/**
+ * @brief Compute the shortened ("optimised") subrelation lengths for the Protogalaxy combiner calculation.
+ * @details A subrelation of degree D, when evaluated on polynomials of degree N, gives a polynomial of degree D
+ * * N. In the context of Protogalaxy, N = NUM_INSTANCES-1, so a subrelation of length x yields a polynomial of
+ * degree (x-1) * (NUM_INSTANCES-1). This optimised variant instead maps each length x to
+ * (x-2) * (NUM_INSTANCES-1) + 1, i.e. NUM_INSTANCES-1 fewer evaluations than degree + 1.
+ * @tparam NUM_INSTANCES
+ * @tparam NUM_SUBRELATIONS
+ * @param SUBRELATION_PARTIAL_LENGTHS The array of subrelation lengths supplied by a relation.
+ * @return The transformed subrelation lengths
+ */
+template <size_t NUM_INSTANCES, size_t NUM_SUBRELATIONS>
+consteval std::array<size_t, NUM_SUBRELATIONS> compute_optimised_composed_subrelation_partial_lengths(
+    std::array<size_t, NUM_SUBRELATIONS> SUBRELATION_PARTIAL_LENGTHS)
+{
+    std::transform(SUBRELATION_PARTIAL_LENGTHS.begin(),
+                   SUBRELATION_PARTIAL_LENGTHS.end(),
+                   SUBRELATION_PARTIAL_LENGTHS.begin(),
+                   [](const size_t x) { return (x - 2) * (NUM_INSTANCES - 1) + 1; });
+    return SUBRELATION_PARTIAL_LENGTHS;
+};
+
 /**
  * @brief The templates defined herein facilitate sharing the relation arithmetic between the prover and the
  * verifier.
@@ -149,6 +171,11 @@ template <typename RelationImpl> class Relation : public RelationImpl {
     template <size_t NUM_INSTANCES>
     using ProtogalaxyTupleOfUnivariatesOverSubrelations =
         TupleOfUnivariates<FF, compute_composed_subrelation_partial_lengths<NUM_INSTANCES>(SUBRELATION_TOTAL_LENGTHS)>;
+    template <size_t NUM_INSTANCES>
+    using OptimisedProtogalaxyTupleOfUnivariatesOverSubrelations =
+        TupleOfUnivariates<FF,
+                           compute_optimised_composed_subrelation_partial_lengths<NUM_INSTANCES>(
+                               SUBRELATION_TOTAL_LENGTHS)>;
     using SumcheckTupleOfUnivariatesOverSubrelations =
         TupleOfUnivariates<FF, RelationImpl::SUBRELATION_PARTIAL_LENGTHS>;
     using SumcheckArrayOfValuesOverSubrelations = ArrayOfValues<FF, RelationImpl::SUBRELATION_PARTIAL_LENGTHS>;
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
index 677749016b5..d5f4b8a2c4e 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
@@ -87,6 +87,10 @@ class GoblinUltraFlavor {
     template <size_t NUM_INSTANCES>
     using ProtogalaxyTupleOfTuplesOfUnivariates =
         decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
+
+    template <size_t NUM_INSTANCES>
+    using OptimisedProtogalaxyTupleOfTuplesOfUnivariates =
+        decltype(create_optimised_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
     using SumcheckTupleOfTuplesOfUnivariates = decltype(create_sumcheck_tuple_of_tuples_of_univariates<Relations>());
     using TupleOfArraysOfValues = decltype(create_tuple_of_arrays_of_values<Relations>());
 
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
index bb40fe0c7ff..618dedcecb3 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
@@ -75,6 +75,9 @@ class UltraFlavor {
     template <size_t NUM_INSTANCES>
     using ProtogalaxyTupleOfTuplesOfUnivariates =
         decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
+    template <size_t NUM_INSTANCES>
+    using OptimisedProtogalaxyTupleOfTuplesOfUnivariates =
+        decltype(create_optimised_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
     using SumcheckTupleOfTuplesOfUnivariates = decltype(create_sumcheck_tuple_of_tuples_of_univariates<Relations>());
     using TupleOfArraysOfValues = decltype(create_tuple_of_arrays_of_values<Relations>());
 
diff --git a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
index 1f16a50834d..23e33c7df7e 100644
--- a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
@@ -160,6 +160,9 @@ class AvmFlavor {
     template <size_t NUM_INSTANCES>
     using ProtogalaxyTupleOfTuplesOfUnivariates =
         decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
+    template <size_t NUM_INSTANCES>
+    using OptimisedProtogalaxyTupleOfTuplesOfUnivariates =
+        decltype(create_optimised_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
     using SumcheckTupleOfTuplesOfUnivariates = decltype(create_sumcheck_tuple_of_tuples_of_univariates<Relations>());
     using TupleOfArraysOfValues = decltype(create_tuple_of_arrays_of_values<Relations>());
 

From 84d908fb6fd53841a845c79e16a17a76b10f7132 Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Thu, 18 Apr 2024 13:07:45 +0000
Subject: [PATCH 03/13] More generic, less efficient

---
 .../barretenberg/polynomials/univariate.hpp   | 31 +++++++++----------
 .../protogalaxy/protogalaxy_prover.hpp        |  3 +-
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
index 943f9be4201..044a4e97fde 100644
--- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
+++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
@@ -271,9 +271,10 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
      * subtraction: setting Δ = v1-v0, the values of f(X) are f(0)=v0, f(1)= v0 + Δ, v2 = f(1) + Δ, v3 = f(2) + Δ...
      *
      */
-    template <size_t EXTENDED_DOMAIN_END, bool optimised = false> Univariate<Fr, EXTENDED_DOMAIN_END> extend_to() const
+    template <size_t EXTENDED_DOMAIN_END, size_t NUM_SKIPPED_INDICES = 0>
+    Univariate<Fr, EXTENDED_DOMAIN_END> extend_to() const
     {
-        const size_t EXTENDED_LENGTH = EXTENDED_DOMAIN_END - domain_start;
+        const size_t EXTENDED_LENGTH = EXTENDED_DOMAIN_END - domain_start + NUM_SKIPPED_INDICES;
         using Data = BarycentricData<Fr, LENGTH, EXTENDED_LENGTH>;
         static_assert(EXTENDED_LENGTH >= LENGTH);
 
@@ -282,22 +283,13 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
         std::copy(evaluations.begin(), evaluations.end(), result.evaluations.begin());
 
         static constexpr Fr inverse_two = Fr(2).invert();
-        // static_assert(!optimised || (LENGTH <= 2));
+        static_assert(NUM_SKIPPED_INDICES < LENGTH);
         if constexpr (LENGTH == 2) {
             Fr delta = value_at(1) - value_at(0);
             static_assert(EXTENDED_LENGTH != 0);
-            if constexpr (optimised) {
-                Fr current = result.value_at(1);
-                for (size_t idx = domain_end - 2; idx < EXTENDED_DOMAIN_END - 1; idx++) {
-                    current += delta;
-                    result.value_at(idx + 1) = current;
-                }
-            } else {
-                for (size_t idx = domain_end - 1; idx < EXTENDED_DOMAIN_END - 1; idx++) {
-                    result.value_at(idx + 1) = result.value_at(idx) + delta;
-                }
+            for (size_t idx = domain_end - 1; idx < EXTENDED_DOMAIN_END - 1; idx++) {
+                result.value_at(idx + 1) = result.value_at(idx) + delta;
             }
-            return result;
         } else if constexpr (LENGTH == 3) {
             // Based off https://hackmd.io/@aztec-network/SyR45cmOq?type=view
             // The technique used here is the same as the length == 3 case below.
@@ -313,7 +305,6 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
                 result.value_at(idx + 1) = result.value_at(idx) + extra;
                 extra += a2;
             }
-            return result;
         } else if constexpr (LENGTH == 4) {
             static constexpr Fr inverse_six = Fr(6).invert(); // computed at compile time for efficiency
 
@@ -377,7 +368,6 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
 
                 linear_term += three_a_plus_two_b;
             }
-            return result;
         } else {
             for (size_t k = domain_end; k != EXTENDED_DOMAIN_END; ++k) {
                 result.value_at(k) = 0;
@@ -390,8 +380,17 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
                 // scale the sum by the the value of of B(x)
                 result.value_at(k) *= Data::full_numerator_values[k];
             }
+        }
+        if constexpr (NUM_SKIPPED_INDICES == 0) {
             return result;
         }
+        Univariate<Fr, EXTENDED_LENGTH - NUM_SKIPPED_INDICES> optimised_result;
+        optimised_result.value_at(0) = result.value_at(0);
+
+        std::copy(std::next(result.begin(), 1 + NUM_SKIPPED_INDICES),
+                  result.evaluations.end(),
+                  std::next(optimised_result.begin(), 1));
+        return optimised_result;
     }
 
     /**
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index 423cd67afda..b908ce342f1 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -311,7 +311,8 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     {
         auto base_univariates = instances.row_to_univariates(row_idx);
         for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) {
-            extended_univariate = base_univariate.template extend_to<OptimisedExtendedUnivariate::LENGTH, true>();
+            extended_univariate =
+                base_univariate.template extend_to<OptimisedExtendedUnivariate::LENGTH, ProverInstances::NUM - 1>();
         }
     }
 

From 6c26d5392dd8bc032f461ae4f775df05cfca9fc2 Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Mon, 22 Apr 2024 13:56:14 +0000
Subject: [PATCH 04/13] Parameters

---
 .../src/barretenberg/polynomials/univariate.hpp  |  4 ++--
 .../protogalaxy/protogalaxy_prover.hpp           | 16 ++++++++++++----
 .../barretenberg/sumcheck/instance/instances.hpp |  2 ++
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
index 044a4e97fde..4bab8790d05 100644
--- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
+++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
@@ -387,9 +387,9 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
         Univariate<Fr, EXTENDED_LENGTH - NUM_SKIPPED_INDICES> optimised_result;
         optimised_result.value_at(0) = result.value_at(0);
 
-        std::copy(std::next(result.begin(), 1 + NUM_SKIPPED_INDICES),
+        std::copy(std::next(result.evaluations.begin(), 1 + NUM_SKIPPED_INDICES),
                   result.evaluations.end(),
-                  std::next(optimised_result.begin(), 1));
+                  std::next(optimised_result.evaluations.begin(), 1));
         return optimised_result;
     }
 
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index b908ce342f1..7c3d96ebaa5 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -381,7 +381,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                 accumulate_relation_univariates(
                     thread_univariate_accumulators[thread_idx],
                     extended_univariates[thread_idx],
-                    instances.relation_parameters, // these parameters have already been folded
+                    instances.optimised_relation_parameters, // these parameters have already been folded
                     pow_challenge);
             }
         });
@@ -406,10 +406,13 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
             static_assert(std::remove_reference_t<decltype(optimised_element)>::LENGTH + (ProverInstances::NUM - 1) ==
                           std::remove_reference_t<decltype(element)>::LENGTH);
             element.evaluations[0] = optimised_element.evaluations[0];
-            element.evaluations[1] = FF(0);
+            for (size_t i = 1; i < ProverInstances::NUM; i++) {
+                element.evaluations[i] = FF(0);
+            }
             for (size_t i = 1; i < std::remove_reference_t<decltype(optimised_element)>::LENGTH; i++) {
-                element.evaluations[i + 1] = optimised_element.evaluations[i];
+                element.evaluations[i + ProverInstances::NUM - 1] = optimised_element.evaluations[i];
             }
+            info("Element ", outer_idx, ".", inner_idx, "[", ":", "] = ", element);
         };
 
         Utils::template apply_to_tuple_of_tuples<0, 0>(univariate_accumulators, deoptimise);
@@ -488,7 +491,8 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     {
         size_t param_idx = 0;
         auto to_fold = instances.relation_parameters.get_to_fold();
-        for (auto& folded_parameter : to_fold) {
+        auto to_fold_optimised = instances.optimised_relation_parameters.get_to_fold();
+        for (auto [folded_parameter, optimised_folded_parameter] : zip_view(to_fold, to_fold_optimised)) {
             Univariate<FF, ProverInstances::NUM> tmp(0);
             size_t instance_idx = 0;
             for (auto& instance : instances) {
@@ -496,6 +500,10 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                 instance_idx++;
             }
             folded_parameter = tmp.template extend_to<ProverInstances::EXTENDED_LENGTH>();
+            optimised_folded_parameter.value_at(0) = folded_parameter.value_at(0);
+            std::copy(std::next(folded_parameter.evaluations.begin(), ProverInstances::NUM),
+                      folded_parameter.evaluations.end(),
+                      std::next(optimised_folded_parameter.evaluations.begin(), 1));
             param_idx++;
         }
     }
diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
index dcfc8883098..e436270a49c 100644
--- a/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
+++ b/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
@@ -18,9 +18,11 @@ template <typename Flavor_, size_t NUM_ = 2> struct ProverInstances_ {
     static constexpr size_t EXTENDED_LENGTH = (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (NUM - 1) + 1;
     static constexpr size_t BATCHED_EXTENDED_LENGTH = (Flavor::MAX_TOTAL_RELATION_LENGTH - 1 + NUM - 1) * (NUM - 1) + 1;
     using RelationParameters = bb::RelationParameters<Univariate<FF, EXTENDED_LENGTH>>;
+    using OptimisedRelationParameters = bb::RelationParameters<Univariate<FF, EXTENDED_LENGTH - NUM + 1>>;
     using RelationSeparator = std::array<Univariate<FF, BATCHED_EXTENDED_LENGTH>, NUM_SUBRELATIONS - 1>;
     ArrayType _data;
     RelationParameters relation_parameters;
+    OptimisedRelationParameters optimised_relation_parameters;
     RelationSeparator alphas;
     std::vector<FF> next_gate_challenges;
 

From c739afa56f147c7c63ea2dd53af8cf97474833ea Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Wed, 24 Apr 2024 14:56:02 +0000
Subject: [PATCH 05/13] Different strategy

---
 .../src/barretenberg/eccvm/eccvm_flavor.hpp   |   6 +
 .../barretenberg/polynomials/univariate.hpp   | 188 ++++++++++--------
 .../protogalaxy/protogalaxy_prover.hpp        |  52 ++---
 .../relations/nested_containers.hpp           |  24 ++-
 .../barretenberg/relations/relation_types.hpp |  29 +--
 .../goblin_ultra_flavor.hpp                   |   7 +
 .../stdlib_circuit_builders/ultra_flavor.hpp  |   6 +
 .../sumcheck/instance/instances.hpp           |   6 +-
 .../goblin_translator_flavor.hpp              |   6 +
 .../barretenberg/vm/generated/avm_flavor.hpp  |   6 +
 10 files changed, 177 insertions(+), 153 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
index c3c26a6ef70..3e315bf0d9a 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
@@ -401,6 +401,12 @@ class ECCVMFlavor {
      * @brief A container for univariates used during sumcheck.
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
+    /**
+     * @brief A container for univariates used during Protogalaxy folding and sumcheck.
+     * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
+     */
+    template <size_t LENGTH, size_t SKIP_COUNT>
+    using OptimisedProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH, 0, SKIP_COUNT>>;
 
     /**
      * @brief A container for univariates produced during the hot loop in sumcheck.
diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
index 4bab8790d05..edbb358a95c 100644
--- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
+++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
@@ -13,17 +13,17 @@ namespace bb {
  * of the data in those univariates. We do that by taking a view of those elements and then, as needed, using this to
  * populate new containers.
  */
-template <class Fr, size_t view_domain_end, size_t view_domain_start> class UnivariateView;
+template <class Fr, size_t view_domain_end, size_t view_domain_start, size_t skip_count> class UnivariateView;
 
 /**
  * @brief A univariate polynomial represented by its values on {domain_start, domain_start + 1,..., domain_end - 1}. For
  * memory efficiency purposes, we store the evaluations in an array starting from 0 and make the mapping to the right
  * domain under the hood.
  */
-template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate {
+template <class Fr, size_t domain_end, size_t domain_start = 0, size_t skip_count = 0> class Univariate {
   public:
     static constexpr size_t LENGTH = domain_end - domain_start;
-    using View = UnivariateView<Fr, domain_end, domain_start>;
+    using View = UnivariateView<Fr, domain_end, domain_start, skip_count>;
 
     using value_type = Fr; // used to get the type of the elements consistently with std::array
 
@@ -50,7 +50,7 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
         }
     }
     // Construct Univariate from UnivariateView
-    explicit Univariate(UnivariateView<Fr, domain_end, domain_start> in)
+    explicit Univariate(UnivariateView<Fr, domain_end, domain_start, skip_count> in)
         : evaluations{}
     {
         for (size_t i = 0; i < in.evaluations.size(); ++i) {
@@ -77,7 +77,7 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
 
     static Univariate get_random()
     {
-        auto output = Univariate<Fr, domain_end, domain_start>();
+        auto output = Univariate<Fr, domain_end, domain_start, skip_count>();
         for (size_t i = 0; i != LENGTH; ++i) {
             output.value_at(i) = Fr::random_element();
         }
@@ -86,7 +86,7 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
 
     static Univariate zero()
     {
-        auto output = Univariate<Fr, domain_end, domain_start>();
+        auto output = Univariate<Fr, domain_end, domain_start, skip_count>();
         for (size_t i = 0; i != LENGTH; ++i) {
             output.value_at(i) = Fr::zero();
         }
@@ -100,21 +100,25 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
 
     Univariate& operator+=(const Univariate& other)
     {
-        for (size_t i = 0; i < LENGTH; ++i) {
+        evaluations[0] += other.evaluations[0];
+        for (size_t i = skip_count + 1; i < LENGTH; ++i) {
             evaluations[i] += other.evaluations[i];
         }
         return *this;
     }
     Univariate& operator-=(const Univariate& other)
     {
-        for (size_t i = 0; i < LENGTH; ++i) {
+        evaluations[0] -= other.evaluations[0];
+        for (size_t i = skip_count + 1; i < LENGTH; ++i) {
+
             evaluations[i] -= other.evaluations[i];
         }
         return *this;
     }
     Univariate& operator*=(const Univariate& other)
     {
-        for (size_t i = 0; i < LENGTH; ++i) {
+        evaluations[0] *= other.evaluations[0];
+        for (size_t i = skip_count + 1; i < LENGTH; ++i) {
             evaluations[i] *= other.evaluations[i];
         }
         return *this;
@@ -135,8 +139,12 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
     Univariate operator-() const
     {
         Univariate res(*this);
+        size_t i = 0;
         for (auto& eval : res.evaluations) {
-            eval = -eval;
+            if (i == 0 || i >= (skip_count + 1)) {
+                eval = -eval;
+            }
+            i++;
         }
         return res;
     }
@@ -151,23 +159,35 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
     // Operations between Univariate and scalar
     Univariate& operator+=(const Fr& scalar)
     {
+        size_t i = 0;
         for (auto& eval : evaluations) {
-            eval += scalar;
+            if (i == 0 || i >= (skip_count + 1)) {
+                eval += scalar;
+            }
+            i++;
         }
         return *this;
     }
 
     Univariate& operator-=(const Fr& scalar)
     {
+        size_t i = 0;
         for (auto& eval : evaluations) {
-            eval -= scalar;
+            if (i == 0 || i >= (skip_count + 1)) {
+                eval -= scalar;
+            }
+            i++;
         }
         return *this;
     }
     Univariate& operator*=(const Fr& scalar)
     {
+        size_t i = 0;
         for (auto& eval : evaluations) {
-            eval *= scalar;
+            if (i == 0 || i >= (skip_count + 1)) {
+                eval *= scalar;
+            }
+            i++;
         }
         return *this;
     }
@@ -194,45 +214,48 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
     }
 
     // Operations between Univariate and UnivariateView
-    Univariate& operator+=(const UnivariateView<Fr, domain_end, domain_start>& view)
+    Univariate& operator+=(const UnivariateView<Fr, domain_end, domain_start, skip_count>& view)
     {
-        for (size_t i = 0; i < LENGTH; ++i) {
+        evaluations[0] += view.evaluations[0];
+        for (size_t i = skip_count + 1; i < LENGTH; ++i) {
             evaluations[i] += view.evaluations[i];
         }
         return *this;
     }
 
-    Univariate& operator-=(const UnivariateView<Fr, domain_end, domain_start>& view)
+    Univariate& operator-=(const UnivariateView<Fr, domain_end, domain_start, skip_count>& view)
     {
-        for (size_t i = 0; i < LENGTH; ++i) {
+        evaluations[0] -= view.evaluations[0];
+        for (size_t i = skip_count + 1; i < LENGTH; ++i) {
             evaluations[i] -= view.evaluations[i];
         }
         return *this;
     }
 
-    Univariate& operator*=(const UnivariateView<Fr, domain_end, domain_start>& view)
+    Univariate& operator*=(const UnivariateView<Fr, domain_end, domain_start, skip_count>& view)
     {
-        for (size_t i = 0; i < LENGTH; ++i) {
+        evaluations[0] *= view.evaluations[0];
+        for (size_t i = skip_count + 1; i < LENGTH; ++i) {
             evaluations[i] *= view.evaluations[i];
         }
         return *this;
     }
 
-    Univariate operator+(const UnivariateView<Fr, domain_end, domain_start>& view) const
+    Univariate operator+(const UnivariateView<Fr, domain_end, domain_start, skip_count>& view) const
     {
         Univariate res(*this);
         res += view;
         return res;
     }
 
-    Univariate operator-(const UnivariateView<Fr, domain_end, domain_start>& view) const
+    Univariate operator-(const UnivariateView<Fr, domain_end, domain_start, skip_count>& view) const
     {
         Univariate res(*this);
         res -= view;
         return res;
     }
 
-    Univariate operator*(const UnivariateView<Fr, domain_end, domain_start>& view) const
+    Univariate operator*(const UnivariateView<Fr, domain_end, domain_start, skip_count>& view) const
     {
         Univariate res(*this);
         res *= view;
@@ -256,29 +279,31 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
     }
 
     /**
-     * @brief Given a univariate f represented by {f(domain_start), ..., f(domain_end - 1)}, compute the evaluations
-     * {f(domain_end),..., f(extended_domain_end -1)} and return the Univariate represented by  {f(domain_start),...,
-     * f(extended_domain_end -1)}
+     * @brief Given a univariate f represented by {f(domain_start), ..., f(domain_end - 1)}, compute the
+     * evaluations {f(domain_end),..., f(extended_domain_end -1)} and return the Univariate represented by
+     * {f(domain_start),..., f(extended_domain_end -1)}
      *
-     * @details Write v_i = f(x_i) on a the domain {x_{domain_start}, ..., x_{domain_end-1}}. To efficiently compute the
-     * needed values of f, we use the barycentric formula
+     * @details Write v_i = f(x_i) on the domain {x_{domain_start}, ..., x_{domain_end-1}}. To efficiently
+     * compute the needed values of f, we use the barycentric formula
      *      - f(x) = B(x) Σ_{i=domain_start}^{domain_end-1} v_i / (d_i*(x-x_i))
      * where
      *      - B(x) = Π_{i=domain_start}^{domain_end-1} (x-x_i)
-     *      - d_i  = Π_{j ∈ {domain_start, ..., domain_end-1}, j≠i} (x_i-x_j) for i ∈ {domain_start, ..., domain_end-1}
+     *      - d_i  = Π_{j ∈ {domain_start, ..., domain_end-1}, j≠i} (x_i-x_j) for i ∈ {domain_start, ...,
+     * domain_end-1}
      *
-     * When the domain size is two, extending f = v0(1-X) + v1X to a new value involves just one addition and a
-     * subtraction: setting Δ = v1-v0, the values of f(X) are f(0)=v0, f(1)= v0 + Δ, v2 = f(1) + Δ, v3 = f(2) + Δ...
+     * When the domain size is two, extending f = v0(1-X) + v1X to a new value involves just one addition
+     * and a subtraction: setting Δ = v1-v0, the values of f(X) are f(0)=v0, f(1)= v0 + Δ, f(2) = f(1) + Δ,
+     * f(3) = f(2) + Δ...
      *
      */
     template <size_t EXTENDED_DOMAIN_END, size_t NUM_SKIPPED_INDICES = 0>
-    Univariate<Fr, EXTENDED_DOMAIN_END> extend_to() const
+    Univariate<Fr, EXTENDED_DOMAIN_END, 0, NUM_SKIPPED_INDICES> extend_to() const
     {
-        const size_t EXTENDED_LENGTH = EXTENDED_DOMAIN_END - domain_start + NUM_SKIPPED_INDICES;
+        const size_t EXTENDED_LENGTH = EXTENDED_DOMAIN_END - domain_start;
         using Data = BarycentricData<Fr, LENGTH, EXTENDED_LENGTH>;
         static_assert(EXTENDED_LENGTH >= LENGTH);
 
-        Univariate<Fr, EXTENDED_LENGTH> result;
+        Univariate<Fr, EXTENDED_LENGTH, 0, NUM_SKIPPED_INDICES> result;
 
         std::copy(evaluations.begin(), evaluations.end(), result.evaluations.begin());
 
@@ -315,8 +340,8 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
             //          a*1 + b*1 + c*1 + d = f(1)
             //          a*2^3 + b*2^2 + c*2 + d = f(2)
             //          a*3^3 + b*3^2 + c*3 + d = f(3)
-            // These equations can be rewritten as a matrix equation M * [a, b, c, d] = [f(0), f(1), f(2), f(3)], where
-            // M is:
+            // These equations can be rewritten as a matrix equation M * [a, b, c, d] = [f(0), f(1), f(2),
+            // f(3)], where M is:
             //          0,  0,  0,  1
             //          1,  1,  1,  1
             //          2^3, 2^2, 2,  1
@@ -326,9 +351,9 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
             //      1,	    -5/2,	2,	    -1/2
             //      -11/6,	3,	    -3/2,	1/3
             //      1,	    0,	    0,	    0
-            // To compute these values, we can multiply everything by 6 and multiply by inverse_six at the end for each
-            // coefficient The resulting computation here does 18 field adds, 6 subtracts, 3 muls to compute a, b, c,
-            // and d.
+            // To compute these values, we can multiply everything by 6 and multiply by inverse_six at the
+            // end for each coefficient. The resulting computation here does 18 field adds, 6 subtracts, 3
+            // muls to compute a, b, c, and d.
             Fr zero_times_3 = value_at(0) + value_at(0) + value_at(0);
             Fr zero_times_6 = zero_times_3 + zero_times_3;
             Fr zero_times_12 = zero_times_6 + zero_times_6;
@@ -381,16 +406,7 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
                 result.value_at(k) *= Data::full_numerator_values[k];
             }
         }
-        if constexpr (NUM_SKIPPED_INDICES == 0) {
-            return result;
-        }
-        Univariate<Fr, EXTENDED_LENGTH - NUM_SKIPPED_INDICES> optimised_result;
-        optimised_result.value_at(0) = result.value_at(0);
-
-        std::copy(std::next(result.evaluations.begin(), 1 + NUM_SKIPPED_INDICES),
-                  result.evaluations.end(),
-                  std::next(optimised_result.evaluations.begin(), 1));
-        return optimised_result;
+        return result;
     }
 
     /**
@@ -407,8 +423,8 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
             full_numerator_value *= u - i;
         }
 
-        // build set of domain size-many denominator inverses 1/(d_i*(x_k - x_j)). will multiply against each of
-        // these (rather than to divide by something) for each barycentric evaluation
+        // build set of domain size-many denominator inverses 1/(d_i*(x_k - x_j)). will multiply against
+        // each of these (rather than to divide by something) for each barycentric evaluation
         std::array<Fr, LENGTH> denominator_inverses;
         for (size_t i = 0; i != LENGTH; ++i) {
             Fr inv = Data::lagrange_denominators[i];
@@ -451,7 +467,7 @@ inline void write(B& it, Univariate<Fr, domain_end, domain_start> const& univari
     write(it, univariate.evaluations);
 }
 
-template <class Fr, size_t domain_end, size_t domain_start = 0> class UnivariateView {
+template <class Fr, size_t domain_end, size_t domain_start = 0, size_t skip_count = 0> class UnivariateView {
   public:
     static constexpr size_t LENGTH = domain_end - domain_start;
     std::span<const Fr, LENGTH> evaluations;
@@ -461,77 +477,84 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
     const Fr& value_at(size_t i) const { return evaluations[i]; };
 
     template <size_t full_domain_end, size_t full_domain_start = 0>
-    explicit UnivariateView(const Univariate<Fr, full_domain_end, full_domain_start>& univariate_in)
+    explicit UnivariateView(const Univariate<Fr, full_domain_end, full_domain_start, skip_count>& univariate_in)
         : evaluations(std::span<const Fr>(univariate_in.evaluations.data(), LENGTH)){};
 
-    Univariate<Fr, domain_end, domain_start> operator+(const UnivariateView& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator+(const UnivariateView& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res += other;
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator-(const UnivariateView& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator-(const UnivariateView& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res -= other;
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator-() const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator-() const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
+        size_t i = 0;
         for (auto& eval : res.evaluations) {
-            eval = -eval;
+            if (i == 0 || i >= (skip_count + 1)) {
+                eval = -eval;
+            }
+            i++;
         }
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator*(const UnivariateView& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator*(const UnivariateView& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res *= other;
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator*(const Univariate<Fr, domain_end, domain_start>& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator*(
+        const Univariate<Fr, domain_end, domain_start, skip_count>& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res *= other;
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator+(const Univariate<Fr, domain_end, domain_start>& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator+(
+        const Univariate<Fr, domain_end, domain_start, skip_count>& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res += other;
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator+(const Fr& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator+(const Fr& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res += other;
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator-(const Fr& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator-(const Fr& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res -= other;
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator*(const Fr& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator*(const Fr& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res *= other;
         return res;
     }
 
-    Univariate<Fr, domain_end, domain_start> operator-(const Univariate<Fr, domain_end, domain_start>& other) const
+    Univariate<Fr, domain_end, domain_start, skip_count> operator-(
+        const Univariate<Fr, domain_end, domain_start, skip_count>& other) const
     {
-        Univariate<Fr, domain_end, domain_start> res(*this);
+        Univariate<Fr, domain_end, domain_start, skip_count> res(*this);
         res -= other;
         return res;
     }
@@ -554,8 +577,8 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
 };
 
 /**
- * @brief Create a sub-array of `elements` at the indices given in the template pack `Is`, converting them to the new
- * type T.
+ * @brief Create a sub-array of `elements` at the indices given in the template pack `Is`, converting them
+ * to the new type T.
  *
  * @tparam T type to convert to
  * @tparam U type to convert from
@@ -563,8 +586,8 @@ template <class Fr, size_t domain_end, size_t domain_start = 0> class Univariate
  * @tparam Is list of indices we want in the returned array. When the second argument is called with
  * `std::make_index_sequence<N>`, these will be `0, 1, ..., N-1`.
  * @param elements array to convert from
- * @return std::array<T, sizeof...(Is)> result array s.t. result[i] = T(elements[Is[i]]). By default, Is[i] = i when
- * called with `std::make_index_sequence<N>`.
+ * @return std::array<T, sizeof...(Is)> result array s.t. result[i] = T(elements[Is[i]]). By default, Is[i]
+ * = i when called with `std::make_index_sequence<N>`.
  */
 template <typename T, typename U, std::size_t N, std::size_t... Is>
 std::array<T, sizeof...(Is)> array_to_array_aux(const std::array<U, N>& elements, std::index_sequence<Is...>)
@@ -576,11 +599,12 @@ std::array<T, sizeof...(Is)> array_to_array_aux(const std::array<U, N>& elements
  * @brief Given an std::array<U,N>, returns an std::array<T,N>, by calling the (explicit) constructor T(U).
  *
  * @details https://stackoverflow.com/a/32175958
- * The main use case is to convert an array of `Univariate` into `UnivariateView`. The main use case would be to let
- * Sumcheck decide the required degree of the relation evaluation, rather than hardcoding it inside the relation. The
- * `_aux` version could also be used to create an array of only the polynomials required by the relation, and it could
- * help us implement the optimization where we extend each edge only up to the maximum degree that is required over all
- * relations (for example, `L_LAST` only needs degree 3).
+ * The main use case is to convert an array of `Univariate` into `UnivariateView`. The main use case would
+ * be to let Sumcheck decide the required degree of the relation evaluation, rather than hardcoding it
+ * inside the relation. The
+ * `_aux` version could also be used to create an array of only the polynomials required by the relation,
+ * and it could help us implement the optimization where we extend each edge only up to the maximum degree
+ * that is required over all relations (for example, `L_LAST` only needs degree 3).
  *
  * @tparam T Output type
  * @tparam U Input type (deduced from `elements`)
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index 7c3d96ebaa5..84b6ca8b23e 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -50,7 +50,10 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     // obtained by composing a relation with folded instance + relation parameters .
     using ExtendedUnivariate = Univariate<FF, (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (ProverInstances::NUM - 1) + 1>;
     using OptimisedExtendedUnivariate =
-        Univariate<FF, (Flavor::MAX_TOTAL_RELATION_LENGTH - 2) * (ProverInstances::NUM - 1) + 1>;
+        Univariate<FF,
+                   (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (ProverInstances::NUM - 1) + 1,
+                   0,
+                   ProverInstances::NUM - 1>;
     // Represents the total length of the combiner univariate, obtained by combining the already folded relations with
     // the folded relation batching challenge.
     using ExtendedUnivariateWithRandomization =
@@ -58,7 +61,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                    (Flavor::MAX_TOTAL_RELATION_LENGTH - 1 + ProverInstances::NUM - 1) * (ProverInstances::NUM - 1) + 1>;
     using ExtendedUnivariates = typename Flavor::template ProverUnivariates<ExtendedUnivariate::LENGTH>;
     using OptimisedExtendedUnivariates =
-        typename Flavor::template ProverUnivariates<ExtendedUnivariate::LENGTH - (1 * (ProverInstances::NUM - 1))>;
+        typename Flavor::template OptimisedProverUnivariates<ExtendedUnivariate::LENGTH, ProverInstances::NUM - 1>;
 
     using TupleOfTuplesOfUnivariates =
         typename Flavor::template ProtogalaxyTupleOfTuplesOfUnivariates<ProverInstances::NUM>;
@@ -287,32 +290,14 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
      * (i.e., compute additional evaluations at adjacent domain values) as needed.
      * @todo TODO(https://github.com/AztecProtocol/barretenberg/issues/751) Optimize memory
      */
-    void extend_univariates(ExtendedUnivariates& extended_univariates,
+    template <size_t skip_count = 0>
+    void extend_univariates(OptimisedExtendedUnivariates& extended_univariates,
                             const ProverInstances& instances,
                             const size_t row_idx)
     {
-        auto base_univariates = instances.row_to_univariates(row_idx);
+        auto base_univariates = instances.template row_to_univariates<skip_count>(row_idx);
         for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) {
-            extended_univariate = base_univariate.template extend_to<ExtendedUnivariate::LENGTH>();
-        }
-    }
-
-    /**
-     * @brief Prepare a univariate polynomial for relation execution in one step of the main loop in folded instance
-     * construction.
-     * @details For a fixed prover polynomial index, extract that polynomial from each instance in Instances. From each
-     * polynomial, extract the value at row_idx. Use these values to create a univariate polynomial, and then extend
-     * (i.e., compute additional evaluations at adjacent domain values) as needed.
-     * @todo TODO(https://github.com/AztecProtocol/barretenberg/issues/751) Optimize memory
-     */
-    void optimised_extend_univariates(OptimisedExtendedUnivariates& extended_univariates,
-                                      const ProverInstances& instances,
-                                      const size_t row_idx)
-    {
-        auto base_univariates = instances.row_to_univariates(row_idx);
-        for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) {
-            extended_univariate =
-                base_univariate.template extend_to<OptimisedExtendedUnivariate::LENGTH, ProverInstances::NUM - 1>();
+            extended_univariate = base_univariate.template extend_to<ExtendedUnivariate::LENGTH, skip_count>();
         }
     }
 
@@ -371,7 +356,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
             for (size_t idx = start; idx < end; idx++) {
                 // No need to initialise extended_univariates to 0, it's assigned to
-                optimised_extend_univariates(extended_univariates[thread_idx], instances, idx);
+                extend_univariates<ProverInstances::NUM - 1>(extended_univariates[thread_idx], instances, idx);
 
                 FF pow_challenge = pow_betas[idx];
 
@@ -391,31 +376,24 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         for (auto& accumulators : thread_univariate_accumulators) {
             Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
         }
-        deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
+        zero_skipped_indices(optimised_univariate_accumulators);
         // Batch the univariate contributions from each sub-relation to obtain the round univariate
         return batch_over_relations(univariate_accumulators, instances.alphas);
     }
 
-    static void deoptimise_univariates(OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators,
-                                       TupleOfTuplesOfUnivariates& univariate_accumulators
+    static void zero_skipped_indices(OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators
 
     )
     {
         auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
-            auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
-            static_assert(std::remove_reference_t<decltype(optimised_element)>::LENGTH + (ProverInstances::NUM - 1) ==
-                          std::remove_reference_t<decltype(element)>::LENGTH);
-            element.evaluations[0] = optimised_element.evaluations[0];
+            // auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
             for (size_t i = 1; i < ProverInstances::NUM; i++) {
                 element.evaluations[i] = FF(0);
             }
-            for (size_t i = 1; i < std::remove_reference_t<decltype(optimised_element)>::LENGTH; i++) {
-                element.evaluations[i + ProverInstances::NUM - 1] = optimised_element.evaluations[i];
-            }
-            info("Element ", outer_idx, ".", inner_idx, "[", ":", "] = ", element);
+            // info("Element ", outer_idx, ".", inner_idx, "[", ":", "] = ", element);
         };
 
-        Utils::template apply_to_tuple_of_tuples<0, 0>(univariate_accumulators, deoptimise);
+        Utils::template apply_to_tuple_of_tuples<0, 0>(optimised_univariate_accumulators, deoptimise);
     }
 
     static ExtendedUnivariateWithRandomization batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
diff --git a/barretenberg/cpp/src/barretenberg/relations/nested_containers.hpp b/barretenberg/cpp/src/barretenberg/relations/nested_containers.hpp
index 46f2d246303..36a522eb161 100644
--- a/barretenberg/cpp/src/barretenberg/relations/nested_containers.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/nested_containers.hpp
@@ -10,30 +10,42 @@ namespace bb {
  *
  * @details Credit: https://stackoverflow.com/a/60440611
  */
-template <template <typename, size_t, size_t> typename InnerContainer,
+template <template <typename, size_t, size_t, size_t> typename InnerContainer,
           typename ValueType,
           auto domain_end,
           size_t domain_start = 0,
+          size_t skip_count = 0,
           typename IS = decltype(std::make_index_sequence<domain_end.size()>())>
 struct TupleOfContainersOverArray;
-template <template <typename, size_t, size_t> typename InnerContainer,
+template <template <typename, size_t, size_t, size_t> typename InnerContainer,
           typename ValueType,
           auto domain_end,
           size_t domain_start,
+          size_t skip_count,
           std::size_t... I>
-struct TupleOfContainersOverArray<InnerContainer, ValueType, domain_end, domain_start, std::index_sequence<I...>> {
-    using type = std::tuple<InnerContainer<ValueType, domain_end[I], domain_start>...>;
+struct TupleOfContainersOverArray<InnerContainer,
+                                  ValueType,
+                                  domain_end,
+                                  domain_start,
+                                  skip_count,
+                                  std::index_sequence<I...>> {
+    using type = std::tuple<InnerContainer<ValueType, domain_end[I], domain_start, skip_count>...>;
 };
 
 // Helpers
-template <typename ValueType, size_t, size_t> using ExtractValueType = ValueType;
+template <typename ValueType, size_t, size_t, size_t> using ExtractValueType = ValueType;
 
 template <typename Tuple>
 using HomogeneousTupleToArray = std::array<std::tuple_element_t<0, Tuple>, std::tuple_size_v<Tuple>>;
 
 // Types needed for sumcheck and folding.
 template <typename FF, auto LENGTHS>
-using TupleOfUnivariates = typename TupleOfContainersOverArray<bb::Univariate, FF, LENGTHS, 0>::type;
+using TupleOfUnivariates = typename TupleOfContainersOverArray<bb::Univariate, FF, LENGTHS, 0, 0>::type;
+
+// Tuple of univariates that skip SKIP_COUNT evaluations; used by the optimised Protogalaxy folding.
+template <typename FF, auto LENGTHS, size_t SKIP_COUNT>
+using OptimisedTupleOfUnivariates =
+    typename TupleOfContainersOverArray<bb::Univariate, FF, LENGTHS, 0, SKIP_COUNT>::type;
 
 template <typename FF, auto LENGTHS>
 using TupleOfValues = typename TupleOfContainersOverArray<ExtractValueType, FF, LENGTHS>::type;
diff --git a/barretenberg/cpp/src/barretenberg/relations/relation_types.hpp b/barretenberg/cpp/src/barretenberg/relations/relation_types.hpp
index 87f1cfdd0d0..502d83df872 100644
--- a/barretenberg/cpp/src/barretenberg/relations/relation_types.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/relation_types.hpp
@@ -89,28 +89,6 @@ consteval std::array<size_t, NUM_SUBRELATIONS> compute_composed_subrelation_part
     return SUBRELATION_PARTIAL_LENGTHS;
 };
 
-/**
- * @brief Get the subrelation accumulators for the Protogalaxy combiner calculation.
- * @details A subrelation of degree D, when evaluated on polynomials of degree N, gives a polynomial of degree D
- * * N. In the context of Protogalaxy, N = NUM_INSTANCES-1. Hence, given a subrelation of length x, its
- * evaluation on such polynomials will have degree (x-1) * (NUM_INSTANCES-1), and the length of this evaluation
- * will be one greater than this.
- * @tparam NUM_INSTANCES
- * @tparam NUM_SUBRELATIONS
- * @param SUBRELATION_PARTIAL_LENGTHS The array of subrelation lengths supplied by a relation.
- * @return The transformed subrelation lenths
- */
-template <size_t NUM_INSTANCES, size_t NUM_SUBRELATIONS>
-consteval std::array<size_t, NUM_SUBRELATIONS> compute_optimised_composed_subrelation_partial_lengths(
-    std::array<size_t, NUM_SUBRELATIONS> SUBRELATION_PARTIAL_LENGTHS)
-{
-    std::transform(SUBRELATION_PARTIAL_LENGTHS.begin(),
-                   SUBRELATION_PARTIAL_LENGTHS.end(),
-                   SUBRELATION_PARTIAL_LENGTHS.begin(),
-                   [](const size_t x) { return (x - 2) * (NUM_INSTANCES - 1) + 1; });
-    return SUBRELATION_PARTIAL_LENGTHS;
-};
-
 /**
  * @brief The templates defined herein facilitate sharing the relation arithmetic between the prover and the
  * verifier.
@@ -173,9 +151,10 @@ template <typename RelationImpl> class Relation : public RelationImpl {
         TupleOfUnivariates<FF, compute_composed_subrelation_partial_lengths<NUM_INSTANCES>(SUBRELATION_TOTAL_LENGTHS)>;
     template <size_t NUM_INSTANCES>
     using OptimisedProtogalaxyTupleOfUnivariatesOverSubrelations =
-        TupleOfUnivariates<FF,
-                           compute_optimised_composed_subrelation_partial_lengths<NUM_INSTANCES>(
-                               SUBRELATION_TOTAL_LENGTHS)>;
+        OptimisedTupleOfUnivariates<FF,
+                                    compute_composed_subrelation_partial_lengths<NUM_INSTANCES>(
+                                        SUBRELATION_TOTAL_LENGTHS),
+                                    NUM_INSTANCES - 1>;
     using SumcheckTupleOfUnivariatesOverSubrelations =
         TupleOfUnivariates<FF, RelationImpl::SUBRELATION_PARTIAL_LENGTHS>;
     using SumcheckArrayOfValuesOverSubrelations = ArrayOfValues<FF, RelationImpl::SUBRELATION_PARTIAL_LENGTHS>;
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
index d5f4b8a2c4e..aac9f279f1f 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
@@ -454,6 +454,13 @@ class GoblinUltraFlavor {
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
 
+    /**
+     * @brief A container for univariates used during Protogalaxy folding and sumcheck.
+     * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
+     */
+    template <size_t LENGTH, size_t SKIP_COUNT>
+    using OptimisedProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH, 0, SKIP_COUNT>>;
+
     /**
      * @brief A container for univariates produced during the hot loop in sumcheck.
      */
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
index 618dedcecb3..16886aacef0 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
@@ -475,6 +475,12 @@ class UltraFlavor {
      * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
+    /**
+     * @brief A container for univariates used during Protogalaxy folding in 'optimised' mode.
+     * @details Univariates in the optimised version skip some redundant computation whose result is already known.
+     */
+    template <size_t LENGTH, size_t SKIP_COUNT>
+    using OptimisedProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH, 0, SKIP_COUNT>>;
 
     /**
      * @brief A container for univariates produced during the hot loop in sumcheck.
diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
index e436270a49c..a09c9aca88f 100644
--- a/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
+++ b/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
@@ -18,7 +18,7 @@ template <typename Flavor_, size_t NUM_ = 2> struct ProverInstances_ {
     static constexpr size_t EXTENDED_LENGTH = (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (NUM - 1) + 1;
     static constexpr size_t BATCHED_EXTENDED_LENGTH = (Flavor::MAX_TOTAL_RELATION_LENGTH - 1 + NUM - 1) * (NUM - 1) + 1;
     using RelationParameters = bb::RelationParameters<Univariate<FF, EXTENDED_LENGTH>>;
-    using OptimisedRelationParameters = bb::RelationParameters<Univariate<FF, EXTENDED_LENGTH - NUM + 1>>;
+    using OptimisedRelationParameters = bb::RelationParameters<Univariate<FF, EXTENDED_LENGTH, 0, NUM_ - 1>>;
     using RelationSeparator = std::array<Univariate<FF, BATCHED_EXTENDED_LENGTH>, NUM_SUBRELATIONS - 1>;
     ArrayType _data;
     RelationParameters relation_parameters;
@@ -56,10 +56,10 @@ template <typename Flavor_, size_t NUM_ = 2> struct ProverInstances_ {
      * @param row_idx A fixed row position in several execution traces
      * @return The univariates whose extensions will be used to construct the combiner.
      */
-    auto row_to_univariates(size_t row_idx) const
+    template <size_t skip_count = 0> auto row_to_univariates(size_t row_idx) const
     {
         auto insts_prover_polynomials_views = get_polynomials_views();
-        std::array<Univariate<FF, NUM>, insts_prover_polynomials_views[0].size()> results;
+        std::array<Univariate<FF, NUM, 0, skip_count>, insts_prover_polynomials_views[0].size()> results;
         // Set the size corresponding to the number of rows in the execution trace
         size_t instance_idx = 0;
         // Iterate over the prover polynomials' views corresponding to each instance
diff --git a/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp b/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
index 46eac85ce3a..50d9e2fe5e2 100644
--- a/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
@@ -879,6 +879,12 @@ class GoblinTranslatorFlavor {
      * @brief A container for univariates used during sumcheck.
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
+    /**
+     * @brief A container for univariates used during Protogalaxy folding and sumcheck.
+     * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
+     */
+    template <size_t LENGTH, size_t SKIP_COUNT>
+    using OptimisedProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH, 0, SKIP_COUNT>>;
 
     /**
      * @brief A container for univariates produced during the hot loop in sumcheck.
diff --git a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
index 23e33c7df7e..ea522018e0c 100644
--- a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
@@ -1707,6 +1707,12 @@ class AvmFlavor {
      * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
+    /**
+     * @brief A container for univariates used during Protogalaxy folding and sumcheck.
+     * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
+     */
+    template <size_t LENGTH, size_t SKIP_COUNT>
+    using OptimisedProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH, 0, SKIP_COUNT>>;
 
     /**
      * @brief A container for univariates produced during the hot loop in sumcheck.

From e38871e040cd707966e6287a58150839be40315b Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Thu, 25 Apr 2024 15:00:13 +0000
Subject: [PATCH 06/13] PG tests passing

---
 .../barretenberg/polynomials/univariate.hpp   |  16 ++-
 .../protogalaxy/combiner.test.cpp             |   2 +-
 .../protogalaxy/protogalaxy_prover.hpp        | 120 +++++++++++++++---
 3 files changed, 115 insertions(+), 23 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
index edbb358a95c..cb859eb74cb 100644
--- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
+++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
@@ -40,8 +40,20 @@ template <class Fr, size_t domain_end, size_t domain_start = 0, size_t skip_coun
     Univariate(Univariate&& other) noexcept = default;
     Univariate& operator=(const Univariate& other) = default;
     Univariate& operator=(Univariate&& other) noexcept = default;
-    // Construct constant Univariate from scalar which represents the value that all the points in the domain evaluate
-    // to
+    Univariate<Fr, domain_end, domain_start> convert() const noexcept
+    {
+        Univariate<Fr, domain_end, domain_start, 0> result;
+        result.evaluations[0] = evaluations[0];
+        for (size_t i = 1; i < skip_count + 1; i++) {
+            result.evaluations[i] = Fr::zero();
+        }
+        for (size_t i = skip_count + 1; i < LENGTH; i++) {
+            result.evaluations[i] = evaluations[i];
+        }
+        return result;
+    }
+    // Construct constant Univariate from scalar which represents the value that all the points in the domain
+    // evaluate to
     explicit Univariate(Fr value)
         : evaluations{}
     {
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
index 2e6f87f73f9..7bfed96bef7 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
@@ -53,7 +53,7 @@ TEST(Protogalaxy, CombinerOn2Instances)
             ProverInstances instances{ instance_data };
             instances.alphas.fill(bb::Univariate<FF, 12>(FF(0))); // focus on the arithmetic relation only
             auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-            auto result = prover.compute_combiner(instances, pow_polynomial);
+            auto result = prover.compute_combiner<true>(instances, pow_polynomial);
             auto expected_result = Univariate<FF, 12>(std::array<FF, 12>{
                 87706,
                 13644570,
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index 84b6ca8b23e..06dcd3cfe84 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -301,6 +301,16 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         }
     }
 
+    void extend_univariates(ExtendedUnivariates& extended_univariates,
+                            const ProverInstances& instances,
+                            const size_t row_idx)
+    {
+        auto base_univariates = instances.template row_to_univariates(row_idx);
+        for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) {
+            extended_univariate = base_univariate.template extend_to<ExtendedUnivariate::LENGTH>();
+        }
+    }
+
     template <typename Parameters, size_t relation_idx = 0>
     void accumulate_relation_univariates(OptimisedTupleOfTuplesOfUnivariates& univariate_accumulators,
                                          const OptimisedExtendedUnivariates& extended_univariates,
@@ -318,10 +328,28 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         }
     }
 
+    template <typename Parameters, size_t relation_idx = 0>
+    void accumulate_relation_univariates(TupleOfTuplesOfUnivariates& univariate_accumulators,
+                                         const ExtendedUnivariates& extended_univariates,
+                                         const Parameters& relation_parameters,
+                                         const FF& scaling_factor)
+    {
+        using Relation = std::tuple_element_t<relation_idx, Relations>;
+        Relation::accumulate(
+            std::get<relation_idx>(univariate_accumulators), extended_univariates, relation_parameters, scaling_factor);
+
+        // Repeat for the next relation.
+        if constexpr (relation_idx + 1 < Flavor::NUM_RELATIONS) {
+            accumulate_relation_univariates<Parameters, relation_idx + 1>(
+                univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
+        }
+    }
+
     /**
      * @brief Compute the combiner polynomial $G$ in the Protogalaxy paper.
      *
      */
+    template <bool disable_optimisation = false>
     ExtendedUnivariateWithRandomization compute_combiner(const ProverInstances& instances, PowPolynomial<FF>& pow_betas)
     {
         BB_OP_COUNT_TIME();
@@ -338,15 +366,21 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified
         num_threads = num_threads > 0 ? num_threads : 1;                     // ensure num threads is >= 1
         size_t iterations_per_thread = common_instance_size / num_threads;   // actual iterations per thread
+
         // Construct univariate accumulator containers; one per thread
-        std::vector<OptimisedTupleOfTuplesOfUnivariates> thread_univariate_accumulators(num_threads);
+        using ThreadAccumulators =
+            std::conditional_t<disable_optimisation, TupleOfTuplesOfUnivariates, OptimisedTupleOfTuplesOfUnivariates>;
+        using ExtendedUnivatiatesType =
+            std::conditional_t<disable_optimisation, ExtendedUnivariates, OptimisedExtendedUnivariates>;
+
+        std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
         for (auto& accum : thread_univariate_accumulators) {
             // just normal relation lengths
             Utils::zero_univariates(accum);
         }
 
         // Construct extended univariates containers; one per thread
-        std::vector<OptimisedExtendedUnivariates> extended_univariates;
+        std::vector<ExtendedUnivatiatesType> extended_univariates;
         extended_univariates.resize(num_threads);
 
         // Accumulate the contribution from each sub-relation
@@ -356,31 +390,67 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
             for (size_t idx = start; idx < end; idx++) {
                 // No need to initialise extended_univariates to 0, it's assigned to
-                extend_univariates<ProverInstances::NUM - 1>(extended_univariates[thread_idx], instances, idx);
+                if constexpr (disable_optimisation) {
+                    extend_univariates(extended_univariates[thread_idx], instances, idx);
+                } else {
+                    extend_univariates<ProverInstances::NUM - 1>(extended_univariates[thread_idx], instances, idx);
+                }
 
                 FF pow_challenge = pow_betas[idx];
 
                 // Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
                 // this function have already been folded. Moreover, linear-dependent relations that act over the
                 // entire execution trace rather than on rows, will not be multiplied by the pow challenge.
-                accumulate_relation_univariates(
-                    thread_univariate_accumulators[thread_idx],
-                    extended_univariates[thread_idx],
-                    instances.optimised_relation_parameters, // these parameters have already been folded
-                    pow_challenge);
+                if constexpr (disable_optimisation) {
+                    accumulate_relation_univariates(
+                        thread_univariate_accumulators[thread_idx],
+                        extended_univariates[thread_idx],
+                        instances.relation_parameters, // these parameters have already been folded
+                        pow_challenge);
+                } else {
+                    accumulate_relation_univariates(
+                        thread_univariate_accumulators[thread_idx],
+                        extended_univariates[thread_idx],
+                        instances.optimised_relation_parameters, // these parameters have already been folded
+                        pow_challenge);
+                }
             }
         });
-        OptimisedTupleOfTuplesOfUnivariates optimised_univariate_accumulators;
-        Utils::zero_univariates(optimised_univariate_accumulators);
-        // Accumulate the per-thread univariate accumulators into a single set of accumulators
-        for (auto& accumulators : thread_univariate_accumulators) {
-            Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
+        if constexpr (disable_optimisation) {
+            Utils::zero_univariates(univariate_accumulators);
+            for (auto& accumulators : thread_univariate_accumulators) {
+                Utils::add_nested_tuples(univariate_accumulators, accumulators);
+            }
+
+            return batch_over_relations(univariate_accumulators, instances.alphas);
+        } else {
+            OptimisedTupleOfTuplesOfUnivariates optimised_univariate_accumulators;
+            Utils::zero_univariates(optimised_univariate_accumulators);
+            // Accumulate the per-thread univariate accumulators into a single set of accumulators
+            for (auto& accumulators : thread_univariate_accumulators) {
+                Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
+            }
+
+            zero_skipped_indices(optimised_univariate_accumulators);
+            // print_comparison(optimised_univariate_accumulators, univariate_accumulators);
+            deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
+            //  Batch the univariate contributions from each sub-relation to obtain the round univariate
+            return batch_over_relations(univariate_accumulators, instances.alphas);
         }
-        zero_skipped_indices(optimised_univariate_accumulators);
-        // Batch the univariate contributions from each sub-relation to obtain the round univariate
-        return batch_over_relations(univariate_accumulators, instances.alphas);
     }
+    static void deoptimise_univariates(const OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators,
+                                       TupleOfTuplesOfUnivariates& new_univariate_accumulators
 
+    )
+    {
+        auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
+            auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
+            element = optimised_element.convert();
+            // info("Element ", outer_idx, ".", inner_idx, "[", ":", "] = ", element);
+        };
+
+        Utils::template apply_to_tuple_of_tuples<0, 0>(new_univariate_accumulators, deoptimise);
+    }
     static void zero_skipped_indices(OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators
 
     )
@@ -395,6 +465,18 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
         Utils::template apply_to_tuple_of_tuples<0, 0>(optimised_univariate_accumulators, deoptimise);
     }
+    static void print_comparison(OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators,
+                                 TupleOfTuplesOfUnivariates& new_univariate_accumulators)
+
+    {
+        auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
+            auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
+            info("Element ", outer_idx, ".", inner_idx, "[", ":", "] = ", element);
+            info("Optimisation ", outer_idx, ".", inner_idx, "[", ":", "] = ", optimised_element);
+        };
+
+        Utils::template apply_to_tuple_of_tuples<0, 0>(new_univariate_accumulators, deoptimise);
+    }
 
     static ExtendedUnivariateWithRandomization batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
                                                                     const CombinedRelationSeparator& alpha)
@@ -478,10 +560,8 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                 instance_idx++;
             }
             folded_parameter = tmp.template extend_to<ProverInstances::EXTENDED_LENGTH>();
-            optimised_folded_parameter.value_at(0) = folded_parameter.value_at(0);
-            std::copy(std::next(folded_parameter.evaluations.begin(), ProverInstances::NUM),
-                      folded_parameter.evaluations.end(),
-                      std::next(optimised_folded_parameter.evaluations.begin(), 1));
+            optimised_folded_parameter =
+                tmp.template extend_to<ProverInstances::EXTENDED_LENGTH, ProverInstances::NUM - 1>();
             param_idx++;
         }
     }

From 2f098900c1845f5531b40a66323155b31665782f Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Thu, 25 Apr 2024 18:14:32 +0000
Subject: [PATCH 07/13] comments

---
 .../src/barretenberg/eccvm/eccvm_flavor.hpp   |  7 +-
 .../cpp/src/barretenberg/flavor/flavor.hpp    |  4 +-
 .../barretenberg/polynomials/univariate.hpp   | 10 ++
 .../protogalaxy/combiner.test.cpp             |  2 +-
 .../protogalaxy/protogalaxy_prover.hpp        | 91 +++++++++----------
 .../goblin_ultra_flavor.hpp                   |  3 +-
 .../sumcheck/instance/instances.hpp           |  1 +
 .../goblin_translator_flavor.hpp              |  3 +-
 .../barretenberg/vm/generated/avm_flavor.hpp  |  3 +-
 9 files changed, 64 insertions(+), 60 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
index 3e315bf0d9a..f3e00d86fab 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
@@ -398,12 +398,13 @@ class ECCVMFlavor {
     };
 
     /**
-     * @brief A container for univariates used during sumcheck.
+     * @brief A container for univariates used during Protogalaxy and sumcheck.
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
     /**
-     * @brief A container for univariates used during Protogalaxy folding and sumcheck.
-     * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
+     * @brief A container for univariates used during Protogalaxy folding in 'optimised' mode.
+     * @details Univariates in the optimised version skip some redundant computation the result of which we already know
+     * (optimistically)
      */
     template <size_t LENGTH, size_t SKIP_COUNT>
     using OptimisedProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH, 0, SKIP_COUNT>>;
diff --git a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
index 6be12a498ff..09f7d61cf36 100644
--- a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
@@ -243,9 +243,7 @@ static constexpr auto create_protogalaxy_tuple_of_tuples_of_univariates()
 
 /**
  * @brief Recursive utility function to construct a container for the subrelation accumulators of Protogalaxy folding.
- * @details The size of the outer tuple is equal to the number of relations. Each relation contributes an inner tuple of
- * univariates whose size is equal to the number of subrelations of the relation. The length of a univariate in an inner
- * tuple is determined by the corresponding subrelation length and the number of instances to be folded.
+ * @details Differs from the non-optimised method by using optimised univariates that skip redundant computation
  */
 template <typename Tuple, size_t NUM_INSTANCES, size_t Index = 0>
 static constexpr auto create_optimised_protogalaxy_tuple_of_tuples_of_univariates()
diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
index cb859eb74cb..21efa2dad83 100644
--- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
+++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
@@ -19,6 +19,9 @@ template <class Fr, size_t view_domain_end, size_t view_domain_start, size_t ski
  * @brief A univariate polynomial represented by its values on {domain_start, domain_start + 1,..., domain_end - 1}. For
  * memory efficiency purposes, we store the evaluations in an array starting from 0 and make the mapping to the right
  * domain under the hood.
+ *
+ * @tparam skip_count Skip computing the values of elements [domain_start+1,..,domain_start+skip_count]. Used for
+ * optimising computation in protogalaxy
  */
 template <class Fr, size_t domain_end, size_t domain_start = 0, size_t skip_count = 0> class Univariate {
   public:
@@ -40,6 +43,13 @@ template <class Fr, size_t domain_end, size_t domain_start = 0, size_t skip_coun
     Univariate(Univariate&& other) noexcept = default;
     Univariate& operator=(const Univariate& other) = default;
     Univariate& operator=(Univariate&& other) noexcept = default;
+
+    /**
+     * @brief Convert from a version with skipped evaluations to one without skipping (with zeroes in previously skipped
+     * locations)
+     *
+     * @return Univariate<Fr, domain_end, domain_start>
+     */
     Univariate<Fr, domain_end, domain_start> convert() const noexcept
     {
         Univariate<Fr, domain_end, domain_start, 0> result;
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
index 7bfed96bef7..e98ec0b5ba5 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
@@ -53,7 +53,7 @@ TEST(Protogalaxy, CombinerOn2Instances)
             ProverInstances instances{ instance_data };
             instances.alphas.fill(bb::Univariate<FF, 12>(FF(0))); // focus on the arithmetic relation only
             auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-            auto result = prover.compute_combiner<true>(instances, pow_polynomial);
+            auto result = prover.compute_combiner</*disable_optimisation=*/true>(instances, pow_polynomial);
             auto expected_result = Univariate<FF, 12>(std::array<FF, 12>{
                 87706,
                 13644570,
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index 06dcd3cfe84..6879c53a879 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -49,6 +49,8 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     // The length of ExtendedUnivariate is the largest length (==max_relation_degree + 1) of a univariate polynomial
     // obtained by composing a relation with folded instance + relation parameters .
     using ExtendedUnivariate = Univariate<FF, (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (ProverInstances::NUM - 1) + 1>;
+    // Same as ExtendedUnivariate, but uses optimised univariates which skip redundant computation in optimistic cases
+    // (when we know that the evaluation of all relations is 0 on a particular index, for example)
     using OptimisedExtendedUnivariate =
         Univariate<FF,
                    (Flavor::MAX_TOTAL_RELATION_LENGTH - 1) * (ProverInstances::NUM - 1) + 1,
@@ -280,6 +282,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         return Polynomial<FF>(coeffs);
     }
 
+    OptimisedTupleOfTuplesOfUnivariates optimised_univariate_accumulators;
     TupleOfTuplesOfUnivariates univariate_accumulators;
 
     /**
@@ -289,9 +292,12 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
      * polynomial, extract the value at row_idx. Use these values to create a univariate polynomial, and then extend
      * (i.e., compute additional evaluations at adjacent domain values) as needed.
      * @todo TODO(https://github.com/AztecProtocol/barretenberg/issues/751) Optimize memory
+     *
+     *
      */
-    template <size_t skip_count = 0>
-    void extend_univariates(OptimisedExtendedUnivariates& extended_univariates,
+
+    template <typename ExtendedUnivariatesType, size_t skip_count = 0>
+    void extend_univariates(ExtendedUnivariatesType& extended_univariates,
                             const ProverInstances& instances,
                             const size_t row_idx)
     {
@@ -301,19 +307,16 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         }
     }
 
-    void extend_univariates(ExtendedUnivariates& extended_univariates,
-                            const ProverInstances& instances,
-                            const size_t row_idx)
-    {
-        auto base_univariates = instances.template row_to_univariates(row_idx);
-        for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) {
-            extended_univariate = base_univariate.template extend_to<ExtendedUnivariate::LENGTH>();
-        }
-    }
-
-    template <typename Parameters, size_t relation_idx = 0>
-    void accumulate_relation_univariates(OptimisedTupleOfTuplesOfUnivariates& univariate_accumulators,
-                                         const OptimisedExtendedUnivariates& extended_univariates,
+    template <typename UnivariateAccumulatorsType,
+              typename ExtendedUnivariatesType,
+              typename Parameters,
+              size_t relation_idx = 0>
+        requires(std::is_same_v<UnivariateAccumulatorsType, OptimisedTupleOfTuplesOfUnivariates> &&
+                 std::is_same_v<ExtendedUnivariatesType, OptimisedExtendedUnivariates>) ||
+                (std::is_same_v<UnivariateAccumulatorsType, TupleOfTuplesOfUnivariates> &&
+                 std::is_same_v<ExtendedUnivariatesType, ExtendedUnivariates>)
+    void accumulate_relation_univariates(UnivariateAccumulatorsType& univariate_accumulators,
+                                         const ExtendedUnivariatesType& extended_univariates,
                                          const Parameters& relation_parameters,
                                          const FF& scaling_factor)
     {
@@ -323,24 +326,10 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
         // Repeat for the next relation.
         if constexpr (relation_idx + 1 < Flavor::NUM_RELATIONS) {
-            accumulate_relation_univariates<Parameters, relation_idx + 1>(
-                univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
-        }
-    }
-
-    template <typename Parameters, size_t relation_idx = 0>
-    void accumulate_relation_univariates(TupleOfTuplesOfUnivariates& univariate_accumulators,
-                                         const ExtendedUnivariates& extended_univariates,
-                                         const Parameters& relation_parameters,
-                                         const FF& scaling_factor)
-    {
-        using Relation = std::tuple_element_t<relation_idx, Relations>;
-        Relation::accumulate(
-            std::get<relation_idx>(univariate_accumulators), extended_univariates, relation_parameters, scaling_factor);
-
-        // Repeat for the next relation.
-        if constexpr (relation_idx + 1 < Flavor::NUM_RELATIONS) {
-            accumulate_relation_univariates<Parameters, relation_idx + 1>(
+            accumulate_relation_univariates<UnivariateAccumulatorsType,
+                                            ExtendedUnivariatesType,
+                                            Parameters,
+                                            relation_idx + 1>(
                 univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
         }
     }
@@ -367,12 +356,14 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         num_threads = num_threads > 0 ? num_threads : 1;                     // ensure num threads is >= 1
         size_t iterations_per_thread = common_instance_size / num_threads;   // actual iterations per thread
 
-        // Construct univariate accumulator containers; one per thread
+        // Univariates are optimised for usual PG, but we need the unoptimised version for tests (it's a version that
+        // doesn't skip computation), so we need to define types depending on the template instantiation
         using ThreadAccumulators =
             std::conditional_t<disable_optimisation, TupleOfTuplesOfUnivariates, OptimisedTupleOfTuplesOfUnivariates>;
         using ExtendedUnivatiatesType =
             std::conditional_t<disable_optimisation, ExtendedUnivariates, OptimisedExtendedUnivariates>;
 
+        // Construct univariate accumulator containers; one per thread
         std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
         for (auto& accum : thread_univariate_accumulators) {
             // just normal relation lengths
@@ -393,7 +384,10 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                 if constexpr (disable_optimisation) {
                     extend_univariates(extended_univariates[thread_idx], instances, idx);
                 } else {
-                    extend_univariates<ProverInstances::NUM - 1>(extended_univariates[thread_idx], instances, idx);
+                    // Instantiate univariates with skipping to ignore computation in those indices (they are still
+                    // available for skipping relations, but all derived univariate will ignore those evaluations)
+                    extend_univariates<ExtendedUnivatiatesType, /*skip_count=*/ProverInstances::NUM - 1>(
+                        extended_univariates[thread_idx], instances, idx);
                 }
 
                 FF pow_challenge = pow_betas[idx];
@@ -418,26 +412,35 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         });
         if constexpr (disable_optimisation) {
             Utils::zero_univariates(univariate_accumulators);
+            // Accumulate the per-thread univariate accumulators into a single set of accumulators
             for (auto& accumulators : thread_univariate_accumulators) {
                 Utils::add_nested_tuples(univariate_accumulators, accumulators);
             }
 
             return batch_over_relations(univariate_accumulators, instances.alphas);
         } else {
-            OptimisedTupleOfTuplesOfUnivariates optimised_univariate_accumulators;
             Utils::zero_univariates(optimised_univariate_accumulators);
             // Accumulate the per-thread univariate accumulators into a single set of accumulators
             for (auto& accumulators : thread_univariate_accumulators) {
                 Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
             }
 
-            zero_skipped_indices(optimised_univariate_accumulators);
-            // print_comparison(optimised_univariate_accumulators, univariate_accumulators);
+            // Convert from optimised version to non-optimised
             deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
             //  Batch the univariate contributions from each sub-relation to obtain the round univariate
             return batch_over_relations(univariate_accumulators, instances.alphas);
         }
     }
+
+    /**
+     * @brief Convert univariates from optimised form to regular
+     *
+     * @details We need to convert before we batch relations, since optimised versions don't have enough information to
+     * extend the univariates to maximum length
+     *
+     * @param optimised_univariate_accumulators
+     * @param new_univariate_accumulators
+     */
     static void deoptimise_univariates(const OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators,
                                        TupleOfTuplesOfUnivariates& new_univariate_accumulators
 
@@ -465,18 +468,6 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
         Utils::template apply_to_tuple_of_tuples<0, 0>(optimised_univariate_accumulators, deoptimise);
     }
-    static void print_comparison(OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators,
-                                 TupleOfTuplesOfUnivariates& new_univariate_accumulators)
-
-    {
-        auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
-            auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
-            info("Element ", outer_idx, ".", inner_idx, "[", ":", "] = ", element);
-            info("Optimisation ", outer_idx, ".", inner_idx, "[", ":", "] = ", optimised_element);
-        };
-
-        Utils::template apply_to_tuple_of_tuples<0, 0>(new_univariate_accumulators, deoptimise);
-    }
 
     static ExtendedUnivariateWithRandomization batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
                                                                     const CombinedRelationSeparator& alpha)
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
index aac9f279f1f..e637397dd94 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
@@ -455,7 +455,8 @@ class GoblinUltraFlavor {
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
 
     /**
-     * @brief A container for univariates used during Protogalaxy folding and sumcheck.
+     * @brief A container for univariates used during Protogalaxy folding and sumcheck with some of the computation
+     * optimistically ignored.
      * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
      */
     template <size_t LENGTH, size_t SKIP_COUNT>
diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
index a09c9aca88f..bcd7da7addf 100644
--- a/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
+++ b/barretenberg/cpp/src/barretenberg/sumcheck/instance/instances.hpp
@@ -54,6 +54,7 @@ template <typename Flavor_, size_t NUM_ = 2> struct ProverInstances_ {
      * and the function returns the univariates [{a_1, b_1, c_1, d_1}, {a_2, b_2, c_2, d_2}, ...]
      *
      * @param row_idx A fixed row position in several execution traces
+     * @tparam skip_count Construct univariates that skip some of the indices when computing results
      * @return The univariates whose extensions will be used to construct the combiner.
      */
     template <size_t skip_count = 0> auto row_to_univariates(size_t row_idx) const
diff --git a/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp b/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
index 50d9e2fe5e2..13afacd9c0f 100644
--- a/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
@@ -880,7 +880,8 @@ class GoblinTranslatorFlavor {
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
     /**
-     * @brief A container for univariates used during Protogalaxy folding and sumcheck.
+     * @brief A container for univariates used during Protogalaxy folding and sumcheck with some of the computation
+     * optimistically ignored
      * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
      */
     template <size_t LENGTH, size_t SKIP_COUNT>
diff --git a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
index ea522018e0c..70fd11699be 100644
--- a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
@@ -1708,7 +1708,8 @@ class AvmFlavor {
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
     /**
-     * @brief A container for univariates used during Protogalaxy folding and sumcheck.
+     * @brief A container for univariates used during Protogalaxy folding and sumcheck with some of the computation
+     * optimistically ignored
      * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
      */
     template <size_t LENGTH, size_t SKIP_COUNT>

From 1ae27c07529676aa99241e3d2e5646c726ead912 Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Fri, 26 Apr 2024 16:47:59 +0000
Subject: [PATCH 08/13] Address Mara's comments

---
 .../cpp/src/barretenberg/flavor/flavor.hpp    |  29 ++--
 .../protogalaxy/combiner.test.cpp             |  42 +++---
 .../protogalaxy/protogalaxy_prover.hpp        | 129 ++++++++++--------
 .../goblin_ultra_flavor.hpp                   |   4 +-
 .../stdlib_circuit_builders/ultra_flavor.hpp  |   4 +-
 .../barretenberg/vm/generated/avm_flavor.hpp  |   4 +-
 6 files changed, 114 insertions(+), 98 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
index 09f7d61cf36..5cd93102eb6 100644
--- a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
@@ -226,36 +226,23 @@ template <typename Tuple, std::size_t Index = 0> static constexpr size_t compute
  * @details The size of the outer tuple is equal to the number of relations. Each relation contributes an inner tuple of
  * univariates whose size is equal to the number of subrelations of the relation. The length of a univariate in an inner
  * tuple is determined by the corresponding subrelation length and the number of instances to be folded.
+ * @tparam optimised Enable optimised version with skipping some of the computation
  */
-template <typename Tuple, size_t NUM_INSTANCES, size_t Index = 0>
+template <typename Tuple, size_t NUM_INSTANCES, bool optimised = false, size_t Index = 0>
 static constexpr auto create_protogalaxy_tuple_of_tuples_of_univariates()
 {
     if constexpr (Index >= std::tuple_size<Tuple>::value) {
         return std::tuple<>{}; // Return empty when reach end of the tuple
     } else {
         using UnivariateTuple =
-            typename std::tuple_element_t<Index,
-                                          Tuple>::template ProtogalaxyTupleOfUnivariatesOverSubrelations<NUM_INSTANCES>;
-        return std::tuple_cat(std::tuple<UnivariateTuple>{},
-                              create_protogalaxy_tuple_of_tuples_of_univariates<Tuple, NUM_INSTANCES, Index + 1>());
-    }
-}
-
-/**
- * @brief Recursive utility function to construct a container for the subrelation accumulators of Protogalaxy folding.
- * @details Differs from the non-optimised method by using optimised univariates that skip redundant computation
- */
-template <typename Tuple, size_t NUM_INSTANCES, size_t Index = 0>
-static constexpr auto create_optimised_protogalaxy_tuple_of_tuples_of_univariates()
-{
-    if constexpr (Index >= std::tuple_size<Tuple>::value) {
-        return std::tuple<>{}; // Return empty when reach end of the tuple
-    } else {
-        using UnivariateTuple = typename std::tuple_element_t<Index, Tuple>::
-            template OptimisedProtogalaxyTupleOfUnivariatesOverSubrelations<NUM_INSTANCES>;
+            std::conditional_t<optimised,
+                               typename std::tuple_element_t<Index, Tuple>::
+                                   template OptimisedProtogalaxyTupleOfUnivariatesOverSubrelations<NUM_INSTANCES>,
+                               typename std::tuple_element_t<Index, Tuple>::
+                                   template ProtogalaxyTupleOfUnivariatesOverSubrelations<NUM_INSTANCES>>;
         return std::tuple_cat(
             std::tuple<UnivariateTuple>{},
-            create_optimised_protogalaxy_tuple_of_tuples_of_univariates<Tuple, NUM_INSTANCES, Index + 1>());
+            create_protogalaxy_tuple_of_tuples_of_univariates<Tuple, NUM_INSTANCES, optimised, Index + 1>());
     }
 }
 
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
index e98ec0b5ba5..08a8a5cd752 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
@@ -44,6 +44,10 @@ TEST(Protogalaxy, CombinerOn2Instances)
                 auto prover_polynomials = get_sequential_prover_polynomials<Flavor>(
                     /*log_circuit_size=*/1, idx * 128);
                 restrict_to_standard_arithmetic_relation(prover_polynomials);
+                // This ensures that the combiner accumulator for the second instance is 0
+                if (idx == NUM_INSTANCES - 1) {
+                    prover_polynomials.q_c[0] -= 13644570;
+                }
                 instance->prover_polynomials = std::move(prover_polynomials);
                 instance->proving_key = Flavor::ProvingKey();
                 instance->proving_key.circuit_size = 2;
@@ -53,22 +57,22 @@ TEST(Protogalaxy, CombinerOn2Instances)
             ProverInstances instances{ instance_data };
             instances.alphas.fill(bb::Univariate<FF, 12>(FF(0))); // focus on the arithmetic relation only
             auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-            auto result = prover.compute_combiner</*disable_optimisation=*/true>(instances, pow_polynomial);
-            auto expected_result = Univariate<FF, 12>(std::array<FF, 12>{
-                87706,
-                13644570,
-                76451738,
-                226257946,
-                static_cast<uint64_t>(500811930),
-                static_cast<uint64_t>(937862426),
-                static_cast<uint64_t>(1575158170),
-                static_cast<uint64_t>(2450447898),
-                static_cast<uint64_t>(3601480346),
-                static_cast<uint64_t>(5066004250),
-                static_cast<uint64_t>(6881768346),
-                static_cast<uint64_t>(9086521370),
-            });
+            auto result = prover.compute_combiner</*OptimisationEnabled=*/false>(instances, pow_polynomial);
+            auto optimised_result = prover.compute_combiner(instances, pow_polynomial);
+            auto expected_result = Univariate<FF, 12>(std::array<FF, 12>{ 87706,
+                                                                          0,
+                                                                          0x02ee2966,
+                                                                          0x0b0bd2cc,
+                                                                          0x00001a98fc32,
+                                                                          0x000033d5a598,
+                                                                          0x00005901cefe,
+                                                                          0x00008c5d7864,
+                                                                          0x0000d028a1ca,
+                                                                          0x000126a34b30UL,
+                                                                          0x0001920d7496UL,
+                                                                          0x000214a71dfcUL });
             EXPECT_EQ(result, expected_result);
+            EXPECT_EQ(optimised_result, expected_result);
         } else {
             std::vector<std::shared_ptr<ProverInstance>> instance_data(NUM_INSTANCES);
             ProtoGalaxyProver prover;
@@ -132,11 +136,13 @@ TEST(Protogalaxy, CombinerOn2Instances)
                       0    0    0    0    0    0    0              0    0    6   18   36   60   90      */
 
             auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-            auto result = prover.compute_combiner(instances, pow_polynomial);
+            auto result = prover.compute_combiner</*OptimisationEnabled=*/false>(instances, pow_polynomial);
+            auto optimised_result = prover.compute_combiner(instances, pow_polynomial);
             auto expected_result =
                 Univariate<FF, 12>(std::array<FF, 12>{ 0, 0, 12, 36, 72, 120, 180, 252, 336, 432, 540, 660 });
 
             EXPECT_EQ(result, expected_result);
+            EXPECT_EQ(optimised_result, expected_result);
         }
     };
     run_test(true);
@@ -184,11 +190,13 @@ TEST(Protogalaxy, CombinerOn4Instances)
         zero_all_selectors(instances[3]->prover_polynomials);
 
         auto pow_polynomial = PowPolynomial(std::vector<FF>{ 2 });
-        auto result = prover.compute_combiner(instances, pow_polynomial);
+        auto result = prover.compute_combiner</*OptimisationEnabled=*/false>(instances, pow_polynomial);
+        auto optimised_result = prover.compute_combiner(instances, pow_polynomial);
         std::array<FF, 40> zeroes;
         std::fill(zeroes.begin(), zeroes.end(), 0);
         auto expected_result = Univariate<FF, 40>(zeroes);
         EXPECT_EQ(result, expected_result);
+        EXPECT_EQ(optimised_result, expected_result);
     };
     run_test();
 };
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index 6879c53a879..e478278eb6c 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -233,7 +233,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                     }
                 }
             },
-            8);
+            /*no_multhreading_if_less_or_equal=*/8);
         return construct_coefficients_tree(betas, deltas, level_coeffs, level + 1);
     }
 
@@ -254,6 +254,8 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         auto width = full_honk_evaluations.size();
         std::vector<std::vector<FF>> first_level_coeffs(width >> 1, std::vector<FF>(2, 0));
         run_loop_in_parallel(width >> 1, [&](size_t start, size_t end) {
+            // run_loop_in_parallel can split the domain so that chunk boundaries are odd, which we can't tolerate
+            // here, so we first halve the width, parallelise over that, and then reconstruct even start and end
             for (size_t node = start << 1; node < end << 1; node += 2) {
                 auto parent = node >> 1;
                 first_level_coeffs[parent][0] =
@@ -288,18 +290,19 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
     /**
      * @brief Prepare a univariate polynomial for relation execution in one step of the main loop in folded instance
      * construction.
-     * @details For a fixed prover polynomial index, extract that polynomial from each instance in Instances. From each
-     * polynomial, extract the value at row_idx. Use these values to create a univariate polynomial, and then extend
-     * (i.e., compute additional evaluations at adjacent domain values) as needed.
+     * @details For a fixed prover polynomial index, extract that polynomial from each instance in Instances. From
+     * each polynomial, extract the value at row_idx. Use these values to create a univariate polynomial, and then
+     * extend (i.e., compute additional evaluations at adjacent domain values) as needed.
      * @todo TODO(https://github.com/AztecProtocol/barretenberg/issues/751) Optimize memory
      *
      *
      */
 
-    template <typename ExtendedUnivariatesType, size_t skip_count = 0>
-    void extend_univariates(ExtendedUnivariatesType& extended_univariates,
-                            const ProverInstances& instances,
-                            const size_t row_idx)
+    template <size_t skip_count = 0>
+    void extend_univariates(
+        std::conditional_t<skip_count != 0, OptimisedExtendedUnivariates, ExtendedUnivariates>& extended_univariates,
+        const ProverInstances& instances,
+        const size_t row_idx)
     {
         auto base_univariates = instances.template row_to_univariates<skip_count>(row_idx);
         for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) {
@@ -307,16 +310,15 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         }
     }
 
-    template <typename UnivariateAccumulatorsType,
-              typename ExtendedUnivariatesType,
-              typename Parameters,
-              size_t relation_idx = 0>
-        requires(std::is_same_v<UnivariateAccumulatorsType, OptimisedTupleOfTuplesOfUnivariates> &&
-                 std::is_same_v<ExtendedUnivariatesType, OptimisedExtendedUnivariates>) ||
-                (std::is_same_v<UnivariateAccumulatorsType, TupleOfTuplesOfUnivariates> &&
-                 std::is_same_v<ExtendedUnivariatesType, ExtendedUnivariates>)
-    void accumulate_relation_univariates(UnivariateAccumulatorsType& univariate_accumulators,
-                                         const ExtendedUnivariatesType& extended_univariates,
+    /**
+     * @brief Add the value of each relation over univariates to an appropriate accumulator
+     *
+     * @tparam Parameters relation parameters type
+     * @tparam relation_idx The index of the relation
+     */
+    template <typename Parameters, size_t relation_idx = 0>
+    void accumulate_relation_univariates(TupleOfTuplesOfUnivariates& univariate_accumulators,
+                                         const ExtendedUnivariates& extended_univariates,
                                          const Parameters& relation_parameters,
                                          const FF& scaling_factor)
     {
@@ -326,11 +328,36 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
         // Repeat for the next relation.
         if constexpr (relation_idx + 1 < Flavor::NUM_RELATIONS) {
-            accumulate_relation_univariates<UnivariateAccumulatorsType,
-                                            ExtendedUnivariatesType,
-                                            Parameters,
-                                            relation_idx + 1>(
-                univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
+            accumulate_relation_univariates<
+
+                Parameters,
+                relation_idx + 1>(univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
+        }
+    }
+
+    /**
+     * @brief Add the value of each relation over univariates to an appropriate accumulator with index skipping
+     * optimisation
+     *
+     * @tparam Parameters relation parameters type
+     * @tparam relation_idx The index of the relation
+     */
+    template <typename Parameters, size_t relation_idx = 0>
+    void accumulate_relation_univariates(OptimisedTupleOfTuplesOfUnivariates& univariate_accumulators,
+                                         const OptimisedExtendedUnivariates& extended_univariates,
+                                         const Parameters& relation_parameters,
+                                         const FF& scaling_factor)
+    {
+        using Relation = std::tuple_element_t<relation_idx, Relations>;
+        Relation::accumulate(
+            std::get<relation_idx>(univariate_accumulators), extended_univariates, relation_parameters, scaling_factor);
+
+        // Repeat for the next relation.
+        if constexpr (relation_idx + 1 < Flavor::NUM_RELATIONS) {
+            accumulate_relation_univariates<
+
+                Parameters,
+                relation_idx + 1>(univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
         }
     }
 
@@ -338,7 +365,7 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
      * @brief Compute the combiner polynomial $G$ in the Protogalaxy paper.
      *
      */
-    template <bool disable_optimisation = false>
+    template <bool OptimisationEnabled = true>
     ExtendedUnivariateWithRandomization compute_combiner(const ProverInstances& instances, PowPolynomial<FF>& pow_betas)
     {
         BB_OP_COUNT_TIME();
@@ -359,9 +386,9 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         // Univariates are optimised for usual PG, but we need the unoptimised version for tests (it's a version that
         // doesn't skip computation), so we need to define types depending on the template instantiation
         using ThreadAccumulators =
-            std::conditional_t<disable_optimisation, TupleOfTuplesOfUnivariates, OptimisedTupleOfTuplesOfUnivariates>;
+            std::conditional_t<OptimisationEnabled, OptimisedTupleOfTuplesOfUnivariates, TupleOfTuplesOfUnivariates>;
         using ExtendedUnivatiatesType =
-            std::conditional_t<disable_optimisation, ExtendedUnivariates, OptimisedExtendedUnivariates>;
+            std::conditional_t<OptimisationEnabled, OptimisedExtendedUnivariates, ExtendedUnivariates>;
 
         // Construct univariate accumulator containers; one per thread
         std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
@@ -381,13 +408,15 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
             for (size_t idx = start; idx < end; idx++) {
                 // No need to initialise extended_univariates to 0, it's assigned to
-                if constexpr (disable_optimisation) {
-                    extend_univariates(extended_univariates[thread_idx], instances, idx);
-                } else {
+                if constexpr (OptimisationEnabled) {
                     // Instantiate univariates with skipping to ignore computation in those indices (they are still
                     // available for skipping relations, but all derived univariate will ignore those evaluations)
-                    extend_univariates<ExtendedUnivatiatesType, /*skip_count=*/ProverInstances::NUM - 1>(
+                    extend_univariates</*skip_count=*/ProverInstances::NUM - 1>(
                         extended_univariates[thread_idx], instances, idx);
+
+                } else {
+
+                    extend_univariates(extended_univariates[thread_idx], instances, idx);
                 }
 
                 FF pow_challenge = pow_betas[idx];
@@ -395,39 +424,41 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                 // Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
                 // this function have already been folded. Moreover, linear-dependent relations that act over the
                 // entire execution trace rather than on rows, will not be multiplied by the pow challenge.
-                if constexpr (disable_optimisation) {
+                if constexpr (OptimisationEnabled) {
                     accumulate_relation_univariates(
                         thread_univariate_accumulators[thread_idx],
                         extended_univariates[thread_idx],
-                        instances.relation_parameters, // these parameters have already been folded
+                        instances.optimised_relation_parameters, // these parameters have already been folded
                         pow_challenge);
                 } else {
+
                     accumulate_relation_univariates(
                         thread_univariate_accumulators[thread_idx],
                         extended_univariates[thread_idx],
-                        instances.optimised_relation_parameters, // these parameters have already been folded
+                        instances.relation_parameters, // these parameters have already been folded
                         pow_challenge);
                 }
             }
         });
-        if constexpr (disable_optimisation) {
-            Utils::zero_univariates(univariate_accumulators);
+        if constexpr (OptimisationEnabled) {
+            Utils::zero_univariates(optimised_univariate_accumulators);
             // Accumulate the per-thread univariate accumulators into a single set of accumulators
             for (auto& accumulators : thread_univariate_accumulators) {
-                Utils::add_nested_tuples(univariate_accumulators, accumulators);
+                Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
             }
 
+            // Convert from optimised version to non-optimised
+            deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
+            //  Batch the univariate contributions from each sub-relation to obtain the round univariate
             return batch_over_relations(univariate_accumulators, instances.alphas);
+
         } else {
-            Utils::zero_univariates(optimised_univariate_accumulators);
+            Utils::zero_univariates(univariate_accumulators);
             // Accumulate the per-thread univariate accumulators into a single set of accumulators
             for (auto& accumulators : thread_univariate_accumulators) {
-                Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
+                Utils::add_nested_tuples(univariate_accumulators, accumulators);
             }
 
-            // Convert from optimised version to non-optimised
-            deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
-            //  Batch the univariate contributions from each sub-relation to obtain the round univariate
             return batch_over_relations(univariate_accumulators, instances.alphas);
         }
     }
@@ -449,25 +480,10 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
             auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
             element = optimised_element.convert();
-            // info("Element ", outer_idx, ".", inner_idx, "[", ":", "] = ", element);
         };
 
         Utils::template apply_to_tuple_of_tuples<0, 0>(new_univariate_accumulators, deoptimise);
     }
-    static void zero_skipped_indices(OptimisedTupleOfTuplesOfUnivariates& optimised_univariate_accumulators
-
-    )
-    {
-        auto deoptimise = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
-            // auto& optimised_element = std::get<inner_idx>(std::get<outer_idx>(optimised_univariate_accumulators));
-            for (size_t i = 1; i < ProverInstances::NUM; i++) {
-                element.evaluations[i] = FF(0);
-            }
-            // info("Element ", outer_idx, ".", inner_idx, "[", ":", "] = ", element);
-        };
-
-        Utils::template apply_to_tuple_of_tuples<0, 0>(optimised_univariate_accumulators, deoptimise);
-    }
 
     static ExtendedUnivariateWithRandomization batch_over_relations(TupleOfTuplesOfUnivariates& univariate_accumulators,
                                                                     const CombinedRelationSeparator& alpha)
@@ -478,7 +494,6 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
         size_t idx = 0;
         auto scale_and_sum = [&]<size_t outer_idx, size_t inner_idx>(auto& element) {
             auto extended = element.template extend_to<ProverInstances::BATCHED_EXTENDED_LENGTH>();
-            // info("Relation ", outer_idx, ".", inner_idx, "[", 0, "] = ", extended.value_at(0));
             extended *= alpha[idx];
             result += extended;
             idx++;
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
index e637397dd94..112f8ac41d2 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/goblin_ultra_flavor.hpp
@@ -90,7 +90,9 @@ class GoblinUltraFlavor {
 
     template <size_t NUM_INSTANCES>
     using OptimisedProtogalaxyTupleOfTuplesOfUnivariates =
-        decltype(create_optimised_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
+        decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations,
+                                                                   NUM_INSTANCES,
+                                                                   /*optimised=*/true>());
     using SumcheckTupleOfTuplesOfUnivariates = decltype(create_sumcheck_tuple_of_tuples_of_univariates<Relations>());
     using TupleOfArraysOfValues = decltype(create_tuple_of_arrays_of_values<Relations>());
 
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
index 16886aacef0..330b6314964 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp
@@ -77,7 +77,9 @@ class UltraFlavor {
         decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
     template <size_t NUM_INSTANCES>
     using OptimisedProtogalaxyTupleOfTuplesOfUnivariates =
-        decltype(create_optimised_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
+        decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations,
+                                                                   NUM_INSTANCES,
+                                                                   /*optimised=*/true>());
     using SumcheckTupleOfTuplesOfUnivariates = decltype(create_sumcheck_tuple_of_tuples_of_univariates<Relations>());
     using TupleOfArraysOfValues = decltype(create_tuple_of_arrays_of_values<Relations>());
 
diff --git a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
index 70fd11699be..51f70990f18 100644
--- a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
@@ -162,7 +162,9 @@ class AvmFlavor {
         decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
     template <size_t NUM_INSTANCES>
     using OptimisedProtogalaxyTupleOfTuplesOfUnivariates =
-        decltype(create_optimised_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
+        decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations,
+                                                                   NUM_INSTANCES,
+                                                                   /*optimised=*/true>());
     using SumcheckTupleOfTuplesOfUnivariates = decltype(create_sumcheck_tuple_of_tuples_of_univariates<Relations>());
     using TupleOfArraysOfValues = decltype(create_tuple_of_arrays_of_values<Relations>());
 

From 182b8c855577c0b17c6c81326f1a5cb6dd7416c6 Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Mon, 29 Apr 2024 12:44:01 +0000
Subject: [PATCH 09/13] More addressing Mara's comments

---
 .../cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp  | 5 +++++
 .../translator_vm/goblin_translator_flavor.hpp             | 7 -------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp
index bf72f4ca5eb..410e1aa8ff6 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy.test.cpp
@@ -288,6 +288,11 @@ template <typename Flavor> class ProtoGalaxyTests : public testing::Test {
 
         bb::Univariate<FF, 11> expected_eta{ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21 } };
         EXPECT_EQ(instances.relation_parameters.eta, expected_eta);
+        // Optimised relation parameters are the same, we just don't compute any values for non-used indices when
+        // deriving values from them
+        for (size_t i = 0; i < 11; i++) {
+            EXPECT_EQ(instances.optimised_relation_parameters.eta.evaluations[i], expected_eta.evaluations[i]);
+        }
     }
 
     /**
diff --git a/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp b/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
index 13afacd9c0f..46eac85ce3a 100644
--- a/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/translator_vm/goblin_translator_flavor.hpp
@@ -879,13 +879,6 @@ class GoblinTranslatorFlavor {
      * @brief A container for univariates used during sumcheck.
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
-    /**
-     * @brief A container for univariates used during Protogalaxy folding and sumcheck with some of the computation
-     * optmistically ignored
-     * @details During folding and sumcheck, the prover evaluates the relations on these univariates.
-     */
-    template <size_t LENGTH, size_t SKIP_COUNT>
-    using OptimisedProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH, 0, SKIP_COUNT>>;
 
     /**
      * @brief A container for univariates produced during the hot loop in sumcheck.

From 30cb2fb0fb58f81d99da0226eff42a0f93915305 Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Tue, 30 Apr 2024 09:46:45 +0000
Subject: [PATCH 10/13] address mara's comments

---
 .../protogalaxy/combiner.test.cpp             |   2 +
 .../protogalaxy/protogalaxy_prover.hpp        | 142 +++++++++++-------
 .../barretenberg/vm/generated/avm_flavor.hpp  |   5 -
 3 files changed, 93 insertions(+), 56 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
index 8d1dbb4ea05..4ff7f81cb51 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/combiner.test.cpp
@@ -45,6 +45,8 @@ TEST(Protogalaxy, CombinerOn2Instances)
                     /*log_circuit_size=*/1, idx * 128);
                 restrict_to_standard_arithmetic_relation(prover_polynomials);
                 // This ensures that the combiner accumulator for second instance = 0
+                // The value is computed by generating the python script values, computing the resulting accumulator and
+                // taking the value at index 1
                 if (idx == NUM_INSTANCES - 1) {
                     prover_polynomials.q_c[0] -= 13644570;
                 }
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
index fcaf3fea5e0..c03af2e5333 100644
--- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -360,20 +360,18 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
                 relation_idx + 1>(univariate_accumulators, extended_univariates, relation_parameters, scaling_factor);
         }
     }
-
     /**
-     * @brief Compute the combiner polynomial $G$ in the Protogalaxy paper.
+     * @brief Compute the combiner polynomial $G$ in the Protogalaxy paper
      *
      */
-    template <bool OptimisationEnabled = true>
+    template <bool OptimisationEnabled, std::enable_if_t<!OptimisationEnabled, bool> = true>
     ExtendedUnivariateWithRandomization compute_combiner(const ProverInstances& instances, PowPolynomial<FF>& pow_betas)
     {
-        BB_OP_COUNT_TIME();
         size_t common_instance_size = instances[0]->proving_key.circuit_size;
         pow_betas.compute_values();
         // Determine number of threads for multithreading.
-        // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available
-        // based on a specified minimum number of iterations per thread. This eventually leads to the use of a
+        // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based
+        // on a specified minimum number of iterations per thread. This eventually leads to the use of a
         // single thread. For now we use a power of 2 number of threads simply to ensure the round size is evenly
         // divided.
         size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2)
@@ -385,10 +383,8 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
 
         // Univariates are optimised for usual PG, but we need the unoptimised version for tests (it's a version that
         // doesn't skip computation), so we need to define types depending on the template instantiation
-        using ThreadAccumulators =
-            std::conditional_t<OptimisationEnabled, OptimisedTupleOfTuplesOfUnivariates, TupleOfTuplesOfUnivariates>;
-        using ExtendedUnivatiatesType =
-            std::conditional_t<OptimisationEnabled, OptimisedExtendedUnivariates, ExtendedUnivariates>;
+        using ThreadAccumulators = TupleOfTuplesOfUnivariates;
+        using ExtendedUnivatiatesType = ExtendedUnivariates;
 
         // Construct univariate accumulator containers; one per thread
         std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
@@ -407,60 +403,104 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
             size_t end = (thread_idx + 1) * iterations_per_thread;
 
             for (size_t idx = start; idx < end; idx++) {
-                // No need to initialise extended_univariates to 0, it's assigned to
-                if constexpr (OptimisationEnabled) {
-                    // Instantiate univariates with skipping to ignore computation in those indices (they are still
-                    // available for skipping relations, but all derived univariate will ignore those evaluations)
-                    extend_univariates</*skip_count=*/ProverInstances::NUM - 1>(
-                        extended_univariates[thread_idx], instances, idx);
-
-                } else {
 
-                    extend_univariates(extended_univariates[thread_idx], instances, idx);
-                }
+                extend_univariates(extended_univariates[thread_idx], instances, idx);
 
                 FF pow_challenge = pow_betas[idx];
 
                 // Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
                 // this function have already been folded. Moreover, linear-dependent relations that act over the
                 // entire execution trace rather than on rows, will not be multiplied by the pow challenge.
-                if constexpr (OptimisationEnabled) {
-                    accumulate_relation_univariates(
-                        thread_univariate_accumulators[thread_idx],
-                        extended_univariates[thread_idx],
-                        instances.optimised_relation_parameters, // these parameters have already been folded
-                        pow_challenge);
-                } else {
-
-                    accumulate_relation_univariates(
-                        thread_univariate_accumulators[thread_idx],
-                        extended_univariates[thread_idx],
-                        instances.relation_parameters, // these parameters have already been folded
-                        pow_challenge);
-                }
+
+                accumulate_relation_univariates(
+                    thread_univariate_accumulators[thread_idx],
+                    extended_univariates[thread_idx],
+                    instances.relation_parameters, // these parameters have already been folded
+                    pow_challenge);
             }
         });
-        if constexpr (OptimisationEnabled) {
-            Utils::zero_univariates(optimised_univariate_accumulators);
-            // Accumulate the per-thread univariate accumulators into a single set of accumulators
-            for (auto& accumulators : thread_univariate_accumulators) {
-                Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
-            }
+        Utils::zero_univariates(univariate_accumulators);
+        // Accumulate the per-thread univariate accumulators into a single set of accumulators
+        for (auto& accumulators : thread_univariate_accumulators) {
+            Utils::add_nested_tuples(univariate_accumulators, accumulators);
+        }
 
-            // Convert from optimised version to non-optimised
-            deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
-            //  Batch the univariate contributions from each sub-relation to obtain the round univariate
-            return batch_over_relations(univariate_accumulators, instances.alphas);
+        return batch_over_relations(univariate_accumulators, instances.alphas);
+    }
+    /**
+     * @brief Compute the combiner polynomial $G$ in the Protogalaxy paper using the index skipping optimisation
+     *
+     * @todo (https://github.com/AztecProtocol/barretenberg/issues/968) Make combiner tests better
+     *
+     */
+    template <bool OptimisationEnabled = true, std::enable_if_t<OptimisationEnabled, bool> = true>
+    ExtendedUnivariateWithRandomization compute_combiner(const ProverInstances& instances, PowPolynomial<FF>& pow_betas)
+    {
+        BB_OP_COUNT_TIME();
+        size_t common_instance_size = instances[0]->proving_key.circuit_size;
+        pow_betas.compute_values();
+        // Determine number of threads for multithreading.
+        // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based
+        // on a specified minimum number of iterations per thread. This eventually leads to the use of a
+        // single thread. For now we use a power of 2 number of threads simply to ensure the round size is evenly
+        // divided.
+        size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2)
+        size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
+        size_t desired_num_threads = common_instance_size / min_iterations_per_thread;
+        size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified
+        num_threads = num_threads > 0 ? num_threads : 1;                     // ensure num threads is >= 1
+        size_t iterations_per_thread = common_instance_size / num_threads;   // actual iterations per thread
 
-        } else {
-            Utils::zero_univariates(univariate_accumulators);
-            // Accumulate the per-thread univariate accumulators into a single set of accumulators
-            for (auto& accumulators : thread_univariate_accumulators) {
-                Utils::add_nested_tuples(univariate_accumulators, accumulators);
-            }
+        // Univariates are optimised for usual PG (the unoptimised version, which doesn't skip computation, is only
+        // needed for tests), so this overload uses the optimised accumulator and extended univariate types
+        using ThreadAccumulators = OptimisedTupleOfTuplesOfUnivariates;
+        using ExtendedUnivatiatesType = OptimisedExtendedUnivariates;
 
-            return batch_over_relations(univariate_accumulators, instances.alphas);
+        // Construct univariate accumulator containers; one per thread
+        std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
+        for (auto& accum : thread_univariate_accumulators) {
+            // just normal relation lengths
+            Utils::zero_univariates(accum);
         }
+
+        // Construct extended univariates containers; one per thread
+        std::vector<ExtendedUnivatiatesType> extended_univariates;
+        extended_univariates.resize(num_threads);
+
+        // Accumulate the contribution from each sub-relation
+        parallel_for(num_threads, [&](size_t thread_idx) {
+            size_t start = thread_idx * iterations_per_thread;
+            size_t end = (thread_idx + 1) * iterations_per_thread;
+
+            for (size_t idx = start; idx < end; idx++) {
+                // No need to initialise extended_univariates to 0, as they are assigned to here.
+                // Instantiate univariates with skipping to ignore computation in those indices (they are still
+                // available for skipping relations, but all derived univariate will ignore those evaluations)
+                extend_univariates</*skip_count=*/ProverInstances::NUM - 1>(
+                    extended_univariates[thread_idx], instances, idx);
+
+                FF pow_challenge = pow_betas[idx];
+
+                // Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
+                // this function have already been folded. Moreover, linear-dependent relations that act over the
+                // entire execution trace rather than on rows, will not be multiplied by the pow challenge.
+                accumulate_relation_univariates(
+                    thread_univariate_accumulators[thread_idx],
+                    extended_univariates[thread_idx],
+                    instances.optimised_relation_parameters, // these parameters have already been folded
+                    pow_challenge);
+            }
+        });
+        Utils::zero_univariates(optimised_univariate_accumulators);
+        // Accumulate the per-thread univariate accumulators into a single set of accumulators
+        for (auto& accumulators : thread_univariate_accumulators) {
+            Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
+        }
+
+        // Convert from optimised version to non-optimised
+        deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
+        //  Batch the univariate contributions from each sub-relation to obtain the round univariate
+        return batch_over_relations(univariate_accumulators, instances.alphas);
     }
 
     /**
diff --git a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
index e04e3f5de41..08c3cffd783 100644
--- a/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/vm/generated/avm_flavor.hpp
@@ -166,11 +166,6 @@ class AvmFlavor {
     template <size_t NUM_INSTANCES>
     using ProtogalaxyTupleOfTuplesOfUnivariates =
         decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations, NUM_INSTANCES>());
-    template <size_t NUM_INSTANCES>
-    using OptimisedProtogalaxyTupleOfTuplesOfUnivariates =
-        decltype(create_protogalaxy_tuple_of_tuples_of_univariates<Relations,
-                                                                   NUM_INSTANCES,
-                                                                   /*optimised=*/true>());
     using SumcheckTupleOfTuplesOfUnivariates = decltype(create_sumcheck_tuple_of_tuples_of_univariates<Relations>());
     using TupleOfArraysOfValues = decltype(create_tuple_of_arrays_of_values<Relations>());
 

From d2189764f505c3030362cd4a0d6e46d10178cffa Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Tue, 30 Apr 2024 10:10:21 +0000
Subject: [PATCH 11/13] fix

---
 barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
index e1343012c1a..f18011d8153 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
@@ -339,13 +339,6 @@ class ECCVMFlavor {
      * @brief A container for univariates used during Protogalaxy and sumcheck.
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
-    /**
-     * @brief A container for univariates used during Protogalaxy folding in 'optimised' mode.
-     * @details Univariates in the optimised version skip some redundant computation the result of which we already know
-     * (optimistically)
-     */
-    template <size_t LENGTH, size_t SKIP_COUNT>
-    using OptimisedProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH, 0, SKIP_COUNT>>;
 
     /**
      * @brief A container for univariates produced during the hot loop in sumcheck.

From 982bd6d5c8a39054bcba4bcf150afaad04f2e746 Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Tue, 30 Apr 2024 10:11:11 +0000
Subject: [PATCH 12/13] remove unnecessary change

---
 barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
index f18011d8153..e1828ca8fe4 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
@@ -336,7 +336,7 @@ class ECCVMFlavor {
     };
 
     /**
-     * @brief A container for univariates used during Protogalaxy and sumcheck.
+     * @brief A container for univariates used during sumcheck.
      */
     template <size_t LENGTH> using ProverUnivariates = AllEntities<bb::Univariate<FF, LENGTH>>;
 

From e9f25780145235f6721fcb5dcde8da9cff7fab21 Mon Sep 17 00:00:00 2001
From: Rumata888 <isennovskiy@gmail.com>
Date: Tue, 30 Apr 2024 10:15:54 +0000
Subject: [PATCH 13/13] comments

---
 .../cpp/src/barretenberg/polynomials/univariate.hpp       | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
index 21efa2dad83..6471ba85b56 100644
--- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
+++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp
@@ -21,7 +21,9 @@ template <class Fr, size_t view_domain_end, size_t view_domain_start, size_t ski
  * domain under the hood.
  *
  * @tparam skip_count Skip computing the values of elements [domain_start+1,..,domain_start+skip_count]. Used for
- * optimising computation in protogalaxy
+ * optimising computation in protogalaxy. The value at [domain_start] is the value from the accumulator instance, while
+ * the values in [domain_start+1, ... domain_start + skip_count] in the accumulator should be zero if the original
+ * instances are correct.
  */
 template <class Fr, size_t domain_end, size_t domain_start = 0, size_t skip_count = 0> class Univariate {
   public:
@@ -195,6 +197,8 @@ template <class Fr, size_t domain_end, size_t domain_start = 0, size_t skip_coun
     {
         size_t i = 0;
         for (auto& eval : evaluations) {
+            // If skip_count is zero, this condition holds for every index; otherwise the evaluations at
+            // [domain_start+1, ..., domain_start+skip_count] are not computed
             if (i == 0 || i >= (skip_count + 1)) {
                 eval -= scalar;
             }
@@ -206,6 +210,8 @@ template <class Fr, size_t domain_end, size_t domain_start = 0, size_t skip_coun
     {
         size_t i = 0;
         for (auto& eval : evaluations) {
+            // If skip_count is zero, this condition holds for every index; otherwise the evaluations at
+            // [domain_start+1, ..., domain_start+skip_count] are not computed
             if (i == 0 || i >= (skip_count + 1)) {
                 eval *= scalar;
             }