refactor: Remove copy from compute_row_evaluations (#8875)

This PR simplifies `compute_row_evaluations` while slightly improving its performance. Previously the code redundantly zeroed memory and made many unnecessary copies. It also followed an unnecessarily complicated model of `void` functions that mutate their inputs, which brings no advantage here, so I refactored it to use a more conventional input/output model (functions that return their results) for clarity. I'm surprised the performance gain is not larger, but we get a small benefit while also improving clarity.

# x86
```
Benchmark                          Time       CPU   Time Old   Time New        CPU Old        CPU New
--------------------------------------------------------------------------------------------------------------------
ClientIVCBench/Full/6           -0.0102   -0.0091      33216      32878          30724          30443
OVERALL_GEOMEAN                 -0.0102   -0.0091         33         33             31             30
```

# WASM
```
Benchmark                          Time       CPU   Time Old   Time New        CPU Old        CPU New
--------------------------------------------------------------------------------------------------------------------
ClientIVCBench/Full/6           -0.0010   -0.0010     102429     102328   102429160000   102327660000
OVERALL_GEOMEAN                 -0.0010   -0.0010        102        102      102429160      102327660
```
AztecProtocol · Sep 30, 2024 · 9cd450e · 9cd450e
1 parent ead4649
commit 9cd450e
Show file tree

Hide file tree

Showing 6 changed files with 84 additions and 71 deletions.
diff --git a/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh b/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh
@@ -19,6 +19,11 @@ HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16}
 BASELINE_BRANCH="master"
 BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools"
 
+if [ ! -z "$(git status --untracked-files=no --porcelain)" ]; then
+  echo "Git status is unclean; the script will not be able to check out $BASELINE_BRANCH."
+  exit 1
+fi
+
 echo -e "\nComparing $BENCHMARK between $BASELINE_BRANCH and current branch:"
 
 # Move above script dir.

diff --git a/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote_wasm.sh b/barretenberg/cpp/scripts/compare_branch_vs_baseline_remote_wasm.sh
@@ -19,6 +19,12 @@ HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16}
 BASELINE_BRANCH="master"
 BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools"
 
+if [ ! -z "$(git status --untracked-files=no --porcelain)" ]; then
+  echo "Git status is unclean; the script will not be able to check out $BASELINE_BRANCH."
+  exit 1
+fi
+
+
 echo -e "\nComparing $BENCHMARK between $BASELINE_BRANCH and current branch:"
 
 # Move above script dir.

diff --git a/barretenberg/cpp/scripts/compare_client_ivc_bench.sh b/barretenberg/cpp/scripts/compare_client_ivc_bench.sh
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
 set -eu
 
-./scripts/compare_branch_vs_baseline_remote_wasm.sh client_ivc_bench 'Full/6$'
+./scripts/compare_branch_vs_baseline_remote.sh client_ivc_bench 'Full/6$'
diff --git a/barretenberg/cpp/scripts/compare_client_ivc_bench_wasm.sh b/barretenberg/cpp/scripts/compare_client_ivc_bench_wasm.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -eu
+
+./scripts/compare_branch_vs_baseline_remote_wasm.sh client_ivc_bench 'Full/6$'
diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp
@@ -24,6 +24,7 @@ template <class DeciderProvingKeys_> class ProtogalaxyProverInternal {
     using RelationUtils = bb::RelationUtils<Flavor>;
     using ProverPolynomials = typename Flavor::ProverPolynomials;
     using Relations = typename Flavor::Relations;
+    using AllValues = typename Flavor::AllValues;
     using RelationSeparator = typename Flavor::RelationSeparator;
     static constexpr size_t NUM_KEYS = DeciderProvingKeys_::NUM;
     using UnivariateRelationParametersNoOptimisticSkipping =
@@ -54,6 +55,43 @@ template <class DeciderProvingKeys_> class ProtogalaxyProverInternal {
 
     static constexpr size_t NUM_SUBRELATIONS = DeciderPKs::NUM_SUBRELATIONS;
 
+    /**
+     * @brief Scale the subrelation evaluations by the challenges ('alphas') and sum them, accumulating the
+     * contributions of the linearly dependent subrelation(s) separately.
+     *
+     * @details Note that a linearly dependent subrelation is not computed on a specific row but rather on the entire
+     * execution trace.
+     *
+     * @param evals The evaluations of all subrelations on some row
+     * @param challenges The 'alpha' challenges used to batch the subrelations
+     * @param linearly_dependent_contribution An accumulator for values of the linearly-dependent (i.e., 'whole-trace')
+     * subrelations
+     * @return FF The challenge-weighted sum of the linearly-independent (i.e., 'per-row') subrelation evaluations
+     */
+    inline static FF process_subrelation_evaluations(const RelationEvaluations& evals,
+                                                     const std::array<FF, NUM_SUBRELATIONS>& challenges,
+                                                     FF& linearly_dependent_contribution)
+    {
+        // TODO(https://github.com/AztecProtocol/barretenberg/issues/1115): Initialize with first subrelation value to
+        // avoid Montgomery allocating 0 and doing a mul. This is about 60ns per row.
+        FF linearly_independent_contribution{ 0 };
+        size_t idx = 0;
+
+        auto scale_by_challenge_and_accumulate =
+            [&]<size_t relation_idx, size_t subrelation_idx, typename Element>(Element& element) {
+                using Relation = typename std::tuple_element_t<relation_idx, Relations>;
+                const Element contribution = element * challenges[idx];
+                if (subrelation_is_linearly_independent<Relation, subrelation_idx>()) {
+                    linearly_independent_contribution += contribution;
+                } else {
+                    linearly_dependent_contribution += contribution;
+                }
+                idx++;
+            };
+        RelationUtils::apply_to_tuple_of_arrays_elements(scale_by_challenge_and_accumulate, evals);
+        return linearly_independent_contribution;
+    }
+
     /**
      * @brief Compute the values of the aggregated relation evaluations at each row in the execution trace, representing
      * f_i(ω) in the Protogalaxy paper, given the evaluations of all the prover polynomials and \vec{α} (the batching
@@ -67,40 +105,41 @@ template <class DeciderProvingKeys_> class ProtogalaxyProverInternal {
      * linearly dependent subrelation and α_j is its corresponding batching challenge.
      */
     static std::vector<FF> compute_row_evaluations(const ProverPolynomials& polynomials,
-                                                   const RelationSeparator& alpha,
+                                                   const RelationSeparator& alphas_,
                                                    const RelationParameters<FF>& relation_parameters)
 
     {
 
         BB_OP_COUNT_TIME_NAME("ProtogalaxyProver_::compute_row_evaluations");
+
         const size_t polynomial_size = polynomials.get_polynomial_size();
-        std::vector<FF> full_honk_evaluations(polynomial_size);
+        std::vector<FF> aggregated_relation_evaluations(polynomial_size);
+
+        const std::array<FF, NUM_SUBRELATIONS> alphas = [&alphas_]() {
+            std::array<FF, NUM_SUBRELATIONS> tmp;
+            tmp[0] = 1;
+            std::copy(alphas_.begin(), alphas_.end(), tmp.begin() + 1);
+            return tmp;
+        }();
+
         const std::vector<FF> linearly_dependent_contribution_accumulators = parallel_for_heuristic(
             polynomial_size,
             /*accumulator default*/ FF(0),
-            [&](size_t row, FF& linearly_dependent_contribution_accumulator) {
-                auto row_evaluations = polynomials.get_row(row);
-                RelationEvaluations relation_evaluations;
-                RelationUtils::zero_elements(relation_evaluations);
-
-                RelationUtils::template accumulate_relation_evaluations<>(
-                    row_evaluations, relation_evaluations, relation_parameters, FF(1));
-
-                auto output = FF(0);
-                auto running_challenge = FF(1);
-                RelationUtils::scale_and_batch_elements(relation_evaluations,
-                                                        alpha,
-                                                        running_challenge,
-                                                        output,
-                                                        linearly_dependent_contribution_accumulator);
-
-                full_honk_evaluations[row] = output;
+            [&](size_t row_idx, FF& linearly_dependent_contribution_accumulator) {
+                const AllValues row = polynomials.get_row(row_idx);
+                // Evaluate all subrelations on the given row. Separator is 1 since we are not summing across rows here.
+                const RelationEvaluations evals =
+                    RelationUtils::accumulate_relation_evaluations(row, relation_parameters, FF(1));
+
+                // Sum against challenges alpha
+                aggregated_relation_evaluations[row_idx] =
+                    process_subrelation_evaluations(evals, alphas, linearly_dependent_contribution_accumulator);
             },
             thread_heuristics::ALWAYS_MULTITHREAD);
-        full_honk_evaluations[0] += sum(linearly_dependent_contribution_accumulators);
-        return full_honk_evaluations;
-    }
+        aggregated_relation_evaluations[0] += sum(linearly_dependent_contribution_accumulators);
 
+        return aggregated_relation_evaluations;
+    }
     /**
      * @brief  Recursively compute the parent nodes of each level in the tree, starting from the leaves. Note that at
      * each level, the resulting parent nodes will be polynomials of degree (level+1) because we multiply by an

diff --git a/barretenberg/cpp/src/barretenberg/relations/utils.hpp b/barretenberg/cpp/src/barretenberg/relations/utils.hpp
@@ -174,15 +174,16 @@ template <typename Flavor> class RelationUtils {
      */
     template <typename Parameters>
     // TODO(#224)(Cody): Input should be an array?
-    inline static void accumulate_relation_evaluations(const PolynomialEvaluations& evaluations,
-                                                       RelationEvaluations& relation_evaluations,
-                                                       const Parameters& relation_parameters,
-                                                       const FF& partial_evaluation_result)
+    inline static RelationEvaluations accumulate_relation_evaluations(const PolynomialEvaluations& evaluations,
+                                                                      const Parameters& relation_parameters,
+                                                                      const FF& partial_evaluation_result)
     {
+        RelationEvaluations result;
         constexpr_for<0, NUM_RELATIONS, 1>([&]<size_t rel_index>() {
             accumulate_single_relation<Parameters, rel_index>(
-                evaluations, relation_evaluations, relation_parameters, partial_evaluation_result);
+                evaluations, result, relation_parameters, partial_evaluation_result);
         });
+        return result;
     }
 
     template <typename Parameters, size_t relation_idx, bool consider_skipping = true>
@@ -251,48 +252,6 @@ template <typename Flavor> class RelationUtils {
         apply_to_tuple_of_arrays(scale_by_challenges_and_accumulate, tuple);
     }
 
-    /**
-     * @brief Scales elements, representing evaluations of polynomials in subrelations, by separate challenges and then
-     * sum them together. This function has identical functionality with the one above with the caveat that one such
-     * evaluation is part of a linearly dependent subrelation and hence needs to be accumulated separately.
-     *
-     * @details Such functionality is needed when computing the evaluation of the full relation at a specific row in
-     * the execution trace because a linearly dependent subrelation does not act on a specific row but rather on the
-     * entire execution trace.
-     *
-     * @param tuple
-     * @param challenges
-     * @param current_scalar
-     * @param result
-     * @param linearly_dependent_contribution
-     */
-    static void scale_and_batch_elements(auto& tuple,
-                                         const RelationSeparator& challenges,
-                                         FF current_scalar,
-                                         FF& result,
-                                         FF& linearly_dependent_contribution)
-        requires bb::IsFoldingFlavor<Flavor>
-    {
-        size_t idx = 0;
-        std::array<FF, NUM_SUBRELATIONS> tmp{ current_scalar };
-
-        std::copy(challenges.begin(), challenges.end(), tmp.begin() + 1);
-
-        auto scale_by_challenge_and_accumulate =
-            [&]<size_t relation_idx, size_t subrelation_idx, typename Element>(Element& element) {
-                using Relation = typename std::tuple_element_t<relation_idx, Relations>;
-                const bool is_subrelation_linearly_independent =
-                    bb::subrelation_is_linearly_independent<Relation, subrelation_idx>();
-                if (is_subrelation_linearly_independent) {
-                    result += element * tmp[idx];
-                } else {
-                    linearly_dependent_contribution += element * tmp[idx];
-                }
-                idx++;
-            };
-        apply_to_tuple_of_arrays_elements(scale_by_challenge_and_accumulate, tuple);
-    }
-
     /**
      * @brief Scale elements by consecutive powers of a given challenge then sum the result
      * @param result Batched result
@@ -336,7 +295,7 @@ template <typename Flavor> class RelationUtils {
      * dependent contribution when we compute the evaluation of full rel_U(G)H at particular row.)
      */
     template <size_t outer_idx = 0, size_t inner_idx = 0, typename Operation, typename... Ts>
-    static void apply_to_tuple_of_arrays_elements(Operation&& operation, std::tuple<Ts...>& tuple)
+    static void apply_to_tuple_of_arrays_elements(Operation&& operation, const std::tuple<Ts...>& tuple)
     {
         using Relation = typename std::tuple_element_t<outer_idx, Relations>;
         const auto subrelation_length = Relation::SUBRELATION_PARTIAL_LENGTHS.size();