Skip to content

Commit

Permalink
refactor: Remove copy from compute_row_evaluations (#8875)
Browse files Browse the repository at this point in the history
This PR simplifies `compute_row_evaluations` while slightly improving
its performance. Previously the code zeroed memory redundantly and made
many unnecessary copies. It also relied on an unnecessarily complicated
model of void functions that mutate their arguments, which offered no
advantage, so I refactored it to use a more conventional input/output
model for clarity.

I'm surprised the performance gain isn't larger, but we get a
small benefit while also improving clarity.

# x86
```
Benchmark                               Time             CPU      Time Old      Time New       CPU Old       CPU New
--------------------------------------------------------------------------------------------------------------------
ClientIVCBench/Full/6                -0.0102         -0.0091         33216         32878         30724         30443
OVERALL_GEOMEAN                      -0.0102         -0.0091            33            33            31            30
```

# WASM
```
Benchmark                               Time             CPU      Time Old      Time New       CPU Old       CPU New
--------------------------------------------------------------------------------------------------------------------
ClientIVCBench/Full/6                -0.0010         -0.0010        102429        102328  102429160000  102327660000
OVERALL_GEOMEAN                      -0.0010         -0.0010           102           102     102429160     102327660
```
  • Loading branch information
codygunton authored Sep 30, 2024
1 parent ead4649 commit 9cd450e
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 71 deletions.
5 changes: 5 additions & 0 deletions barretenberg/cpp/scripts/compare_branch_vs_baseline_remote.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16}
BASELINE_BRANCH="master"
BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools"

if [ ! -z "$(git status --untracked-files=no --porcelain)" ]; then
echo "Git status is unclean; the script will not be able to check out $BASELINE_BRANCH."
exit 1
fi

echo -e "\nComparing $BENCHMARK between $BASELINE_BRANCH and current branch:"

# Move above script dir.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16}
BASELINE_BRANCH="master"
BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools"

if [ ! -z "$(git status --untracked-files=no --porcelain)" ]; then
echo "Git status is unclean; the script will not be able to check out $BASELINE_BRANCH."
exit 1
fi


echo -e "\nComparing $BENCHMARK between $BASELINE_BRANCH and current branch:"

# Move above script dir.
Expand Down
2 changes: 1 addition & 1 deletion barretenberg/cpp/scripts/compare_client_ivc_bench.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
set -eu

./scripts/compare_branch_vs_baseline_remote_wasm.sh client_ivc_bench 'Full/6$'
./scripts/compare_branch_vs_baseline_remote.sh client_ivc_bench 'Full/6$'
4 changes: 4 additions & 0 deletions barretenberg/cpp/scripts/compare_client_ivc_bench_wasm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
set -eu

./scripts/compare_branch_vs_baseline_remote_wasm.sh client_ivc_bench 'Full/6$'
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ template <class DeciderProvingKeys_> class ProtogalaxyProverInternal {
using RelationUtils = bb::RelationUtils<Flavor>;
using ProverPolynomials = typename Flavor::ProverPolynomials;
using Relations = typename Flavor::Relations;
using AllValues = typename Flavor::AllValues;
using RelationSeparator = typename Flavor::RelationSeparator;
static constexpr size_t NUM_KEYS = DeciderProvingKeys_::NUM;
using UnivariateRelationParametersNoOptimisticSkipping =
Expand Down Expand Up @@ -54,6 +55,43 @@ template <class DeciderProvingKeys_> class ProtogalaxyProverInternal {

static constexpr size_t NUM_SUBRELATIONS = DeciderPKs::NUM_SUBRELATIONS;

/**
* @brief Scale each subrelation evaluation by its batching challenge ('alpha') and sum the results, keeping the
* contributions of linearly dependent subrelations separate from those of linearly independent ones.
*
* @details Note that a linearly dependent subrelation is not computed on a specific row but rather on the entire
* execution trace. Its scaled value is therefore added to the caller-provided accumulator instead of to the
* returned per-row value.
*
* @param evals The evaluations of all subrelations on some row
* @param challenges The 'alpha' challenges used to batch the subrelations; the i-th element visited by
* apply_to_tuple_of_arrays_elements is multiplied by challenges[i]
* @param linearly_dependent_contribution An accumulator for values of the linearly-dependent (i.e., 'whole-trace')
* subrelations; this function only adds to it and never resets it
* @return FF The sum of the challenge-scaled evaluations of the linearly-independent (i.e., 'per-row') subrelations
*/
inline static FF process_subrelation_evaluations(const RelationEvaluations& evals,
const std::array<FF, NUM_SUBRELATIONS>& challenges,
FF& linearly_dependent_contribution)
{
// TODO(https://github.com/AztecProtocol/barretenberg/issues/1115): Initialize with first subrelation value to
// avoid Montgomery allocating 0 and doing a mul. This is about 60ns per row.
FF linearly_independent_contribution{ 0 };
// Running count of subrelation elements visited so far; selects the challenge paired with the current element.
size_t idx = 0;

// Invoked once per subrelation element: scale it by its challenge, then route the product to the per-row sum
// or to the whole-trace accumulator depending on whether the subrelation is linearly independent.
auto scale_by_challenge_and_accumulate =
[&]<size_t relation_idx, size_t subrelation_idx, typename Element>(Element& element) {
using Relation = typename std::tuple_element_t<relation_idx, Relations>;
const Element contribution = element * challenges[idx];
if (subrelation_is_linearly_independent<Relation, subrelation_idx>()) {
linearly_independent_contribution += contribution;
} else {
linearly_dependent_contribution += contribution;
}
// Advance to the next challenge regardless of which accumulator received the contribution.
idx++;
};
RelationUtils::apply_to_tuple_of_arrays_elements(scale_by_challenge_and_accumulate, evals);
return linearly_independent_contribution;
}

/**
* @brief Compute the values of the aggregated relation evaluations at each row in the execution trace, representing
* f_i(ω) in the Protogalaxy paper, given the evaluations of all the prover polynomials and \vec{α} (the batching
Expand All @@ -67,40 +105,41 @@ template <class DeciderProvingKeys_> class ProtogalaxyProverInternal {
* linearly dependent subrelation and α_j is its corresponding batching challenge.
*/
static std::vector<FF> compute_row_evaluations(const ProverPolynomials& polynomials,
const RelationSeparator& alpha,
const RelationSeparator& alphas_,
const RelationParameters<FF>& relation_parameters)

{

BB_OP_COUNT_TIME_NAME("ProtogalaxyProver_::compute_row_evaluations");

const size_t polynomial_size = polynomials.get_polynomial_size();
std::vector<FF> full_honk_evaluations(polynomial_size);
std::vector<FF> aggregated_relation_evaluations(polynomial_size);

const std::array<FF, NUM_SUBRELATIONS> alphas = [&alphas_]() {
std::array<FF, NUM_SUBRELATIONS> tmp;
tmp[0] = 1;
std::copy(alphas_.begin(), alphas_.end(), tmp.begin() + 1);
return tmp;
}();

const std::vector<FF> linearly_dependent_contribution_accumulators = parallel_for_heuristic(
polynomial_size,
/*accumulator default*/ FF(0),
[&](size_t row, FF& linearly_dependent_contribution_accumulator) {
auto row_evaluations = polynomials.get_row(row);
RelationEvaluations relation_evaluations;
RelationUtils::zero_elements(relation_evaluations);

RelationUtils::template accumulate_relation_evaluations<>(
row_evaluations, relation_evaluations, relation_parameters, FF(1));

auto output = FF(0);
auto running_challenge = FF(1);
RelationUtils::scale_and_batch_elements(relation_evaluations,
alpha,
running_challenge,
output,
linearly_dependent_contribution_accumulator);

full_honk_evaluations[row] = output;
[&](size_t row_idx, FF& linearly_dependent_contribution_accumulator) {
const AllValues row = polynomials.get_row(row_idx);
// Evaluate all subrelations on the given row. Separator is 1 since we are not summing across rows here.
const RelationEvaluations evals =
RelationUtils::accumulate_relation_evaluations(row, relation_parameters, FF(1));

// Sum against challenges alpha
aggregated_relation_evaluations[row_idx] =
process_subrelation_evaluations(evals, alphas, linearly_dependent_contribution_accumulator);
},
thread_heuristics::ALWAYS_MULTITHREAD);
full_honk_evaluations[0] += sum(linearly_dependent_contribution_accumulators);
return full_honk_evaluations;
}
aggregated_relation_evaluations[0] += sum(linearly_dependent_contribution_accumulators);

return aggregated_relation_evaluations;
}
/**
* @brief Recursively compute the parent nodes of each level in the tree, starting from the leaves. Note that at
* each level, the resulting parent nodes will be polynomials of degree (level+1) because we multiply by an
Expand Down
55 changes: 7 additions & 48 deletions barretenberg/cpp/src/barretenberg/relations/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,16 @@ template <typename Flavor> class RelationUtils {
*/
template <typename Parameters>
// TODO(#224)(Cody): Input should be an array?
inline static void accumulate_relation_evaluations(const PolynomialEvaluations& evaluations,
RelationEvaluations& relation_evaluations,
const Parameters& relation_parameters,
const FF& partial_evaluation_result)
inline static RelationEvaluations accumulate_relation_evaluations(const PolynomialEvaluations& evaluations,
const Parameters& relation_parameters,
const FF& partial_evaluation_result)
{
RelationEvaluations result;
constexpr_for<0, NUM_RELATIONS, 1>([&]<size_t rel_index>() {
accumulate_single_relation<Parameters, rel_index>(
evaluations, relation_evaluations, relation_parameters, partial_evaluation_result);
evaluations, result, relation_parameters, partial_evaluation_result);
});
return result;
}

template <typename Parameters, size_t relation_idx, bool consider_skipping = true>
Expand Down Expand Up @@ -251,48 +252,6 @@ template <typename Flavor> class RelationUtils {
apply_to_tuple_of_arrays(scale_by_challenges_and_accumulate, tuple);
}

/**
* @brief Scales elements, representing evaluations of polynomials in subrelations, by separate challenges and then
* sum them together. This function has identical functionality with the one above with the caveat that one such
* evaluation is part of a linearly dependent subrelation and hence needs to be accumulated separately.
*
* @details Such functionality is needed when computing the evaluation of the full relation at a specific row in
* the execution trace because a linearly dependent subrelation does not act on a specific row but rather on the
* entire execution trace.
*
* @param tuple
* @param challenges
* @param current_scalar
* @param result
* @param linearly_dependent_contribution
*/
static void scale_and_batch_elements(auto& tuple,
const RelationSeparator& challenges,
FF current_scalar,
FF& result,
FF& linearly_dependent_contribution)
requires bb::IsFoldingFlavor<Flavor>
{
size_t idx = 0;
std::array<FF, NUM_SUBRELATIONS> tmp{ current_scalar };

std::copy(challenges.begin(), challenges.end(), tmp.begin() + 1);

auto scale_by_challenge_and_accumulate =
[&]<size_t relation_idx, size_t subrelation_idx, typename Element>(Element& element) {
using Relation = typename std::tuple_element_t<relation_idx, Relations>;
const bool is_subrelation_linearly_independent =
bb::subrelation_is_linearly_independent<Relation, subrelation_idx>();
if (is_subrelation_linearly_independent) {
result += element * tmp[idx];
} else {
linearly_dependent_contribution += element * tmp[idx];
}
idx++;
};
apply_to_tuple_of_arrays_elements(scale_by_challenge_and_accumulate, tuple);
}

/**
* @brief Scale elements by consecutive powers of a given challenge then sum the result
* @param result Batched result
Expand Down Expand Up @@ -336,7 +295,7 @@ template <typename Flavor> class RelationUtils {
* dependent contribution when we compute the evaluation of full rel_U(G)H at particular row.)
*/
template <size_t outer_idx = 0, size_t inner_idx = 0, typename Operation, typename... Ts>
static void apply_to_tuple_of_arrays_elements(Operation&& operation, std::tuple<Ts...>& tuple)
static void apply_to_tuple_of_arrays_elements(Operation&& operation, const std::tuple<Ts...>& tuple)
{
using Relation = typename std::tuple_element_t<outer_idx, Relations>;
const auto subrelation_length = Relation::SUBRELATION_PARTIAL_LENGTHS.size();
Expand Down

0 comments on commit 9cd450e

Please sign in to comment.