diff --git a/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp b/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp new file mode 100644 index 00000000000..69549bbd7f0 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/common/thread_utils.cpp @@ -0,0 +1,40 @@ +#include "thread_utils.hpp" + +namespace barretenberg::thread_utils { +/** + * @brief calculates number of threads to create based on minimum iterations per thread + * @details Finds the number of cpus with get_num_cpus(), and calculates `desired_num_threads` + * Returns the min of `desired_num_threads` and `max_num_threads`, but never less than 1. + * Note that it will not calculate a power of 2 necessarily, use `calculate_num_threads_pow2` instead + * + * @param num_iterations total number of loop iterations to distribute across threads + * @param min_iterations_per_thread smallest amount of work worth giving to one thread (0 is treated as 1) + * @return size_t number of threads to use, never less than 1 + */ +size_t calculate_num_threads(size_t num_iterations, size_t min_iterations_per_thread) +{ + size_t max_num_threads = get_num_cpus(); // number of available threads + size_t desired_num_threads = num_iterations / (min_iterations_per_thread > 0 ? min_iterations_per_thread : 1); // treat a minimum of 0 as 1 to avoid dividing by zero + size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified + num_threads = num_threads > 0 ? 
num_threads : 1; // ensure num_threads is at least 1 + return num_threads; +} + +/** + * @brief calculates number of threads to create based on minimum iterations per thread, guaranteed power of 2 + * @details Same functionality as `calculate_num_threads` but guaranteed power of 2 + * @param num_iterations total number of loop iterations to distribute across threads + * @param min_iterations_per_thread smallest amount of work worth giving to one thread (0 is treated as 1) + * @return size_t number of threads to use, never less than 1 + */ +size_t calculate_num_threads_pow2(size_t num_iterations, size_t min_iterations_per_thread) +{ + size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2) + size_t desired_num_threads = num_iterations / (min_iterations_per_thread > 0 ? min_iterations_per_thread : 1); // treat a minimum of 0 as 1 to avoid dividing by zero + desired_num_threads = static_cast<size_t>(1ULL << numeric::get_msb(desired_num_threads)); // round down to a power of 2 (assumes get_msb yields the index of the top set bit) + size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified + num_threads = num_threads > 0 ? num_threads : 1; // ensure num_threads is at least 1 + return num_threads; +} + +} // namespace barretenberg::thread_utils \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp b/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp new file mode 100644 index 00000000000..55ee79ff1ab --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/common/thread_utils.hpp @@ -0,0 +1,30 @@ +#pragma once +#include "thread.hpp" + +namespace barretenberg::thread_utils { + +const size_t DEFAULT_MIN_ITERS_PER_THREAD = 1 << 4; // default for `min_iterations_per_thread` (16) + +/** + * @brief calculates number of threads to create based on minimum iterations per thread + * @details Finds the number of cpus with get_num_cpus(), and calculates `desired_num_threads` + * Returns the min of `desired_num_threads` and `max_num_threads`, but never less than 1. 
+ * Note that it will not calculate a power of 2 necessarily, use `calculate_num_threads_pow2` instead + * + * @param num_iterations total number of loop iterations to distribute across threads + * @param min_iterations_per_thread smallest amount of work worth giving to one thread (0 is treated as 1) + * @return size_t number of threads to use, never less than 1 + */ +size_t calculate_num_threads(size_t num_iterations, size_t min_iterations_per_thread = DEFAULT_MIN_ITERS_PER_THREAD); + +/** + * @brief calculates number of threads to create based on minimum iterations per thread, guaranteed power of 2 + * @details Same functionality as `calculate_num_threads` but guaranteed power of 2 + * @param num_iterations total number of loop iterations to distribute across threads + * @param min_iterations_per_thread smallest amount of work worth giving to one thread (0 is treated as 1) + * @return size_t number of threads to use, never less than 1 + */ +size_t calculate_num_threads_pow2(size_t num_iterations, + size_t min_iterations_per_thread = DEFAULT_MIN_ITERS_PER_THREAD); + +} // namespace barretenberg::thread_utils \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index ae2dfc36ac7..1c335957c29 100644 --- a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -1,6 +1,7 @@ #pragma once #include "barretenberg/common/log.hpp" #include "barretenberg/common/thread.hpp" +#include "barretenberg/common/thread_utils.hpp" #include "barretenberg/polynomials/barycentric.hpp" #include "barretenberg/polynomials/pow.hpp" #include "barretenberg/proof_system/flavor/flavor.hpp" @@ -140,12 +141,10 @@ template class SumcheckProverRound { // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based // on a specified minimum number of iterations per thread. This eventually leads to the use of a single thread. // For now we use a power of 2 number of threads simply to ensure the round size is evenly divided. 
- size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2) size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread - size_t desired_num_threads = round_size / min_iterations_per_thread; - size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified - num_threads = num_threads > 0 ? num_threads : 1; // ensure num threads is >= 1 - size_t iterations_per_thread = round_size / num_threads; // actual iterations per thread + size_t num_threads = + barretenberg::thread_utils::calculate_num_threads_pow2(round_size, min_iterations_per_thread); + size_t iterations_per_thread = round_size / num_threads; // actual iterations per thread // Constuct univariate accumulator containers; one per thread std::vector thread_univariate_accumulators(num_threads); diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index c644b778d31..c898d9d8a6f 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -1,6 +1,8 @@ #include "polynomial.hpp" #include "barretenberg/common/assert.hpp" #include "barretenberg/common/slab_allocator.hpp" +#include "barretenberg/common/thread.hpp" +#include "barretenberg/common/thread_utils.hpp" #include "polynomial_arithmetic.hpp" #include #include @@ -306,12 +308,17 @@ template void Polynomial::add_scaled(std::span other const size_t other_size = other.size(); ASSERT(in_place_operation_viable(other_size)); - /** TODO parallelize using some kind of generic evaluation domain - * we really only need to know the thread size, but we don't need all the FFT roots - */ - for (size_t i = 0; i < other_size; ++i) { - coefficients_.get()[i] += scaling_factor * other[i]; - } + // Calculates number of threads with thread_utils::calculate_num_threads + size_t num_threads = 
thread_utils::calculate_num_threads(other_size); + size_t range_per_thread = other_size / num_threads; + size_t leftovers = other_size - (range_per_thread * num_threads); // remainder goes to the last thread + parallel_for(num_threads, [&](size_t j) { + size_t offset = j * range_per_thread; + size_t end = (j == num_threads - 1) ? offset + range_per_thread + leftovers : offset + range_per_thread; + for (size_t i = offset; i < end; ++i) { + coefficients_.get()[i] += scaling_factor * other[i]; + } + }); } template Polynomial& Polynomial::operator+=(std::span other) @@ -319,12 +326,16 @@ template Polynomial& Polynomial::operator+=(std::span Polynomial& Polynomial::operator-=(std::span Polynomial& Polynomial::operator*=(const Fr scaling_facor) +template Polynomial& Polynomial::operator*=(const Fr scaling_factor) { ASSERT(in_place_operation_viable()); - for (size_t i = 0; i < size_; ++i) { - coefficients_.get()[i] *= scaling_facor; - } + size_t num_threads = thread_utils::calculate_num_threads(size_); + size_t range_per_thread = size_ / num_threads; + size_t leftovers = size_ - (range_per_thread * num_threads); // remainder goes to the last thread + parallel_for(num_threads, [&](size_t j) { + size_t offset = j * range_per_thread; + size_t end = (j == num_threads - 1) ? offset + range_per_thread + leftovers : offset + range_per_thread; + for (size_t i = offset; i < end; ++i) { + coefficients_.get()[i] *= scaling_factor; + } + }); + return *this; } diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp index f26ac1e69f9..d22f47c5f73 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp @@ -188,7 +188,7 @@ template class Polynomial { * * @param scaling_factor s */ - Polynomial& operator*=(const Fr scaling_facor); + Polynomial& operator*=(const Fr scaling_factor); /** * @brief evaluates p(X) = ∑ᵢ aᵢ⋅Xⁱ considered as multi-linear extension p(X₀,…,Xₘ₋₁) = ∑ᵢ aᵢ⋅Lᵢ(X₀,…,Xₘ₋₁)