Combine compute_combiner
codygunton committed Aug 19, 2024
1 parent 2aca4d9 commit 07d7050
Showing 1 changed file with 46 additions and 90 deletions.
136 changes: 46 additions & 90 deletions barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover.hpp
@@ -376,9 +376,16 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {

/**
* @brief Compute the combiner polynomial $G$ in the Protogalaxy paper
* @details We have implemented an optimization that assumes the value G(1) is 0 (e.g., in the case where we fold
* one instance-witness pair at a time). This holds whenever the witness being folded is valid, since G(1)
* evaluates the relations on the incoming instance alone.
* @todo (https://github.com/AztecProtocol/barretenberg/issues/968) Make combiner tests better
*
* @tparam skip_zero_computations whether to use the optimization that skips computing values known to be zero.
* @param instances the prover instances to be folded
* @param pow_betas the pow polynomial whose evaluations weight each row's contribution
* @return ExtendedUnivariateWithRandomization
*/
template <bool OptimisationEnabled, std::enable_if_t<!OptimisationEnabled, bool> = true>
template <bool skip_zero_computations = true>
ExtendedUnivariateWithRandomization compute_combiner(const ProverInstances& instances, PowPolynomial<FF>& pow_betas)
{
BB_OP_COUNT_TIME();
@@ -398,8 +405,10 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {

// The univariates are optimised for the usual PG flow, but the unoptimised version (which skips no
// computation) is needed for tests, so the types are chosen based on the template instantiation.
using ThreadAccumulators = TupleOfTuplesOfUnivariates;
using ExtendedUnivatiatesType = ExtendedUnivariates;
using ThreadAccumulators =
std::conditional_t<skip_zero_computations, OptimisedTupleOfTuplesOfUnivariates, TupleOfTuplesOfUnivariates>;
using ExtendedUnivatiatesType =
std::conditional_t<skip_zero_computations, OptimisedExtendedUnivariates, ExtendedUnivariates>;

// Construct univariate accumulator containers; one per thread
std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
@@ -418,104 +427,51 @@ template <class ProverInstances_> class ProtoGalaxyProver_ {
size_t end = (thread_idx + 1) * iterations_per_thread;

for (size_t idx = start; idx < end; idx++) {

extend_univariates(extended_univariates[thread_idx], instances, idx);
// Instantiate univariates, possibly with skipping, to ignore computation in those indices (they are
// still available for skipping relations, but all derived univariates will ignore those evaluations).
// There is no need to initialise extended_univariates to 0 since it is assigned to below.
constexpr size_t skip_count = skip_zero_computations ? ProverInstances::NUM - 1 : 0;
extend_univariates<skip_count>(extended_univariates[thread_idx], instances, idx);

FF pow_challenge = pow_betas[idx];

// Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
// this function have already been folded. Moreover, linear-dependent relations that act over the
// entire execution trace rather than on rows, will not be multiplied by the pow challenge.

accumulate_relation_univariates(
thread_univariate_accumulators[thread_idx],
extended_univariates[thread_idx],
instances.relation_parameters, // these parameters have already been folded
pow_challenge);
if constexpr (skip_zero_computations) {
accumulate_relation_univariates(
thread_univariate_accumulators[thread_idx],
extended_univariates[thread_idx],
instances.optimised_relation_parameters, // these parameters have already been folded
pow_challenge);
} else {
accumulate_relation_univariates(
thread_univariate_accumulators[thread_idx],
extended_univariates[thread_idx],
instances.relation_parameters, // these parameters have already been folded
pow_challenge);
}
}
});
Utils::zero_univariates(univariate_accumulators);
// Accumulate the per-thread univariate accumulators into a single set of accumulators
for (auto& accumulators : thread_univariate_accumulators) {
Utils::add_nested_tuples(univariate_accumulators, accumulators);
}

return batch_over_relations(univariate_accumulators, instances.alphas);
}
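The core of this change is selecting types with std::conditional_t and gating code paths with if constexpr on a single bool template parameter, so one function body serves both the optimised and unoptimised flows. Below is a minimal, self-contained sketch of that pattern, not barretenberg code; the accumulator types are hypothetical placeholders.

#include <iostream>
#include <type_traits>

// Hypothetical stand-ins for the optimised/unoptimised accumulator types.
struct OptimisedAccumulator {
    static constexpr const char* name = "optimised (skips evaluations known to be zero)";
};
struct FullAccumulator {
    static constexpr const char* name = "unoptimised (computes every evaluation)";
};

// One templated function replaces two near-identical overloads: the bool
// parameter picks the accumulator type at compile time, and `if constexpr`
// gates the extra work that only the optimised path needs.
template <bool skip_zero_computations = true> void compute()
{
    using Accumulator = std::conditional_t<skip_zero_computations, OptimisedAccumulator, FullAccumulator>;
    std::cout << "accumulating with the " << Accumulator::name << " type\n";

    if constexpr (skip_zero_computations) {
        // Only the optimised path converts back to the full representation
        // before batching (cf. deoptimise_univariates in the diff above).
        std::cout << "converting optimised accumulators back to the full form\n";
    }
}

int main()
{
    compute();        // default: optimised path, as in normal proving
    compute<false>(); // unoptimised path, as used by the combiner tests
}

With this shape, tests can instantiate the unoptimised path explicitly while the prover keeps the default optimised one, which is the behaviour the merged compute_combiner provides.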
/**
* @brief Compute the combiner polynomial $G$ in the Protogalaxy paper using the index skipping optimisation
*
* @todo (https://github.com/AztecProtocol/barretenberg/issues/968) Make combiner tests better
*
*/
template <bool OptimisationEnabled = true, std::enable_if_t<OptimisationEnabled, bool> = true>
ExtendedUnivariateWithRandomization compute_combiner(const ProverInstances& instances, PowPolynomial<FF>& pow_betas)
{
BB_OP_COUNT_TIME();
size_t common_instance_size = instances[0]->proving_key.circuit_size;
pow_betas.compute_values();
// Determine number of threads for multithreading.
// Note: Multithreading is "on" for every round, but we reduce the number of threads from the max available
// based on a specified minimum number of iterations per thread; for small sizes this can reduce all the way
// to a single thread. For now we use a power-of-2 number of threads simply to ensure the round size is evenly
// divided.
size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2)
size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
size_t desired_num_threads = common_instance_size / min_iterations_per_thread;
size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified
num_threads = num_threads > 0 ? num_threads : 1; // ensure num threads is >= 1
size_t iterations_per_thread = common_instance_size / num_threads; // actual iterations per thread

// The univariates are optimised for the usual PG flow, but the unoptimised version (which skips no
// computation) is needed for tests, so the types are chosen based on the template instantiation.
using ThreadAccumulators = OptimisedTupleOfTuplesOfUnivariates;
using ExtendedUnivatiatesType = OptimisedExtendedUnivariates;

// Construct univariate accumulator containers; one per thread
std::vector<ThreadAccumulators> thread_univariate_accumulators(num_threads);
for (auto& accum : thread_univariate_accumulators) {
// just normal relation lengths
Utils::zero_univariates(accum);
}

// Construct extended univariates containers; one per thread
std::vector<ExtendedUnivatiatesType> extended_univariates;
extended_univariates.resize(num_threads);

// Accumulate the contribution from each sub-relation
parallel_for(num_threads, [&](size_t thread_idx) {
size_t start = thread_idx * iterations_per_thread;
size_t end = (thread_idx + 1) * iterations_per_thread;

for (size_t idx = start; idx < end; idx++) {
// There is no need to initialise extended_univariates to 0 since it is assigned to below.
// Instantiate univariates with skipping to ignore computation in those indices (they are still
// available for skipping relations, but all derived univariates will ignore those evaluations).
extend_univariates</*skip_count=*/ProverInstances::NUM - 1>(
extended_univariates[thread_idx], instances, idx);
const auto batch_univariates = [&](auto& possibly_optimised_univariate_accumulators) {
Utils::zero_univariates(possibly_optimised_univariate_accumulators);
// Accumulate the per-thread univariate accumulators into a single set of accumulators
for (auto& accumulators : thread_univariate_accumulators) {
Utils::add_nested_tuples(possibly_optimised_univariate_accumulators, accumulators);
}

FF pow_challenge = pow_betas[idx];
if constexpr (skip_zero_computations) { // Convert from optimised version to non-optimised
deoptimise_univariates(possibly_optimised_univariate_accumulators, univariate_accumulators);
};
// Batch the univariate contributions from each sub-relation to obtain the round univariate
return batch_over_relations(univariate_accumulators, instances.alphas);
};

// Accumulate the i-th row's univariate contribution. Note that the relation parameters passed to
// this function have already been folded. Moreover, linear-dependent relations that act over the
// entire execution trace rather than on rows, will not be multiplied by the pow challenge.
accumulate_relation_univariates(
thread_univariate_accumulators[thread_idx],
extended_univariates[thread_idx],
instances.optimised_relation_parameters, // these parameters have already been folded
pow_challenge);
}
});
Utils::zero_univariates(optimised_univariate_accumulators);
// Accumulate the per-thread univariate accumulators into a single set of accumulators
for (auto& accumulators : thread_univariate_accumulators) {
Utils::add_nested_tuples(optimised_univariate_accumulators, accumulators);
if constexpr (skip_zero_computations) { // Convert from optimised version to non-optimised
return batch_univariates(optimised_univariate_accumulators);
} else {
return batch_univariates(univariate_accumulators);
}

// Convert from optimised version to non-optimised
deoptimise_univariates(optimised_univariate_accumulators, univariate_accumulators);
// Batch the univariate contributions from each sub-relation to obtain the round univariate
return batch_over_relations(univariate_accumulators, instances.alphas);
}
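The comments in compute_combiner describe how the thread count is chosen: a power-of-2 number of available threads, a minimum of 2^6 iterations per thread, and at least one thread. The following is a standalone sketch of that calculation; get_num_cpus_pow2 is stubbed here with assumed behaviour (the largest power of 2 not exceeding the hardware thread count), since the real helper lives elsewhere in the codebase.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <thread>

// Stub with assumed behaviour for the codebase's get_num_cpus_pow2():
// the largest power of 2 not exceeding the number of hardware threads.
static std::size_t get_num_cpus_pow2()
{
    std::size_t cpus = std::max<std::size_t>(1, std::thread::hardware_concurrency());
    std::size_t pow2 = 1;
    while (pow2 * 2 <= cpus) {
        pow2 *= 2;
    }
    return pow2;
}

// Mirrors the thread-count logic described in the comments above: spin up a
// thread for every min_iterations_per_thread rows, cap at the (power-of-2)
// number of available threads, and never use fewer than one thread.
static std::size_t compute_num_threads(std::size_t common_instance_size)
{
    const std::size_t max_num_threads = get_num_cpus_pow2();
    const std::size_t min_iterations_per_thread = 1 << 6;
    const std::size_t desired_num_threads = common_instance_size / min_iterations_per_thread;
    std::size_t num_threads = std::min(desired_num_threads, max_num_threads);
    return num_threads > 0 ? num_threads : 1;
}

int main()
{
    const std::size_t circuit_sizes[] = { 1 << 4, 1 << 10, 1 << 20 };
    for (std::size_t circuit_size : circuit_sizes) {
        std::cout << "circuit_size=" << circuit_size << " -> " << compute_num_threads(circuit_size) << " thread(s)\n";
    }
}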

/**
