Skip to content

Commit

Permalink
chore: optimise polynomial initialisation (#10073)
Browse files Browse the repository at this point in the history
While analysing the impact of using a large ambient trace (2^20) in the
ClientIVC bench, with no changes to the circuit, I found that one culprit
is the zero-initialisation of polynomials defined over the full domain. As
such, I parallelised the initialisation function inside the polynomial
class, which also improves the ClientIVC bench as it is.

Default benchmark 
NOW: 
```
--------------------------------------------------------------------------------
Benchmark                      Time             CPU   
--------------------------------------------------------------------------------
ClientIVCBench/Full/6      29956 ms        28100 ms
```
BEFORE: 
```
--------------------------------------------------------------------------------
Benchmark                      Time             CPU   
--------------------------------------------------------------------------------
ClientIVCBench/Full/6      32341 ms        30470 ms
```


Benchmark with 2^20 ambient trace

NOW: 
```
--------------------------------------------------------------------------------
Benchmark                      Time             CPU   
--------------------------------------------------------------------------------
ClientIVCBench/Full/6      39013 ms        36526 ms 
```
BEFORE: 
```
--------------------------------------------------------------------------------
Benchmark                      Time             CPU   
--------------------------------------------------------------------------------
ClientIVCBench/Full/6      44346 ms        41778 ms 
```
Note: parallel initialisation is disabled for the AVM, as it already
constructs its polynomials in parallel and those polynomials are smaller.
  • Loading branch information
maramihali authored Nov 21, 2024
1 parent b8bace9 commit e608742
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 52 deletions.
24 changes: 22 additions & 2 deletions barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,29 @@ void Polynomial<Fr>::allocate_backing_memory(size_t size, size_t virtual_size, s
*
* @param size The size of the polynomial.
*/
template <typename Fr> Polynomial<Fr>::Polynomial(size_t size, size_t virtual_size, size_t start_index)
template <typename Fr>
Polynomial<Fr>::Polynomial(size_t size, size_t virtual_size, size_t start_index, bool disable_parallelisation)
{
PROFILE_THIS_NAME("polynomial allocation with zeroing");

allocate_backing_memory(size, virtual_size, start_index);
memset(static_cast<void*>(coefficients_.backing_memory_.get()), 0, sizeof(Fr) * size);
if (disable_parallelisation) {
// In AVM polynomials are small and already constructed in parallel
memset(static_cast<void*>(coefficients_.backing_memory_.get()), 0, sizeof(Fr) * size);
return;
}

size_t num_threads = calculate_num_threads(size);
size_t range_per_thread = size / num_threads;
size_t leftovers = size - (range_per_thread * num_threads);

parallel_for(num_threads, [&](size_t j) {
size_t offset = j * range_per_thread;
size_t range = (j == num_threads - 1) ? range_per_thread + leftovers : range_per_thread;
ASSERT(offset < size || size == 0);
ASSERT((offset + range) <= size);
memset(static_cast<void*>(coefficients_.backing_memory_.get() + offset), 0, sizeof(Fr) * range);
});
}

/**
Expand All @@ -76,6 +95,7 @@ template <typename Fr> Polynomial<Fr>::Polynomial(size_t size, size_t virtual_si
template <typename Fr>
Polynomial<Fr>::Polynomial(size_t size, size_t virtual_size, size_t start_index, [[maybe_unused]] DontZeroMemory flag)
{
// `flag` is never read in the body; it appears to exist only to select this non-zeroing overload.
PROFILE_THIS_NAME("polynomial allocation without zeroing");
// Only the backing storage is allocated here: no memset follows, so the coefficient
// contents are left as allocated and the caller is expected to write them before use.
allocate_backing_memory(size, virtual_size, start_index);
}

Expand Down
8 changes: 3 additions & 5 deletions barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,11 @@ template <typename Fr> class Polynomial {
using FF = Fr;
enum class DontZeroMemory { FLAG };

Polynomial(size_t size, size_t virtual_size, size_t start_index = 0);
Polynomial(size_t size, size_t virtual_size, size_t start_index = 0, bool disable_parallelisation = false);
// Intended just for plonk, where size == virtual_size always
Polynomial(size_t size)
: Polynomial(size, size)
{
PROFILE_THIS();
}
: Polynomial(size, size){};

// Constructor that does not initialize values, use with caution to save time.
Polynomial(size_t size, size_t virtual_size, size_t start_index, DontZeroMemory flag);
Polynomial(size_t size, size_t virtual_size, DontZeroMemory flag)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ template <IsUltraFlavor Flavor> class DeciderProvingKey_ {
}
{

PROFILE_THIS_NAME("constructing proving key");
PROFILE_THIS_NAME("allocating proving key");

proving_key = ProvingKey(dyadic_circuit_size, circuit.public_inputs.size(), commitment_key);
// If not using structured trace OR if using structured trace but overflow has occurred (overflow block in
Expand Down Expand Up @@ -189,27 +189,27 @@ template <IsUltraFlavor Flavor> class DeciderProvingKey_ {
// Allocate the table polynomials
if constexpr (IsUltraFlavor<Flavor>) {
for (auto& poly : proving_key.polynomials.get_tables()) {
poly = typename Flavor::Polynomial(max_tables_size, dyadic_circuit_size, table_offset);
poly = Polynomial(max_tables_size, dyadic_circuit_size, table_offset);
}
}
}
{
PROFILE_THIS_NAME("allocating sigmas and ids");

for (auto& sigma : proving_key.polynomials.get_sigmas()) {
sigma = typename Flavor::Polynomial(proving_key.circuit_size);
sigma = Polynomial(proving_key.circuit_size);
}
for (auto& id : proving_key.polynomials.get_ids()) {
id = typename Flavor::Polynomial(proving_key.circuit_size);
id = Polynomial(proving_key.circuit_size);
}
}
{
ZoneScopedN("allocating lookup read counts and tags");
// Allocate the read counts and tags polynomials
proving_key.polynomials.lookup_read_counts =
typename Flavor::Polynomial(max_tables_size, dyadic_circuit_size, table_offset);
Polynomial(max_tables_size, dyadic_circuit_size, table_offset);
proving_key.polynomials.lookup_read_tags =
typename Flavor::Polynomial(max_tables_size, dyadic_circuit_size, table_offset);
Polynomial(max_tables_size, dyadic_circuit_size, table_offset);
}
{
ZoneScopedN("allocating lookup and databus inverses");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,49 +51,54 @@ AvmCircuitBuilder::ProverPolynomials AvmCircuitBuilder::compute_polynomials() co
}));

// catch-all with fully formed polynomials
AVM_TRACK_TIME(
"circuit_builder/init_polys_unshifted", ({
auto unshifted = polys.get_unshifted();
AVM_TRACK_TIME("circuit_builder/init_polys_unshifted", ({
auto unshifted = polys.get_unshifted();

// An array which stores for each column of the trace the smallest size of the
// truncated column containing all non-zero elements.
// It is used to allocate the polynomials without memory overhead for the tail of zeros.
std::array<size_t, Row::SIZE> col_nonzero_size{};
// An array which stores for each column of the trace the smallest size of the
// truncated column containing all non-zero elements.
// It is used to allocate the polynomials without memory overhead for the tail of zeros.
std::array<size_t, Row::SIZE> col_nonzero_size{};

// Computation of size of columns.
// Non-parallel version takes 0.5 second for a trace size of 200k rows.
// A parallel version might be considered in the future.
for (size_t i = 0; i < num_rows; i++) {
const auto row = rows[i].as_vector();
for (size_t col = 0; col < Row::SIZE; col++) {
if (!row[col].is_zero()) {
col_nonzero_size[col] = i + 1;
}
}
}
// Computation of size of columns.
// Non-parallel version takes 0.5 second for a trace size of 200k rows.
// A parallel version might be considered in the future.
for (size_t i = 0; i < num_rows; i++) {
const auto row = rows[i].as_vector();
for (size_t col = 0; col < Row::SIZE; col++) {
if (!row[col].is_zero()) {
col_nonzero_size[col] = i + 1;
}
}
}

// Set of the labels for derived/inverse polynomials.
const auto derived_labels = polys.get_derived_labels();
std::set<std::string> derived_labels_set(derived_labels.begin(), derived_labels.end());
// Set of the labels for derived/inverse polynomials.
const auto derived_labels = polys.get_derived_labels();
std::set<std::string> derived_labels_set(derived_labels.begin(), derived_labels.end());

bb::parallel_for(num_unshifted, [&](size_t i) {
auto& poly = unshifted[i];
const auto col_idx = polys_to_cols_unshifted_idx[i];
size_t col_size = 0;
bb::parallel_for(num_unshifted, [&](size_t i) {
auto& poly = unshifted[i];
const auto col_idx = polys_to_cols_unshifted_idx[i];
size_t col_size = 0;

// We fully allocate the inverse polynomials. We leave this potential memory optimization for later.
if (derived_labels_set.contains(labels[i])) {
col_size = num_rows;
} else {
col_size = col_nonzero_size[col_idx];
}
// We fully allocate the inverse polynomials. We leave this potential memory optimization for
// later.
if (derived_labels_set.contains(labels[i])) {
col_size = num_rows;
} else {
col_size = col_nonzero_size[col_idx];
}

if (poly.is_empty()) {
// Not set above
poly = Polynomial{ /*memory size*/ col_size, /*largest possible index*/ circuit_subgroup_size };
}
});
}));
if (poly.is_empty()) {
// Not set above
poly = Polynomial{ /*memory size*/
col_size,
/*largest possible index as virtual size*/ circuit_subgroup_size,
/*start_index=*/0,
/*disable parallel initialisation=*/true
};
}
});
}));

AVM_TRACK_TIME(
"circuit_builder/set_polys_unshifted", ({
Expand Down
7 changes: 5 additions & 2 deletions bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,11 @@ namespace bb {

if (poly.is_empty()) {
// Not set above
poly = Polynomial{ /*memory size*/ col_size, /*largest possible index*/ circuit_subgroup_size };
}
poly = Polynomial{ /*memory size*/ col_size,
/*largest possible index as virtual size*/ circuit_subgroup_size,
/*start_index=*/0,
/*disable parallel initialization=*/true
}; }
});
}));

Expand Down

0 comments on commit e608742

Please sign in to comment.