Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: honk sumcheck performance #2925

Merged
merged 15 commits into from
Oct 19, 2023
49 changes: 30 additions & 19 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -183,34 +183,40 @@
"MULTITHREADING": "ON"
}
},
{
"name": "xray-1thread",
"displayName": "Build with single-threaded XRay Profiling",
"description": "Build with Clang and enable single-threaded LLVM XRay for profiling",
"generator": "Unix Makefiles",
"inherits": "clang16",
"environment": {
"CFLAGS": "-fxray-instrument -fxray-instruction-threshold=10",
"CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=10",
"LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=10"
},
"cacheVariables": {
"MULTITHREADING": "OFF"
},
"binaryDir": "build-xray-1thread"
},
{
"name": "xray",
"displayName": "Build with multi-threaded XRay Profiling",
"description": "Build with Clang and enable multi-threaded LLVM XRay for profiling",
"generator": "Unix Makefiles",
"inherits": "clang16",
"environment": {
"CFLAGS": "-fxray-instrument -fxray-instruction-threshold=10",
"CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=10",
"LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=10"
"CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100",
"CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100",
"LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100"
},
"binaryDir": "build-xray"
},
{
"name": "xray-verbose",
"displayName": "Build with detailed XRay Profiling",
"description": "Build with Clang and enable detailed LLVM XRay for profiling",
"inherits": "xray",
"environment": {
"CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150",
"CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150",
"LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150"
},
"binaryDir": "build-xray-verbose"
},
{
"name": "xray-1thread",
"displayName": "Build with single-threaded XRay Profiling",
"description": "Build with Clang and enable single-threaded LLVM XRay for profiling",
"inherits": "xray",
"cacheVariables": {
"MULTITHREADING": "OFF"
},
"binaryDir": "build-xray-1thread"
}
],
"buildPresets": [
Expand Down Expand Up @@ -303,6 +309,11 @@
"jobs": 0,
"targets": ["barretenberg.wasm"]
},
{
"name": "xray-verbose",
"configurePreset": "xray-verbose",
"inherits": "default"
},
{
"name": "xray-1thread",
"configurePreset": "xray-1thread",
Expand Down
3 changes: 2 additions & 1 deletion barretenberg/cpp/scripts/collect_profile_information.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,6 @@ llvm-xray-16 stack xray-log.honk_bench_main_simple.* \
--instr_map=./bin/honk_bench_main_simple --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \
| node ../scripts/llvm_xray_stack_flame_corrector.js \
| shorten_cpp_names \
| ../scripts/flamegraph.pl > xray.svg
| ../scripts/flamegraph.pl --width 1200 --fontsize 10 \
> xray.svg
echo "Profiling complete, now you can do e.g. 'scp mainframe:`readlink -f xray.svg` .' on a local terminal and open the SVG in a browser."
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ BASELINE_BRANCH="master"

echo -e "\nComparing $BENCH_TARGET between $BASELINE_BRANCH and current branch:"
# Set some directories
BASE_DIR="$HOME/barretenberg/cpp"
BASE_DIR="$HOME/aztec-packages/barretenberg/cpp"
BUILD_DIR="$BASE_DIR/build-bench" # matches build dir specified in bench preset
BENCH_RESULTS_DIR="$BASE_DIR/tmp_bench_results"
BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

echo -e '\nComparing Ultra Plonk/Honk benchmarks.'
# Set some directories
BASE_DIR="$HOME/barretenberg/cpp"
BASE_DIR="$HOME/aztec-packages/barretenberg/cpp"
BUILD_DIR="$BASE_DIR/build-bench"
BENCH_RESULTS_DIR="$BASE_DIR/tmp_bench_results"
BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,24 @@
#include "barretenberg/stdlib/primitives/packed_byte_array/packed_byte_array.hpp"
#include "barretenberg/stdlib/primitives/witness/witness.hpp"

using namespace proof_system::plonk;

using UltraBuilder = proof_system::UltraCircuitBuilder;
using UltraHonk = proof_system::honk::UltraComposer;
using namespace proof_system;

template <typename Builder> void generate_sha256_test_circuit(Builder& builder, size_t num_iterations)
{
std::string in;
in.resize(32);
proof_system::plonk::stdlib::packed_byte_array<Builder> input(&builder, in);
plonk::stdlib::packed_byte_array<Builder> input(&builder, in);
for (size_t i = 0; i < num_iterations; i++) {
input = proof_system::plonk::stdlib::sha256<Builder>(input);
input = plonk::stdlib::sha256<Builder>(input);
}
}

/**
 * @brief Profiling driver: builds one proof, then re-runs the relation check rounds many times.
 *
 * The function is tagged BBERG_INSTRUMENT and BBERG_NOINLINE so that, where those macros are
 * active, it stays a distinct, instrumented frame in the profile.
 *
 * @param ext_prover Prover to exercise. It is taken by reference and driven in place.
 */
BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(honk::UltraProver& ext_prover)
{
    // Number of times the sumcheck portion is repeated after the initial proof.
    constexpr size_t num_sumcheck_repetitions = 200;

    // One full proof construction up front; the returned proof is intentionally discarded.
    // NOTE(review): presumably this also leaves the prover in the state the rounds below need - confirm.
    ext_prover.construct_proof();

    // Bench sumcheck
    for (size_t repetition = 0; repetition != num_sumcheck_repetitions; ++repetition) {
        ext_prover.execute_relation_check_rounds();
    }
}

Expand All @@ -44,15 +50,14 @@ void construct_proof_ultra() noexcept
{
barretenberg::srs::init_crs_factory("../srs_db/ignition");
// Construct circuit and prover; don't include this part in measurement
auto builder = typename UltraHonk::CircuitBuilder();
generate_sha256_test_circuit<UltraBuilder>(builder, 1);
honk::UltraComposer::CircuitBuilder builder;
generate_sha256_test_circuit(builder, 1);
std::cout << "gates: " << builder.get_total_circuit_size() << std::endl;

auto composer = UltraHonk();
auto instance = composer.create_instance(builder);
auto ext_prover = composer.create_prover(instance);
for (size_t i = 0; i < 10; i++) {
auto proof = ext_prover.construct_proof();
}
honk::UltraComposer composer;
std::shared_ptr<honk::UltraComposer::Instance> instance = composer.create_instance(builder);
honk::UltraProver ext_prover = composer.create_prover(instance);
sumcheck_profiling(ext_prover);
}

int main()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,22 @@ void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuil
BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit<UltraBuilder>)
->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
->Repetitions(NUM_REPETITIONS)
->Unit(::benchmark::kSecond);
->Unit(::benchmark::kMillisecond);
BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit<UltraBuilder>)
->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
->Repetitions(NUM_REPETITIONS)
->Unit(::benchmark::kSecond);
->Unit(::benchmark::kMillisecond);
BENCHMARK_CAPTURE(construct_proof_ultra,
ecdsa_verification,
&bench_utils::generate_ecdsa_verification_test_circuit<UltraBuilder>)
->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
->Repetitions(NUM_REPETITIONS)
->Unit(::benchmark::kSecond);
->Unit(::benchmark::kMillisecond);
BENCHMARK_CAPTURE(construct_proof_ultra,
merkle_membership,
&bench_utils::generate_merkle_membership_test_circuit<UltraBuilder>)
->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
->Repetitions(NUM_REPETITIONS)
->Unit(::benchmark::kSecond);
->Unit(::benchmark::kMillisecond);

} // namespace ultra_honk_bench
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,22 @@ void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuil
BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit<UltraBuilder>)
->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
->Repetitions(NUM_REPETITIONS)
->Unit(::benchmark::kSecond);
->Unit(::benchmark::kMillisecond);
BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit<UltraBuilder>)
->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
->Repetitions(NUM_REPETITIONS)
->Unit(::benchmark::kSecond);
->Unit(::benchmark::kMillisecond);
BENCHMARK_CAPTURE(construct_proof_ultra,
ecdsa_verification,
&bench_utils::generate_ecdsa_verification_test_circuit<UltraBuilder>)
->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
->Repetitions(NUM_REPETITIONS)
->Unit(::benchmark::kSecond);
->Unit(::benchmark::kMillisecond);
BENCHMARK_CAPTURE(construct_proof_ultra,
merkle_membership,
&bench_utils::generate_merkle_membership_test_circuit<UltraBuilder>)
->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
->Repetitions(NUM_REPETITIONS)
->Unit(::benchmark::kSecond);
->Unit(::benchmark::kMillisecond);

} // namespace ultra_plonk_bench
18 changes: 18 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

// BBERG_INLINE: request that the annotated function is always inlined.
// Uses the `__forceinline` spelling when _WIN32 is defined and the GNU-style
// `always_inline` attribute everywhere else.
#ifdef _WIN32
#define BBERG_INLINE __forceinline inline
#else
#define BBERG_INLINE __attribute__((always_inline)) inline
#endif

// Profiling / inlining control attributes.
//   BBERG_INSTRUMENT    - always emit XRay instrumentation for the function.
//   BBERG_NO_INSTRUMENT - never emit XRay instrumentation for the function.
//   BBERG_NOINLINE      - forbid inlining of the function.
// The XRay attributes are clang-specific, so they expand to nothing elsewhere.
#if defined(__clang__)
#define BBERG_INSTRUMENT [[clang::xray_always_instrument]]
#define BBERG_NO_INSTRUMENT [[clang::xray_never_instrument]]
#define BBERG_NOINLINE [[clang::noinline]]
#elif defined(__GNUC__)
// GCC: no XRay attributes, but the no-inline request can still be honoured.
#define BBERG_INSTRUMENT
#define BBERG_NO_INSTRUMENT
#define BBERG_NOINLINE [[gnu::noinline]]
#else
// TODO(AD): Other compilers
#define BBERG_INSTRUMENT
#define BBERG_NO_INSTRUMENT
#define BBERG_NOINLINE
#endif
7 changes: 0 additions & 7 deletions barretenberg/cpp/src/barretenberg/common/inline.hpp

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <thread>
#include <vector>

#include "barretenberg/common/compiler_hints.hpp"

namespace {

class ThreadPool {
Expand Down Expand Up @@ -50,7 +52,7 @@ class ThreadPool {
std::condition_variable complete_condition_;
bool stop = false;

void worker_loop(size_t thread_index);
BBERG_NO_INSTRUMENT void worker_loop(size_t thread_index);

void do_iterations()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once
#include "barretenberg/common/assert.hpp"
#include "barretenberg/common/inline.hpp"
#include "barretenberg/common/compiler_hints.hpp"
#include "barretenberg/numeric/random/engine.hpp"
#include "barretenberg/numeric/uint128/uint128.hpp"
#include "barretenberg/numeric/uint256/uint256.hpp"
Expand Down
2 changes: 1 addition & 1 deletion barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once

#include "affine_element.hpp"
#include "barretenberg/common/inline.hpp"
#include "barretenberg/common/compiler_hints.hpp"
#include "barretenberg/common/mem.hpp"
#include "barretenberg/numeric/random/engine.hpp"
#include "barretenberg/numeric/uint256/uint256.hpp"
Expand Down
4 changes: 2 additions & 2 deletions barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,12 @@ template <typename Flavor> class SumcheckProver {
void partially_evaluate(auto& polynomials, size_t round_size, FF round_challenge)
{
// after the first round, operate in place on partially_evaluated_polynomials
for (size_t j = 0; j < polynomials.size(); ++j) {
parallel_for(polynomials.size(), [&](size_t j) {
for (size_t i = 0; i < round_size; i += 2) {
partially_evaluated_polynomials[j][i >> 1] =
polynomials[j][i] + round_challenge * (polynomials[j][i + 1] - polynomials[j][i]);
}
}
});
};
};

Expand Down