Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: honk profiling by pass, tsan preset #2982

Merged
merged 28 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@
"DISABLE_ASM": "ON"
}
},
{
"name": "tsan",
ludamad marked this conversation as resolved.
Show resolved Hide resolved
"displayName": "Debugging build with thread sanitizer on Clang-16",
"description": "Build with thread sanitizer on clang16 with debugging information",
"inherits": "clang16-dbg",
"binaryDir": "build-tsan",
"environment": {
"CFLAGS": "-fsanitize=thread",
"CXXFLAGS": "-fsanitize=thread",
"LDFLAGS": "-fsanitize=thread"
}
},
{
"name": "asan",
"displayName": "Debugging build with address sanitizer on Clang-16",
Expand Down Expand Up @@ -246,6 +258,11 @@
"inherits": "default",
"configurePreset": "asan"
},
{
ludamad marked this conversation as resolved.
Show resolved Hide resolved
"name": "tsan",
"inherits": "default",
"configurePreset": "tsan"
},
{
"name": "gcc",
"inherits": "default",
Expand Down
11 changes: 6 additions & 5 deletions barretenberg/cpp/scripts/collect_profile_information.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/bin/bash
set -eu

PRESET=${1:-xray-1thread} # can also be 'xray'
PRESET=${1:-xray} # can also be 'xray-1thread'
ONLY_PROCESS=${2:-}
EXECUTABLE=${3:-ultra_honk_passes_bench}

# Move above script dir.
# Quote both the command substitution and $0 so a checkout path containing
# spaces or glob characters is not word-split before reaching cd.
cd "$(dirname "$0")/.."
Expand All @@ -15,10 +16,10 @@ cd build-$PRESET

if [ -z "$ONLY_PROCESS" ]; then
# Clear old profile data.
rm -f xray-log.honk_bench_main_simple.*
rm -f xray-log.$EXECUTABLE.*

# Run benchmark with profiling.
XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/honk_bench_main_simple
XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE
fi

function shorten_cpp_names() {
Expand All @@ -37,8 +38,8 @@ function shorten_cpp_names() {
}

# Process benchmark file.
llvm-xray-16 stack xray-log.honk_bench_main_simple.* \
--instr_map=./bin/honk_bench_main_simple --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \
llvm-xray-16 stack xray-log.$EXECUTABLE.* \
--instr_map=./bin/$EXECUTABLE --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \
| node ../scripts/llvm_xray_stack_flame_corrector.js \
| shorten_cpp_names \
| ../scripts/flamegraph.pl --width 1200 --fontsize 10 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
set(BENCHMARK_SOURCES
standard_plonk.bench.cpp
ultra_honk.bench.cpp
ultra_honk_passes.bench.cpp
ultra_plonk.bench.cpp
)

Expand All @@ -19,17 +20,4 @@ foreach(BENCHMARK_SOURCE ${BENCHMARK_SOURCES})
add_executable(${BENCHMARK_NAME}_bench main.bench.cpp ${BENCHMARK_SOURCE} benchmark_utilities.hpp)
target_link_libraries(${BENCHMARK_NAME}_bench ${LINKED_LIBRARIES})
add_custom_target(run_${BENCHMARK_NAME} COMMAND ${BENCHMARK_NAME} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
endforeach()

add_executable(
honk_bench_main_simple
main.simple.cpp
)

target_link_libraries(
honk_bench_main_simple
PRIVATE
stdlib_sha256
stdlib_keccak
stdlib_merkle_tree
)
endforeach()
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#pragma once
#include <benchmark/benchmark.h>
#include <cstddef>

#include "barretenberg/honk/composer/ultra_composer.hpp"
#include "barretenberg/honk/proof_system/ultra_prover.hpp"
#include "barretenberg/plonk/composer/ultra_composer.hpp"
#include "barretenberg/proof_system/types/circuit_type.hpp"
#include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp"
#include "barretenberg/stdlib/hash/keccak/keccak.hpp"
Expand Down Expand Up @@ -203,6 +206,26 @@ void construct_proof_with_specified_num_gates(State& state,
}
}

/**
 * @brief Build a Honk UltraProver for a circuit produced by the given generator.
 *
 * A fresh circuit builder is default-constructed and handed to `test_circuit_function`
 * together with `num_iterations`; the populated builder is then turned into an instance
 * and a prover through `composer`.
 *
 * @param composer Honk composer used to create the instance and the prover.
 * @param test_circuit_function Callback that fills the builder with the circuit to prove.
 * @param num_iterations Forwarded unchanged to `test_circuit_function`.
 * @return The prover returned by `composer.create_prover`.
 */
inline proof_system::honk::UltraProver get_prover(
    proof_system::honk::UltraComposer& composer,
    void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t),
    size_t num_iterations)
{
    using HonkComposer = proof_system::honk::UltraComposer;

    // Populate a new circuit with the requested workload.
    HonkComposer::CircuitBuilder circuit;
    test_circuit_function(circuit, num_iterations);

    // The Honk flow goes builder -> instance -> prover.
    std::shared_ptr<HonkComposer::Instance> prover_instance = composer.create_instance(circuit);
    return composer.create_prover(prover_instance);
}

/**
 * @brief Build a Plonk UltraProver for a circuit produced by the given generator.
 *
 * A fresh circuit builder is default-constructed and handed to `test_circuit_function`
 * together with `num_iterations`; the prover is then created directly from that builder.
 *
 * NOTE(review): the callback is declared in terms of the Honk composer's CircuitBuilder while
 * the local builder uses the Plonk composer's CircuitBuilder; this presumably relies on both
 * aliases naming the same builder type - confirm before changing either alias.
 *
 * @param composer Plonk composer used to create the prover.
 * @param test_circuit_function Callback that fills the builder with the circuit to prove.
 * @param num_iterations Forwarded unchanged to `test_circuit_function`.
 * @return The prover returned by `composer.create_prover`.
 */
inline proof_system::plonk::UltraProver get_prover(
    proof_system::plonk::UltraComposer& composer,
    void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t),
    size_t num_iterations)
{
    using PlonkComposer = proof_system::plonk::UltraComposer;

    // Populate a new circuit with the requested workload.
    PlonkComposer::CircuitBuilder circuit;
    test_circuit_function(circuit, num_iterations);

    // The Plonk flow creates the prover straight from the builder.
    return composer.create_prover(circuit);
}
/**
* @brief Performs proof construction for benchmarks based on a provided circuit function
*
Expand All @@ -219,29 +242,18 @@ void construct_proof_with_specified_num_iterations(State& state,
size_t)) noexcept
{
barretenberg::srs::init_crs_factory("../srs_db/ignition");

Composer composer;

auto num_iterations = static_cast<size_t>(state.range(0));
for (auto _ : state) {
// Construct circuit and prover; don't include this part in measurement
state.PauseTiming();
auto builder = typename Composer::CircuitBuilder();
test_circuit_function(builder, num_iterations);

auto composer = Composer();
if constexpr (proof_system::IsAnyOf<Composer, proof_system::honk::UltraComposer>) {
auto instance = composer.create_instance(builder);
auto ext_prover = composer.create_prover(instance);
state.ResumeTiming();

// Construct proof
auto proof = ext_prover.construct_proof();

} else {
auto ext_prover = composer.create_prover(builder);
state.ResumeTiming();
auto prover = get_prover(composer, test_circuit_function, num_iterations);
state.ResumeTiming();

// Construct proof
auto proof = ext_prover.construct_proof();
}
// Construct proof
auto proof = prover.construct_proof();
}
}

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include <benchmark/benchmark.h>

#include "barretenberg/benchmark/honk_bench/benchmark_utilities.hpp"
#include "barretenberg/honk/composer/ultra_composer.hpp"
#include "barretenberg/honk/proof_system/ultra_prover.hpp"
#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"

using namespace benchmark;
using namespace proof_system;

// Identifiers for the prover rounds that can be timed individually. The enumerators are listed in the
// order in which test_pass_inner executes the corresponding rounds, and each one is also used as the
// argument of PASS_BENCHMARK to name and select a benchmark.
enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, ZEROMORPH };
ludamad marked this conversation as resolved.
Show resolved Hide resolved

/**
 * @brief Run every prover round once per benchmark iteration, timing only the selected round.
 *
 * All rounds are always executed in order; the benchmark clock is paused for the whole iteration
 * and resumed only around the round whose identifier equals `index`.
 *
 * @param state Benchmark state that drives the iteration loop and owns the timer.
 * @param prover Prover whose rounds are executed.
 * @param index Identifier of the round to measure.
 */
BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass_inner(State& state,
                                                            honk::UltraProver& prover,
                                                            size_t index) noexcept
{
    // Executes one round; the timer runs only while the round selected by `index` is executing.
    const auto run_round = [&state, index](size_t round_id, auto&& round_body) -> void {
        const bool is_measured = (round_id == index);
        if (is_measured) {
            state.ResumeTiming();
        }
        round_body();
        if (is_measured) {
            state.PauseTiming();
        }
    };

    for (auto _ : state) {
        // Keep the clock stopped by default so unselected rounds are not measured.
        state.PauseTiming();

        run_round(PREAMBLE, [&prover] { prover.execute_preamble_round(); });
        run_round(WIRE_COMMITMENTS, [&prover] { prover.execute_wire_commitments_round(); });
        run_round(SORTED_LIST_ACCUMULATOR, [&prover] { prover.execute_sorted_list_accumulator_round(); });
        run_round(GRAND_PRODUCT_COMPUTATION, [&prover] { prover.execute_grand_product_computation_round(); });
        run_round(RELATION_CHECK, [&prover] { prover.execute_relation_check_rounds(); });
        run_round(ZEROMORPH, [&prover] { prover.execute_zeromorph_rounds(); });

        // Restart the clock before control returns to the benchmark loop.
        state.ResumeTiming();
    }
}
/**
 * @brief Set up a Honk prover for a keccak test circuit and benchmark a single prover round on it.
 *
 * @param state Benchmark state forwarded to test_pass_inner.
 * @param index Identifier of the round to measure, forwarded to test_pass_inner.
 */
BBERG_INSTRUMENT BBERG_NOINLINE static void test_pass(State& state, size_t index) noexcept
{
    barretenberg::srs::init_crs_factory("../srs_db/ignition");

    // Circuit generator and iteration count handed to get_prover.
    void (*build_circuit)(honk::UltraComposer::CircuitBuilder&, size_t) =
        &bench_utils::generate_keccak_test_circuit<UltraCircuitBuilder>;
    const size_t num_iterations = 1;

    honk::UltraComposer composer;
    honk::UltraProver prover = bench_utils::get_prover(composer, build_circuit, num_iterations);

    test_pass_inner(state, prover, index);
}
// Defines a benchmark function named PASS_<pass> that calls test_pass with the round identifier
// `pass`, and registers it with millisecond reporting units. The expansion deliberately ends without
// a semicolon so that further options (e.g. ->Iterations(1)) can be chained at the use site.
#define PASS_BENCHMARK(pass) \
static void PASS_##pass(State& state) noexcept \
{ \
test_pass(state, pass); \
} \
BENCHMARK(PASS_##pass)->Unit(::benchmark::kMillisecond)

// Fast passes take a long time to benchmark because of how we compute statistical significance.
// Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part.
// RELATION_CHECK and ZEROMORPH carry no such limit, so the iteration count for those two is left to
// the benchmark framework.
PASS_BENCHMARK(PREAMBLE)->Iterations(1);
PASS_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1);
PASS_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1);
PASS_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1);
PASS_BENCHMARK(RELATION_CHECK);
PASS_BENCHMARK(ZEROMORPH);
2 changes: 1 addition & 1 deletion barretenberg/cpp/src/barretenberg/common/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,4 @@ void parallel_for(size_t num_iterations, const std::function<void(size_t)>& func
// parallel_for_queued(num_iterations, func);
#endif
#endif
}
}
23 changes: 22 additions & 1 deletion barretenberg/cpp/src/barretenberg/common/thread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <algorithm>
#include <barretenberg/env/hardware_concurrency.hpp>
#include <barretenberg/numeric/bitop/get_msb.hpp>
#include <functional>
#include <iostream>
#include <thread>
#include <vector>

Expand All @@ -21,4 +22,24 @@ inline size_t get_num_cpus_pow2()
return static_cast<size_t>(1ULL << numeric::get_msb(get_num_cpus()));
}

void parallel_for(size_t num_iterations, const std::function<void(size_t)>& func);
void parallel_for(size_t num_iterations, const std::function<void(size_t)>& func);

/**
* A modified parallel_for optimized for work being done in batches.
* This is more appropriate for work with small granularity, to avoid thread caching issues and overhead.
*/
inline void parallel_for_batched(size_t num_iterations, auto&& func)
ludamad marked this conversation as resolved.
Show resolved Hide resolved
{
size_t num_threads = get_num_cpus_pow2();
size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division
// We will use parallel_for to dispatch the batches
parallel_for(num_threads, [&](size_t thread_idx) {
// Calculate start and end for this batch
size_t start = thread_idx * batch_size;
size_t end = std::min(start + batch_size, num_iterations);

for (size_t i = start; i < end; ++i) {
func(i);
}
});
}
Loading