diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 67b3ad028e0d..002a19d456c1 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -274,16 +274,6 @@ "MULTITHREADING": "ON" } }, - { - "name": "wasm-bench", - "displayName": "WASM benchmarking.", - "description": "WASM benchmarking.", - "inherits": "wasm-threads", - "binaryDir": "build-wasm-bench", - "environment": { - "CXXFLAGS": "-DWASMTIME_ENV_HACK" - } - }, { "name": "xray", "displayName": "Build with multi-threaded XRay Profiling", @@ -444,12 +434,6 @@ "jobs": 0, "targets": ["barretenberg.wasm"] }, - { - "name": "wasm-bench", - "configurePreset": "wasm-bench", - "inheritConfigureEnvironment": true, - "jobs": 0 - }, { "name": "xray", "configurePreset": "xray", diff --git a/barretenberg/cpp/scripts/_benchmark_remote_lock.sh b/barretenberg/cpp/scripts/_benchmark_remote_lock.sh index 551a26b90706..8ed0787051a0 100644 --- a/barretenberg/cpp/scripts/_benchmark_remote_lock.sh +++ b/barretenberg/cpp/scripts/_benchmark_remote_lock.sh @@ -23,5 +23,6 @@ echo "Benchmarking lock created at ~/BENCHMARK_IN_PROGRESS." # Trap to ensure cleanup runs on ANY exit, including from a signal trap cleanup EXIT +trap cleanup INT # handle ctrl-c # don't exit, the caller script will run \ No newline at end of file diff --git a/barretenberg/cpp/scripts/benchmark_remote.sh b/barretenberg/cpp/scripts/benchmark_remote.sh index 05a61392584b..b892d505ffba 100755 --- a/barretenberg/cpp/scripts/benchmark_remote.sh +++ b/barretenberg/cpp/scripts/benchmark_remote.sh @@ -11,6 +11,7 @@ BENCHMARK=${1:-goblin_bench} COMMAND=${2:-./$BENCHMARK} PRESET=${3:-clang16} BUILD_DIR=${4:-build} +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} # Move above script dir. cd $(dirname $0)/.. 
@@ -24,4 +25,4 @@ source scripts/_benchmark_remote_lock.sh cd $BUILD_DIR scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build ssh $BB_SSH_KEY $BB_SSH_INSTANCE \ - "cd $BB_SSH_CPP_PATH/build ; $COMMAND" + "cd $BB_SSH_CPP_PATH/build ; HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY $COMMAND" diff --git a/barretenberg/cpp/scripts/benchmark_wasm.sh b/barretenberg/cpp/scripts/benchmark_wasm.sh index a7565485bdf4..91f5d25b2c4e 100755 --- a/barretenberg/cpp/scripts/benchmark_wasm.sh +++ b/barretenberg/cpp/scripts/benchmark_wasm.sh @@ -3,15 +3,16 @@ set -eu BENCHMARK=${1:-goblin_bench} COMMAND=${2:-./bin/$BENCHMARK} +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} # Move above script dir. cd $(dirname $0)/.. # Configure and build. -cmake --preset wasm-bench -cmake --build --preset wasm-bench --target $BENCHMARK +cmake --preset wasm-threads +cmake --build --preset wasm-threads --target $BENCHMARK -cd build-wasm-bench +cd build-wasm-threads # Consistency with _wasm.sh targets / shorter $COMMAND. cp ./bin/$BENCHMARK . -wasmtime run -Wthreads=y -Sthreads=y $COMMAND \ No newline at end of file +wasmtime run --env HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY -Wthreads=y -Sthreads=y --dir=.. $COMMAND \ No newline at end of file diff --git a/barretenberg/cpp/scripts/benchmark_wasm_remote.sh b/barretenberg/cpp/scripts/benchmark_wasm_remote.sh index f131cc63a6f9..62213556579a 100755 --- a/barretenberg/cpp/scripts/benchmark_wasm_remote.sh +++ b/barretenberg/cpp/scripts/benchmark_wasm_remote.sh @@ -9,17 +9,22 @@ set -eu BENCHMARK=${1:-goblin_bench} COMMAND=${2:-./$BENCHMARK} +HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16} # Move above script dir. cd $(dirname $0)/.. # Configure and build. 
-cmake --preset wasm-bench -cmake --build --preset wasm-bench --target $BENCHMARK +cmake --preset wasm-threads +cmake --build --preset wasm-threads --target $BENCHMARK source scripts/_benchmark_remote_lock.sh -cd build-wasm-bench -scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build-wasm-bench +cd build-wasm-threads +# ensure folder structure +ssh $BB_SSH_KEY $BB_SSH_INSTANCE "mkdir -p $BB_SSH_CPP_PATH/build-wasm-threads" +# copy build wasm threads +scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build-wasm-threads +# run wasm benchmarking ssh $BB_SSH_KEY $BB_SSH_INSTANCE \ - "cd $BB_SSH_CPP_PATH/build-wasm-bench ; /home/ubuntu/.wasmtime/bin/wasmtime run -Wthreads=y -Sthreads=y $COMMAND" + "cd $BB_SSH_CPP_PATH/build-wasm-threads ; /home/ubuntu/.wasmtime/bin/wasmtime run --env HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY -Wthreads=y -Sthreads=y --dir=.. $COMMAND" diff --git a/barretenberg/cpp/src/barretenberg/commitment_schemes/ipa/ipa.hpp b/barretenberg/cpp/src/barretenberg/commitment_schemes/ipa/ipa.hpp index a0c7f7350961..f89b42ca60ac 100644 --- a/barretenberg/cpp/src/barretenberg/commitment_schemes/ipa/ipa.hpp +++ b/barretenberg/cpp/src/barretenberg/commitment_schemes/ipa/ipa.hpp @@ -235,6 +235,9 @@ template class IPA { // Compute G_zero // First construct s_vec std::vector s_vec(poly_degree); + // TODO(https://github.com/AztecProtocol/barretenberg/issues/857): This code is not efficient as it's O(n log n). + // This can be optimized to be linear by computing a tree of products. It's very readable, so we're + // leaving it unoptimized for now. 
run_loop_in_parallel_if_effective( poly_degree, [&s_vec, &round_challenges_inv, log_poly_degree](size_t start, size_t end) { diff --git a/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp b/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp index 7814a02e406f..9217e014eb94 100644 --- a/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp +++ b/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp @@ -1,6 +1,7 @@ #include "hardware_concurrency.hpp" #include #include +#include #include #include #include @@ -12,14 +13,9 @@ uint32_t env_hardware_concurrency() #ifndef __wasm__ try { #endif -#ifdef WASMTIME_ENV_HACK - // TODO(https://github.com/AztecProtocol/barretenberg/issues/837): Undo this hack, rely on WASI. - return 16; -#else - static auto val = std::getenv("HARDWARE_CONCURRENCY"); - static const uint32_t cores = val ? (uint32_t)std::stoul(val) : std::thread::hardware_concurrency(); - return cores; -#endif + static auto val = std::getenv("HARDWARE_CONCURRENCY"); + static const uint32_t cores = val ? 
(uint32_t)std::stoul(val) : std::thread::hardware_concurrency(); + return cores; #ifndef __wasm__ } catch (std::exception const&) { throw std::runtime_error("HARDWARE_CONCURRENCY invalid."); diff --git a/barretenberg/cpp/src/barretenberg/polynomials/pow.bench.cpp b/barretenberg/cpp/src/barretenberg/polynomials/pow.bench.cpp new file mode 100644 index 000000000000..083ff70db5ef --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/polynomials/pow.bench.cpp @@ -0,0 +1,27 @@ +#include "barretenberg/polynomials/pow.hpp" +#include "barretenberg/ecc/curves/bn254/fr.hpp" +#include + +using namespace benchmark; +using namespace bb; + +namespace { + +void compute_pow_poly(benchmark::State& state) +{ + // just set up huge vector + std::vector betas{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 }; + + for (auto _ : state) { + int64_t num_betas = state.range(0); + std::vector cur_betas(betas.begin(), betas.begin() + num_betas); + PowPolynomial pow{ cur_betas }; + pow.compute_values(); + } +} + +BENCHMARK(compute_pow_poly)->Unit(benchmark::kMillisecond)->Arg(20); + +} // namespace +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/polynomials/pow.hpp b/barretenberg/cpp/src/barretenberg/polynomials/pow.hpp index 9079724c98a1..7dcc2fbddeb0 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/pow.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/pow.hpp @@ -1,4 +1,6 @@ #pragma once +#include "barretenberg/common/compiler_hints.hpp" +#include "barretenberg/common/op_count.hpp" #include "barretenberg/common/thread.hpp" #include @@ -121,8 +123,9 @@ template struct PowPolynomial { * @brief Given \vec{β} = {β_0,...,β_{d-1}} compute pow_\vec{β}(i) for i=0,...,2^{d}-1 * */ - void compute_values() + BB_PROFILE void compute_values() { + BB_OP_COUNT_TIME(); size_t pow_size = 1 << betas.size(); pow_betas = std::vector(pow_size); @@ -136,6 +139,11 @@ template struct PowPolynomial 
{ size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified num_threads = num_threads > 0 ? num_threads : 1; // ensure num threads is >= 1 size_t iterations_per_thread = pow_size / num_threads; // actual iterations per thread + + // TODO(https://github.com/AztecProtocol/barretenberg/issues/864): This computation is asymptotically slow as it + // does pow_size * log(pow_size) work. However, in practice, it's super efficient because it's trivially + // parallelizable and only takes 45ms for the whole 6 iter IVC benchmark. It's also very readable, so we're + // leaving it unoptimized for now. parallel_for(num_threads, [&](size_t thread_idx) { size_t start = thread_idx * iterations_per_thread; size_t end = (thread_idx + 1) * iterations_per_thread; diff --git a/barretenberg/cpp/src/barretenberg/srs/global_crs.cpp b/barretenberg/cpp/src/barretenberg/srs/global_crs.cpp index fcfbac3e30cb..0d09ee6c2e92 100644 --- a/barretenberg/cpp/src/barretenberg/srs/global_crs.cpp +++ b/barretenberg/cpp/src/barretenberg/srs/global_crs.cpp @@ -24,23 +24,7 @@ void init_crs_factory(std::string crs_path) if (crs_factory != nullptr) { return; } -#ifdef WASMTIME_ENV_HACK - static_cast(crs_path); - // We only need this codepath in wasmtime because the SRS cannot be loaded in our usual ways - // and we don't need a real CRS for our purposes. - // TODO(https://github.com/AztecProtocol/barretenberg/issues/837): make this a real SRS. 
- std::cout << "WASMTIME_ENV_HACK: started generating fake bn254 curve" << std::endl; - std::vector points; - // 2**19 points - points.reserve(1 << 19); - for (int i = 0; i < (1 << 19); i++) { - points.push_back(g1::affine_element::random_element()); - } - init_crs_factory(points, g2::affine_element{ fq::random_element(), fq::random_element() }); - std::cout << "WASMTIME_ENV_HACK: finished generating fake bn254 curve" << std::endl; -#else crs_factory = std::make_shared>(crs_path); -#endif } // Initializes the crs using the memory buffers @@ -54,23 +38,7 @@ void init_grumpkin_crs_factory(std::string crs_path) if (grumpkin_crs_factory != nullptr) { return; } -#ifdef WASMTIME_ENV_HACK - // We only need this codepath in wasmtime because the SRS cannot be loaded in our usual ways - // and we don't need a real CRS for our purposes. - // TODO(https://github.com/AztecProtocol/barretenberg/issues/837): make this a real SRS. - static_cast(crs_path); - std::cout << "WASMTIME_ENV_HACK: started generating fake grumpkin curve" << std::endl; - std::vector points; - // 2**18 points - points.reserve(1 << 18); - for (int i = 0; i < (1 << 18); i++) { - points.push_back(curve::Grumpkin::AffineElement::random_element()); - } - std::cout << "WASMTIME_ENV_HACK: finished generating fake grumpkin curve" << std::endl; - init_grumpkin_crs_factory(points); -#else grumpkin_crs_factory = std::make_shared>(crs_path); -#endif } std::shared_ptr> get_crs_factory()