Skip to content

Commit

Permalink
Merge branch 'master' into ad/test-transform-circleci
Browse files Browse the repository at this point in the history
  • Loading branch information
ludamad authored Feb 23, 2024
2 parents 0556f38 + faa9586 commit f0edc4f
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 67 deletions.
16 changes: 0 additions & 16 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -274,16 +274,6 @@
"MULTITHREADING": "ON"
}
},
{
"name": "wasm-bench",
"displayName": "WASM benchmarking.",
"description": "WASM benchmarking.",
"inherits": "wasm-threads",
"binaryDir": "build-wasm-bench",
"environment": {
"CXXFLAGS": "-DWASMTIME_ENV_HACK"
}
},
{
"name": "xray",
"displayName": "Build with multi-threaded XRay Profiling",
Expand Down Expand Up @@ -444,12 +434,6 @@
"jobs": 0,
"targets": ["barretenberg.wasm"]
},
{
"name": "wasm-bench",
"configurePreset": "wasm-bench",
"inheritConfigureEnvironment": true,
"jobs": 0
},
{
"name": "xray",
"configurePreset": "xray",
Expand Down
1 change: 1 addition & 0 deletions barretenberg/cpp/scripts/_benchmark_remote_lock.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ echo "Benchmarking lock created at ~/BENCHMARK_IN_PROGRESS."

# Trap to ensure cleanup runs on ANY exit, including from a signal
trap cleanup EXIT
trap cleanup INT # handle ctrl-c

# don't exit, the caller script will run
3 changes: 2 additions & 1 deletion barretenberg/cpp/scripts/benchmark_remote.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ BENCHMARK=${1:-goblin_bench}
COMMAND=${2:-./$BENCHMARK}
PRESET=${3:-clang16}
BUILD_DIR=${4:-build}
HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16}

# Move above script dir.
cd $(dirname $0)/..
Expand All @@ -24,4 +25,4 @@ source scripts/_benchmark_remote_lock.sh
cd $BUILD_DIR
scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build
ssh $BB_SSH_KEY $BB_SSH_INSTANCE \
"cd $BB_SSH_CPP_PATH/build ; $COMMAND"
"cd $BB_SSH_CPP_PATH/build ; HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY $COMMAND"
9 changes: 5 additions & 4 deletions barretenberg/cpp/scripts/benchmark_wasm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@ set -eu

BENCHMARK=${1:-goblin_bench}
COMMAND=${2:-./bin/$BENCHMARK}
HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16}

# Move above script dir.
cd $(dirname $0)/..

# Configure and build.
cmake --preset wasm-bench
cmake --build --preset wasm-bench --target $BENCHMARK
cmake --preset wasm-threads
cmake --build --preset wasm-threads --target $BENCHMARK

cd build-wasm-bench
cd build-wasm-threads
# Consistency with _wasm.sh targets / shorter $COMMAND.
cp ./bin/$BENCHMARK .
wasmtime run -Wthreads=y -Sthreads=y $COMMAND
wasmtime run --env HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY -Wthreads=y -Sthreads=y --dir=.. $COMMAND
15 changes: 10 additions & 5 deletions barretenberg/cpp/scripts/benchmark_wasm_remote.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,22 @@ set -eu

BENCHMARK=${1:-goblin_bench}
COMMAND=${2:-./$BENCHMARK}
HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16}

# Move above script dir.
cd $(dirname $0)/..

# Configure and build.
cmake --preset wasm-bench
cmake --build --preset wasm-bench --target $BENCHMARK
cmake --preset wasm-threads
cmake --build --preset wasm-threads --target $BENCHMARK

source scripts/_benchmark_remote_lock.sh

cd build-wasm-bench
scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build-wasm-bench
cd build-wasm-threads
# ensure folder structure
ssh $BB_SSH_KEY $BB_SSH_INSTANCE "mkdir -p $BB_SSH_CPP_PATH/build-wasm-threads"
# copy build wasm threads
scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build-wasm-threads
# run wasm benchmarking
ssh $BB_SSH_KEY $BB_SSH_INSTANCE \
"cd $BB_SSH_CPP_PATH/build-wasm-bench ; /home/ubuntu/.wasmtime/bin/wasmtime run -Wthreads=y -Sthreads=y $COMMAND"
"cd $BB_SSH_CPP_PATH/build-wasm-threads ; /home/ubuntu/.wasmtime/bin/wasmtime run --env HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY -Wthreads=y -Sthreads=y --dir=.. $COMMAND"
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ template <typename Curve> class IPA {
// Compute G_zero
// First construct s_vec
std::vector<Fr> s_vec(poly_degree);
// TODO(https://github.com/AztecProtocol/barretenberg/issues/857): This code is not efficient, as it is O(n log n).
// It can be optimized to be linear by computing a tree of products. It's very readable, so we're
// leaving it unoptimized for now.
run_loop_in_parallel_if_effective(
poly_degree,
[&s_vec, &round_challenges_inv, log_poly_degree](size_t start, size_t end) {
Expand Down
12 changes: 4 additions & 8 deletions barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "hardware_concurrency.hpp"
#include <barretenberg/common/throw_or_abort.hpp>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <thread>
Expand All @@ -12,14 +13,9 @@ uint32_t env_hardware_concurrency()
#ifndef __wasm__
try {
#endif
#ifdef WASMTIME_ENV_HACK
// TODO(https://github.com/AztecProtocol/barretenberg/issues/837): Undo this hack, rely on WASI.
return 16;
#else
static auto val = std::getenv("HARDWARE_CONCURRENCY");
static const uint32_t cores = val ? (uint32_t)std::stoul(val) : std::thread::hardware_concurrency();
return cores;
#endif
static auto val = std::getenv("HARDWARE_CONCURRENCY");
static const uint32_t cores = val ? (uint32_t)std::stoul(val) : std::thread::hardware_concurrency();
return cores;
#ifndef __wasm__
} catch (std::exception const&) {
throw std::runtime_error("HARDWARE_CONCURRENCY invalid.");
Expand Down
27 changes: 27 additions & 0 deletions barretenberg/cpp/src/barretenberg/polynomials/pow.bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "barretenberg/polynomials/pow.hpp"
#include "barretenberg/ecc/curves/bn254/fr.hpp"
#include <benchmark/benchmark.h>

using namespace benchmark;
using namespace bb;

namespace {

/**
 * @brief Benchmark building a PowPolynomial from the first state.range(0) betas and calling compute_values().
 *
 * The selection of betas does not change between iterations, so it is prepared once, outside the timed
 * loop. Each timed iteration therefore covers only the PowPolynomial construction and compute_values().
 *
 * @param state Google Benchmark state; state.range(0) is the number of betas to use.
 */
void compute_pow_poly(benchmark::State& state)
{
    // Pool of small, distinct challenges; the benchmark argument selects how many of them are used.
    const std::vector<bb::fr> betas{ 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
                                     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 };

    // Reject arguments that would make `betas.begin() + num_betas` run outside the pool.
    const int64_t num_betas = state.range(0);
    if (num_betas < 0 || num_betas > static_cast<int64_t>(betas.size())) {
        state.SkipWithError("compute_pow_poly: requested number of betas is outside the available pool");
        return;
    }

    // Loop-invariant input, hoisted so its allocation and copy are not part of the measurement.
    std::vector<bb::fr> cur_betas(betas.begin(), betas.begin() + num_betas);

    for (auto _ : state) {
        PowPolynomial pow{ cur_betas };
        pow.compute_values();
    }
}

BENCHMARK(compute_pow_poly)->Unit(benchmark::kMillisecond)->Arg(20);

} // namespace
BENCHMARK_MAIN();
10 changes: 9 additions & 1 deletion barretenberg/cpp/src/barretenberg/polynomials/pow.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#pragma once
#include "barretenberg/common/compiler_hints.hpp"
#include "barretenberg/common/op_count.hpp"
#include "barretenberg/common/thread.hpp"

#include <cstddef>
Expand Down Expand Up @@ -121,8 +123,9 @@ template <typename FF> struct PowPolynomial {
* @brief Given \vec{β} = {β_0,...,β_{d-1}} compute pow_\vec{β}(i) for i=0,...,2^{d}-1
*
*/
void compute_values()
BB_PROFILE void compute_values()
{
BB_OP_COUNT_TIME();
size_t pow_size = 1 << betas.size();
pow_betas = std::vector<FF>(pow_size);

Expand All @@ -136,6 +139,11 @@ template <typename FF> struct PowPolynomial {
size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified
num_threads = num_threads > 0 ? num_threads : 1; // ensure num threads is >= 1
size_t iterations_per_thread = pow_size / num_threads; // actual iterations per thread

// TODO(https://github.com/AztecProtocol/barretenberg/issues/864): This computation is asymptotically slow as it
// does pow_size * log(pow_size) work. However, in practice, it's super efficient because it's trivially
// parallelizable and only takes 45ms for the whole 6 iter IVC benchmark. It's also very readable, so we're
// leaving it unoptimized for now.
parallel_for(num_threads, [&](size_t thread_idx) {
size_t start = thread_idx * iterations_per_thread;
size_t end = (thread_idx + 1) * iterations_per_thread;
Expand Down
32 changes: 0 additions & 32 deletions barretenberg/cpp/src/barretenberg/srs/global_crs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,7 @@ void init_crs_factory(std::string crs_path)
if (crs_factory != nullptr) {
return;
}
#ifdef WASMTIME_ENV_HACK
static_cast<void>(crs_path);
// We only need this codepath in wasmtime because the SRS cannot be loaded in our usual ways
// and we don't need a real CRS for our purposes.
// TODO(https://github.com/AztecProtocol/barretenberg/issues/837): make this a real SRS.
std::cout << "WASMTIME_ENV_HACK: started generating fake bn254 curve" << std::endl;
std::vector<g1::affine_element> points;
// 2**19 points
points.reserve(1 << 19);
for (int i = 0; i < (1 << 19); i++) {
points.push_back(g1::affine_element::random_element());
}
init_crs_factory(points, g2::affine_element{ fq::random_element(), fq::random_element() });
std::cout << "WASMTIME_ENV_HACK: finished generating fake bn254 curve" << std::endl;
#else
crs_factory = std::make_shared<factories::FileCrsFactory<curve::BN254>>(crs_path);
#endif
}

// Initializes the crs using the memory buffers
Expand All @@ -54,23 +38,7 @@ void init_grumpkin_crs_factory(std::string crs_path)
if (grumpkin_crs_factory != nullptr) {
return;
}
#ifdef WASMTIME_ENV_HACK
// We only need this codepath in wasmtime because the SRS cannot be loaded in our usual ways
// and we don't need a real CRS for our purposes.
// TODO(https://github.com/AztecProtocol/barretenberg/issues/837): make this a real SRS.
static_cast<void>(crs_path);
std::cout << "WASMTIME_ENV_HACK: started generating fake grumpkin curve" << std::endl;
std::vector<curve::Grumpkin::AffineElement> points;
// 2**18 points
points.reserve(1 << 18);
for (int i = 0; i < (1 << 18); i++) {
points.push_back(curve::Grumpkin::AffineElement::random_element());
}
std::cout << "WASMTIME_ENV_HACK: finished generating fake grumpkin curve" << std::endl;
init_grumpkin_crs_factory(points);
#else
grumpkin_crs_factory = std::make_shared<factories::FileCrsFactory<curve::Grumpkin>>(crs_path);
#endif
}

std::shared_ptr<factories::CrsFactory<curve::BN254>> get_crs_factory()
Expand Down

0 comments on commit f0edc4f

Please sign in to comment.