Skip to content

Commit

Permalink
feat: Changing finite field arithmetic in wasm to 29 bits for multipl…
Browse files Browse the repository at this point in the history
…ications (#5435)

Now when we compile for wasm we use 9 29-bit limbs to perform all
multiplications. In wasmtime this results in -10% for multiplication
benchmark and -35% in squaring. This makes a -7% impact on wasm
client_ivc.
Other changes:
1. Most of the changes are just additions to tests to handle the new
montgomery form, because we have a lot of hardcoded internal
representations. If we get a new one, we should rewrite those to use
non-montgomery form. Also we need to recompute all parameters.
2. Added squarings to univariate and used in some relations
3. Added a script to run wasmer instead of wasmtime on the benchmark
(but you need to change arguments slightly to add "--")
4. For some reason wasm build has become really slow (especially on
relations and protogalaxy).
5. Added docs

![image](https://github.com/AztecProtocol/aztec-packages/assets/4798775/d5135995-c318-406d-a65a-587bdd3baae1)
  • Loading branch information
Rumata888 authored Apr 16, 2024
1 parent f849575 commit b2d9b9d
Show file tree
Hide file tree
Showing 37 changed files with 1,813 additions and 419 deletions.
2 changes: 1 addition & 1 deletion barretenberg/cpp/scripts/benchmark_wasm_remote.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ cd $(dirname $0)/..

# Configure and build.
cmake --preset wasm-threads
cmake --build --preset wasm-threads --target $BENCHMARK
cmake --build --preset wasm-threads --parallel --target $BENCHMARK

source scripts/_benchmark_remote_lock.sh

Expand Down
30 changes: 30 additions & 0 deletions barretenberg/cpp/scripts/benchmark_wasm_remote_wasmer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# This script automates the process of benchmarking WASM on a remote EC2 instance.
# Prerequisites:
# 1. Define the following environment variables:
# - BB_SSH_KEY: SSH key for EC2 instance, e.g., '-i key.pem'
# - BB_SSH_INSTANCE: EC2 instance URL
# - BB_SSH_CPP_PATH: Path to barretenberg/cpp in a cloned repository on the EC2 instance
set -eu

BENCHMARK=${1:-goblin_bench}
COMMAND=${2:-./$BENCHMARK}
HARDWARE_CONCURRENCY=${HARDWARE_CONCURRENCY:-16}

# Move above script dir.
cd $(dirname $0)/..

# Configure and build.
cmake --preset wasm-threads
cmake --build --preset wasm-threads --parallel --target $BENCHMARK

source scripts/_benchmark_remote_lock.sh

cd build-wasm-threads
# ensure folder structure
ssh $BB_SSH_KEY $BB_SSH_INSTANCE "mkdir -p $BB_SSH_CPP_PATH/build-wasm-threads"
# copy build wasm threads
scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build-wasm-threads
# run wasm benchmarking
ssh $BB_SSH_KEY $BB_SSH_INSTANCE \
"cd $BB_SSH_CPP_PATH/build-wasm-threads ; /home/ubuntu/.wasmer/bin/wasmer run --dir=$BB_SSH_CPP_PATH --enable-threads --env HARDWARE_CONCURRENCY=$HARDWARE_CONCURRENCY $COMMAND"
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,65 @@ void sequential_copy(State& state)
}
}
}

/**
* @brief Evaluate how much uint256_t multiplication costs (in cache)
*
* @param state
*/
void uint_multiplication(State& state)
{
numeric::RNG& engine = numeric::get_debug_randomness();
std::vector<uint256_t> copy_vector(2);
for (size_t j = 0; j < 2; j++) {
copy_vector.emplace_back(engine.get_random_uint256());
copy_vector.emplace_back(engine.get_random_uint256());
copy_vector[0] += (1 - copy_vector[0].get_bit(0));
copy_vector[1] += (1 - copy_vector[1].get_bit(0));
}

for (auto _ : state) {
state.PauseTiming();
size_t num_cycles = 1 << static_cast<size_t>(state.range(0));
state.ResumeTiming();
for (size_t i = 0; i < num_cycles; i++) {
copy_vector[i & 1] *= copy_vector[1 - (i & 1)];
}
}
}

/**
* @brief Evaluate how much uint256_t extended multiplication costs (in cache)
*
* @param state
*/
void uint_extended_multiplication(State& state)
{
numeric::RNG& engine = numeric::get_debug_randomness();
std::vector<uint256_t> copy_vector(2);
for (size_t j = 0; j < 2; j++) {
copy_vector.emplace_back(engine.get_random_uint256());
copy_vector.emplace_back(engine.get_random_uint256());
copy_vector[0] += (1 - copy_vector[0].get_bit(0));
copy_vector[1] += (1 - copy_vector[1].get_bit(0));
}

for (auto _ : state) {
state.PauseTiming();
size_t num_cycles = 1 << static_cast<size_t>(state.range(0));
state.ResumeTiming();
for (size_t i = 0; i < num_cycles; i++) {
auto [r0, r1] = copy_vector[i & 1].mul_extended(copy_vector[1 - (i & 1)]);
state.PauseTiming();
copy_vector[i & 1] += r0;
copy_vector[1 - (i & 1)] += r1;
copy_vector[0] += (1 - copy_vector[0].get_bit(0));
copy_vector[1] += (1 - copy_vector[1].get_bit(0));
state.ResumeTiming();
}
}
}

} // namespace

BENCHMARK(parallel_for_field_element_addition)->Unit(kMicrosecond)->DenseRange(0, MAX_REPETITION_LOG);
Expand All @@ -380,4 +439,6 @@ BENCHMARK(projective_point_doubling)->Unit(kMicrosecond)->DenseRange(12, 22);
BENCHMARK(scalar_multiplication)->Unit(kMicrosecond)->DenseRange(12, 18);
BENCHMARK(cycle_waste)->Unit(kMicrosecond)->DenseRange(20, 30);
BENCHMARK(sequential_copy)->Unit(kMicrosecond)->DenseRange(20, 25);
BENCHMARK(uint_multiplication)->Unit(kMicrosecond)->DenseRange(12, 27);
BENCHMARK(uint_extended_multiplication)->Unit(kMicrosecond)->DenseRange(12, 27);
BENCHMARK_MAIN();
43 changes: 43 additions & 0 deletions barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,36 @@ class Bn254FqParams {
static constexpr uint64_t cube_root_2 = 0xaa303344d4741444UL;
static constexpr uint64_t cube_root_3 = 0x2c3b3f0d26594943UL;

static constexpr uint64_t modulus_wasm_0 = 0x187cfd47;
static constexpr uint64_t modulus_wasm_1 = 0x10460b6;
static constexpr uint64_t modulus_wasm_2 = 0x1c72a34f;
static constexpr uint64_t modulus_wasm_3 = 0x2d522d0;
static constexpr uint64_t modulus_wasm_4 = 0x1585d978;
static constexpr uint64_t modulus_wasm_5 = 0x2db40c0;
static constexpr uint64_t modulus_wasm_6 = 0xa6e141;
static constexpr uint64_t modulus_wasm_7 = 0xe5c2634;
static constexpr uint64_t modulus_wasm_8 = 0x30644e;

static constexpr uint64_t r_squared_wasm_0 = 0xe1a2a074659bac10UL;
static constexpr uint64_t r_squared_wasm_1 = 0x639855865406005aUL;
static constexpr uint64_t r_squared_wasm_2 = 0xff54c5802d3e2632UL;
static constexpr uint64_t r_squared_wasm_3 = 0x2a11a68c34ea65a6UL;

static constexpr uint64_t cube_root_wasm_0 = 0x62b1a3a46a337995UL;
static constexpr uint64_t cube_root_wasm_1 = 0xadc97d2722e2726eUL;
static constexpr uint64_t cube_root_wasm_2 = 0x64ee82ede2db85faUL;
static constexpr uint64_t cube_root_wasm_3 = 0x0c0afea1488a03bbUL;

static constexpr uint64_t primitive_root_0 = 0UL;
static constexpr uint64_t primitive_root_1 = 0UL;
static constexpr uint64_t primitive_root_2 = 0UL;
static constexpr uint64_t primitive_root_3 = 0UL;

static constexpr uint64_t primitive_root_wasm_0 = 0x0000000000000000UL;
static constexpr uint64_t primitive_root_wasm_1 = 0x0000000000000000UL;
static constexpr uint64_t primitive_root_wasm_2 = 0x0000000000000000UL;
static constexpr uint64_t primitive_root_wasm_3 = 0x0000000000000000UL;

static constexpr uint64_t endo_g1_lo = 0x7a7bd9d4391eb18d;
static constexpr uint64_t endo_g1_mid = 0x4ccef014a773d2cfUL;
static constexpr uint64_t endo_g1_hi = 0x0000000000000002UL;
Expand Down Expand Up @@ -57,6 +82,24 @@ class Bn254FqParams {
0x2a1f6744ce179d8eULL, 0x3829df06681f7cbdULL, 0x463456c802275bedULL, 0x543ece899c2f3b1cULL,
0x180a96573d3d9f8ULL, 0xf8b21270ddbb927ULL, 0x1d9598e8a7e39857ULL, 0x2ba010aa41eb7786ULL,
};

static constexpr uint64_t coset_generators_wasm_0[8] = { 0xeb8a8ec140766463ULL, 0xfded87957d76333dULL,
0x4c710c8092f2ff5eULL, 0x9af4916ba86fcb7fULL,
0xe9781656bdec97a0ULL, 0xfbdb0f2afaec667aULL,
0x4a5e94161069329bULL, 0x98e2190125e5febcULL };
static constexpr uint64_t coset_generators_wasm_1[8] = { 0xf2b1f20626a3da49ULL, 0x56c12d76cb13587fULL,
0x5251d378d7f4a143ULL, 0x4de2797ae4d5ea06ULL,
0x49731f7cf1b732c9ULL, 0xad825aed9626b0ffULL,
0xa91300efa307f9c3ULL, 0xa4a3a6f1afe94286ULL };
static constexpr uint64_t coset_generators_wasm_2[8] = { 0xf905ef8d84d5fea4ULL, 0x93b7a45b84f1507eULL,
0xe6b99ee0068dfab5ULL, 0x39bb9964882aa4ecULL,
0x8cbd93e909c74f23ULL, 0x276f48b709e2a0fcULL,
0x7a71433b8b7f4b33ULL, 0xcd733dc00d1bf56aULL };
static constexpr uint64_t coset_generators_wasm_3[8] = { 0x2958a27c02b7cd5fULL, 0x06bc8a3277c371abULL,
0x1484c05bce00b620ULL, 0x224cf685243dfa96ULL,
0x30152cae7a7b3f0bULL, 0x0d791464ef86e357ULL,
0x1b414a8e45c427ccULL, 0x290980b79c016c41ULL };

// used in msgpack schema serialization
static constexpr char schema_name[] = "fq";
static constexpr bool has_high_2adicity = false;
Expand Down
26 changes: 17 additions & 9 deletions barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,16 @@ TEST(fq, RandomElement)
TEST(fq, MulCheckAgainstConstants)
{
// test against some randomly generated test data
constexpr fq a{ 0x2523b6fa3956f038, 0x158aa08ecdd9ec1d, 0xf48216a4c74738d4, 0x2514cc93d6f0a1bf };
constexpr fq a_copy{ 0x2523b6fa3956f038, 0x158aa08ecdd9ec1d, 0xf48216a4c74738d4, 0x2514cc93d6f0a1bf };
constexpr fq b{ 0xb68aee5e4c8fc17c, 0xc5193de7f401d5e8, 0xb8777d4dde671db3, 0xe513e75c087b0bb };
constexpr fq b_copy = { 0xb68aee5e4c8fc17c, 0xc5193de7f401d5e8, 0xb8777d4dde671db3, 0xe513e75c087b0bb };
constexpr fq const_expected{ 0x7ed4174114b521c4, 0x58f5bd1d4279fdc2, 0x6a73ac09ee843d41, 0x687a76ae9b3425c };
constexpr fq a = uint256_t{ 0xa9b879029c49e60eUL, 0x2517b72250caa7b3UL, 0x6b86c81105dae2d1UL, 0x3a81735d5aec0c3UL };
constexpr fq a_copy =
uint256_t{ 0xa9b879029c49e60eUL, 0x2517b72250caa7b3UL, 0x6b86c81105dae2d1UL, 0x3a81735d5aec0c3UL };
constexpr fq b = uint256_t{ 0x744fc10aec23e56aUL, 0x5dea4788a3b936a6UL, 0xa0a89f4a8af01df1UL, 0x72ae28836807df3UL };
constexpr fq b_copy =
uint256_t{ 0x744fc10aec23e56aUL, 0x5dea4788a3b936a6UL, 0xa0a89f4a8af01df1UL, 0x72ae28836807df3UL };

constexpr fq const_expected =
uint256_t{ 0x6c0a789c0028fd09UL, 0xca9520d84c684efaUL, 0xcbf3f7b023a852b4UL, 0x1b2e4dac41400621UL };
constexpr fq const_result = a * b;

static_assert(const_result == const_expected);
static_assert(a == a_copy);
static_assert(b == b_copy);
Expand All @@ -111,7 +114,10 @@ TEST(fq, MulShortIntegers)
{
constexpr fq a{ 0xa, 0, 0, 0 };
constexpr fq b{ 0xb, 0, 0, 0 };
constexpr fq const_expected = { 0x65991a6dc2f3a183, 0xe3ba1f83394a2d08, 0x8401df65a169db3f, 0x1727099643607bba };
constexpr uint256_t a_original(a);
constexpr uint256_t b_original(b);
constexpr uint256_t prod_expected = (uint512_t(a_original) * uint512_t(b_original) % uint512_t(fq::modulus)).lo;
constexpr fq const_expected = prod_expected;
constexpr fq const_result = a * b;
static_assert(const_result == const_expected);

Expand Down Expand Up @@ -141,8 +147,10 @@ TEST(fq, MulSqrConsistency)

TEST(fq, SqrCheckAgainstConstants)
{
constexpr fq a{ 0x329596aa978981e8, 0x8542e6e254c2a5d0, 0xc5b687d82eadb178, 0x2d242aaf48f56b8a };
constexpr fq expected{ 0xbf4fb34e120b8b12, 0xf64d70efbf848328, 0xefbb6a533f2e7d89, 0x1de50f941425e4aa };
constexpr fq a = uint256_t{ 0xa9b879029c49e60eUL, 0x2517b72250caa7b3UL, 0x6b86c81105dae2d1UL, 0x3a81735d5aec0c3UL };

constexpr fq expected =
uint256_t{ 0x41081a42fdaa7e23UL, 0x44d1140f756ed419UL, 0x53716b0a6f253e63UL, 0xb1a0b04044d75fUL };
constexpr fq result = a.sqr();
static_assert(result == expected);

Expand Down
18 changes: 18 additions & 0 deletions barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq12.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

namespace bb {
struct Bn254Fq12Params {

#if defined(__SIZEOF_INT128__) && !defined(__wasm__)
static constexpr fq2 frobenius_coefficients_1{
{ 0xaf9ba69633144907UL, 0xca6b1d7387afb78aUL, 0x11bded5ef08a2087UL, 0x02f34d751a1f3a7cUL },
{ 0xa222ae234c492d72UL, 0xd00f02a4565de15bUL, 0xdc2ff3a253dfc926UL, 0x10a75716b3899551UL }
Expand All @@ -20,6 +22,22 @@ struct Bn254Fq12Params {
{ 0x365316184e46d97dUL, 0x0af7129ed4c96d9fUL, 0x659da72fca1009b5UL, 0x08116d8983a20d23UL },
{ 0xb1df4af7c39c1939UL, 0x3d9f02878a73bf7fUL, 0x9b2220928caf0ae0UL, 0x26684515eff054a6UL }
};
#else
static constexpr fq2 frobenius_coefficients_1{
{ 0xb75446af8a0c2399UL, 0xb5e243df8d8526c8UL, 0x7f6d66278fc2b89bUL, 0x2e05603062b5af58UL },
{ 0xaeefbf6e3bc6cc33UL, 0x7f50c04b4ed87762UL, 0x9a8b7572eb6a58d4UL, 0x9b83e6c410c870UL }
};

static constexpr fq2 frobenius_coefficients_2{
{ 0xd96ee8726e4983b2UL, 0xe9b7ed6a458f581eUL, 0x5361c2c89ea5d262UL, 0x24594fd198a79c6eUL },
{ 0UL, 0UL, 0UL, 0UL }
};

static constexpr fq2 frobenius_coefficients_3{
{ 0x9dc006978e6a3d3dUL, 0x695b3f038ef4bf24UL, 0x1a238968ba7a7ccdUL, 0x103828f20e49839cUL },
{ 0x5cbbb0bd4f4e6b31UL, 0xe83ce8be1b5b282bUL, 0x646d437ef03fbae3UL, 0x133cf9860031f0c0UL }
};
#endif
};

using fq12 = field12<fq2, fq6, Bn254Fq12Params>;
Expand Down
Loading

0 comments on commit b2d9b9d

Please sign in to comment.