Skip to content

Commit

Permalink
feat(avm): range check opt via aliases (#11846)
Browse files Browse the repository at this point in the history
Tests are passing but please check.
  • Loading branch information
fcarreiro authored Feb 10, 2025
1 parent 4526059 commit ce6a5bf
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 93 deletions.
73 changes: 40 additions & 33 deletions barretenberg/cpp/pil/vm2/range_check.pil
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@ namespace range_check(256);
#[skippable_if]
sel = 0;


// Witnesses
// Witnesses
// Value to range check
pol commit value;
// Number of bits to check against (this number must be <=128)
pol commit rng_chk_bits;

// Bit Size Columns
// It is enforced (further down) that the selected column is the lowest multiple of 16 that is greater than rng_chk_bits
// It is enforced (further down) that the selected column is the lowest multiple of 16 that is greater than rng_chk_bits
// e.g., rng_chk_bits = 10 ===> is_lte_u16, rng_chk_bits = 100 ==> is_lte_u112
// If rng_chk_bits is a multiple of 16, a prover is able to choose either is_lte_xx or is_lte_xx(+16), since the dynamic register will prove 0
// This isn't a concern and only costs the prover additional compute.
Expand Down Expand Up @@ -52,19 +51,30 @@ namespace range_check(256);
pol commit u16_r7;

// In each of these relations, the u16_r7 register contains the most significant 16 bits of value.
pol X_0 = is_lte_u16 * u16_r7;
pol X_1 = is_lte_u32 * (u16_r0 + u16_r7 * 2**16);
pol X_2 = is_lte_u48 * (u16_r0 + u16_r1 * 2**16 + u16_r7 * 2**32);
pol X_3 = is_lte_u64 * (u16_r0 + u16_r1 * 2**16 + u16_r2 * 2**32 + u16_r7 * 2**48);
pol X_4 = is_lte_u80 * (u16_r0 + u16_r1 * 2**16 + u16_r2 * 2**32 + u16_r3 * 2**48 + u16_r7 * 2**64);
pol X_5 = is_lte_u96 * (u16_r0 + u16_r1 * 2**16 + u16_r2 * 2**32 + u16_r3 * 2**48 + u16_r4 * 2**64 + u16_r7 * 2**80);
pol X_6 = is_lte_u112 * (u16_r0 + u16_r1 * 2**16 + u16_r2 * 2**32 + u16_r3 * 2**48 + u16_r4 * 2**64 + u16_r5 * 2**80 + u16_r7 * 2**96);
pol X_7 = is_lte_u128 * (u16_r0 + u16_r1 * 2**16 + u16_r2 * 2**32 + u16_r3 * 2**48 + u16_r4 * 2**64 + u16_r5 * 2**80 + u16_r6 * 2**96 + u16_r7 * 2**112);
pol PX_0 = 0;
pol R7_0 = u16_r7;
pol PX_1 = u16_r0;
pol R7_1 = R7_0 * 2**16;
pol PX_2 = PX_1 + u16_r1 * 2**16;
pol R7_2 = R7_1 * 2**16;
pol PX_3 = PX_2 + u16_r2 * 2**32;
pol R7_3 = R7_2 * 2**16;
pol PX_4 = PX_3 + u16_r3 * 2**48;
pol R7_4 = R7_3 * 2**16;
pol PX_5 = PX_4 + u16_r4 * 2**64;
pol R7_5 = R7_4 * 2**16;
pol PX_6 = PX_5 + u16_r5 * 2**80;
pol R7_6 = R7_5 * 2**16;
pol PX_7 = PX_6 + u16_r6 * 2**96;
pol R7_7 = R7_6 * 2**16;
// NOTE: when doing a smaller range check (like is_lte_u48 which only uses u16_r0, u16_r1 and u16_r7),
// the values of inactive registers (u16_r2...6) are unconstrained

// Since the is_lte_x are mutually exclusive, only one of the Xs will be non-zero
pol RESULT = X_0 + X_1 + X_2 + X_3 + X_4 + X_5 + X_6 + X_7;
// Since the is_lte_x are mutually exclusive, only one of the terms will be non-zero
pol RESULT = is_lte_u16 * (PX_0 + R7_0) + is_lte_u32 * (PX_1 + R7_1) +
is_lte_u48 * (PX_2 + R7_2) + is_lte_u64 * (PX_3 + R7_3) +
is_lte_u80 * (PX_4 + R7_4) + is_lte_u96 * (PX_5 + R7_5) +
is_lte_u112 * (PX_6 + R7_6) + is_lte_u128 * (PX_7 + R7_7);

// Enforce that value can be derived from whichever slice registers are activated by an is_lte flag
#[CHECK_RECOMPOSITION]
Expand All @@ -87,7 +97,7 @@ namespace range_check(256);
// [CALCULATION STEPS]
// 1) Calculate dyn_rng_chk_bits from the table above
// 2) Calculate dyn_rng_chk_pow_2 = 2^dyn_rng_chk_bits
// 3) Calculate dyn_diff = dyn_rng_chk_pow_2 - u16_r7 - 1
// 3) Calculate dyn_diff = dyn_rng_chk_pow_2 - u16_r7 - 1

// [ASSERTIONS]
// 1) Assert 0 <= dyn_rng_chk_bits <= 16 (i.e. dyn_rng_chk_bits supports up to a 16-bit number)
Expand Down Expand Up @@ -123,7 +133,6 @@ namespace range_check(256);
// 2) u16_r0 = 3, while all other registers including u16_r7 (the dynamic one) are set to zero - passing #[CHECK_RECOMPOSITION]
// 3) dyn_rng_chk_bits = 100 - 112 = -12, as per the table above - this fails #[LOOKUP_RNG_CHK_POW_2]


// The number of bits that need to be dynamically range checked.
pol commit dyn_rng_chk_bits;
// Valid values for dyn_rng_chk_bits are in the range [0, 16]
Expand All @@ -137,7 +146,6 @@ namespace range_check(256);
sel {dyn_rng_chk_bits, dyn_rng_chk_pow_2} in precomputed.sel_range_8 {precomputed.clk, precomputed.power_of_2};
// NOTE: `sel_range_8` is chosen because it gives us rows [0, 256] which will give us all of the powers we need (plus many we don't need)


// Now we need to perform the dynamic range check itself
// We check that u16_r7 < dyn_rng_chk_pow_2 ==> dyn_rng_chk_pow_2 - u16_r7 - 1 >= 0
pol commit dyn_diff;
Expand All @@ -147,8 +155,7 @@ namespace range_check(256);
#[LOOKUP_RNG_CHK_DIFF]
sel { dyn_diff } in precomputed.sel_range_16 { precomputed.clk };


// Lookup relations.
// Lookup relations.
// We only need these relations while we do not support pol in the LHS selector
pol commit sel_r0_16_bit_rng_lookup;
pol commit sel_r1_16_bit_rng_lookup;
Expand All @@ -160,34 +167,34 @@ namespace range_check(256);

// The lookups are cumulative - i.e. every value greater than 16 bits involve sel_r0_16_bit_rng_lookup
// Note that the lookup for the u16_r7 is always active (dynamic range check)
sel_r0_16_bit_rng_lookup - (is_lte_u32 + is_lte_u48 + is_lte_u64 + is_lte_u80 + is_lte_u96 + is_lte_u112 + is_lte_u128) = 0;
sel_r1_16_bit_rng_lookup - (is_lte_u48 + is_lte_u64 + is_lte_u80 + is_lte_u96 + is_lte_u112 + is_lte_u128) = 0;
sel_r2_16_bit_rng_lookup - (is_lte_u64 + is_lte_u80 + is_lte_u96 + is_lte_u112 + is_lte_u128) = 0;
sel_r3_16_bit_rng_lookup - (is_lte_u80 + is_lte_u96 + is_lte_u112 + is_lte_u128) = 0;
sel_r4_16_bit_rng_lookup - (is_lte_u96 + is_lte_u112 + is_lte_u128) = 0;
sel_r5_16_bit_rng_lookup - (is_lte_u112 + is_lte_u128) = 0;
sel_r6_16_bit_rng_lookup - is_lte_u128 = 0;
pol CUM_LTE_128 = is_lte_u128;
pol CUM_LTE_112 = is_lte_u112 + CUM_LTE_128;
pol CUM_LTE_96 = is_lte_u96 + CUM_LTE_112;
pol CUM_LTE_80 = is_lte_u80 + CUM_LTE_96;
pol CUM_LTE_64 = is_lte_u64 + CUM_LTE_80;
pol CUM_LTE_48 = is_lte_u48 + CUM_LTE_64;
pol CUM_LTE_32 = is_lte_u32 + CUM_LTE_48;
sel_r0_16_bit_rng_lookup - CUM_LTE_32 = 0;
sel_r1_16_bit_rng_lookup - CUM_LTE_48 = 0;
sel_r2_16_bit_rng_lookup - CUM_LTE_64 = 0;
sel_r3_16_bit_rng_lookup - CUM_LTE_80 = 0;
sel_r4_16_bit_rng_lookup - CUM_LTE_96 = 0;
sel_r5_16_bit_rng_lookup - CUM_LTE_112 = 0;
sel_r6_16_bit_rng_lookup - CUM_LTE_128 = 0;

#[LOOKUP_RNG_CHK_IS_R0_16_BIT]
sel_r0_16_bit_rng_lookup { u16_r0 } in precomputed.sel_range_16 { precomputed.clk };

#[LOOKUP_RNG_CHK_IS_R1_16_BIT]
sel_r1_16_bit_rng_lookup { u16_r1 } in precomputed.sel_range_16 { precomputed.clk };

#[LOOKUP_RNG_CHK_IS_R2_16_BIT]
sel_r2_16_bit_rng_lookup { u16_r2 } in precomputed.sel_range_16 { precomputed.clk };

#[LOOKUP_RNG_CHK_IS_R3_16_BIT]
sel_r3_16_bit_rng_lookup { u16_r3 } in precomputed.sel_range_16 { precomputed.clk };

#[LOOKUP_RNG_CHK_IS_R4_16_BIT]
sel_r4_16_bit_rng_lookup { u16_r4 } in precomputed.sel_range_16 { precomputed.clk };

#[LOOKUP_RNG_CHK_IS_R5_16_BIT]
sel_r5_16_bit_rng_lookup { u16_r5 } in precomputed.sel_range_16 { precomputed.clk };

#[LOOKUP_RNG_CHK_IS_R6_16_BIT]
sel_r6_16_bit_rng_lookup { u16_r6 } in precomputed.sel_range_16 { precomputed.clk };

#[LOOKUP_RNG_CHK_IS_R7_16_BIT]
sel { u16_r7 } in precomputed.sel_range_16 { precomputed.clk };
sel { u16_r7 } in precomputed.sel_range_16 { precomputed.clk };
Original file line number Diff line number Diff line change
Expand Up @@ -27,44 +27,40 @@ template <typename FF_> class range_checkImpl {
[[maybe_unused]] const RelationParameters<FF>&,
[[maybe_unused]] const FF& scaling_factor)
{
const auto range_check_X_0 = new_term.range_check_is_lte_u16 * new_term.range_check_u16_r7;
const auto range_check_X_1 =
new_term.range_check_is_lte_u32 * (new_term.range_check_u16_r0 + new_term.range_check_u16_r7 * FF(65536));
const auto range_check_X_2 =
new_term.range_check_is_lte_u48 * (new_term.range_check_u16_r0 + new_term.range_check_u16_r1 * FF(65536) +
new_term.range_check_u16_r7 * FF(4294967296UL));
const auto range_check_X_3 =
new_term.range_check_is_lte_u64 *
(new_term.range_check_u16_r0 + new_term.range_check_u16_r1 * FF(65536) +
new_term.range_check_u16_r2 * FF(4294967296UL) + new_term.range_check_u16_r7 * FF(281474976710656UL));
const auto range_check_X_4 =
new_term.range_check_is_lte_u80 *
(new_term.range_check_u16_r0 + new_term.range_check_u16_r1 * FF(65536) +
new_term.range_check_u16_r2 * FF(4294967296UL) + new_term.range_check_u16_r3 * FF(281474976710656UL) +
new_term.range_check_u16_r7 * FF(uint256_t{ 0UL, 1UL, 0UL, 0UL }));
const auto range_check_X_5 =
new_term.range_check_is_lte_u96 *
(new_term.range_check_u16_r0 + new_term.range_check_u16_r1 * FF(65536) +
new_term.range_check_u16_r2 * FF(4294967296UL) + new_term.range_check_u16_r3 * FF(281474976710656UL) +
new_term.range_check_u16_r4 * FF(uint256_t{ 0UL, 1UL, 0UL, 0UL }) +
new_term.range_check_u16_r7 * FF(uint256_t{ 0UL, 65536UL, 0UL, 0UL }));
const auto range_check_X_6 =
new_term.range_check_is_lte_u112 *
(new_term.range_check_u16_r0 + new_term.range_check_u16_r1 * FF(65536) +
new_term.range_check_u16_r2 * FF(4294967296UL) + new_term.range_check_u16_r3 * FF(281474976710656UL) +
new_term.range_check_u16_r4 * FF(uint256_t{ 0UL, 1UL, 0UL, 0UL }) +
new_term.range_check_u16_r5 * FF(uint256_t{ 0UL, 65536UL, 0UL, 0UL }) +
new_term.range_check_u16_r7 * FF(uint256_t{ 0UL, 4294967296UL, 0UL, 0UL }));
const auto range_check_X_7 =
new_term.range_check_is_lte_u128 *
(new_term.range_check_u16_r0 + new_term.range_check_u16_r1 * FF(65536) +
new_term.range_check_u16_r2 * FF(4294967296UL) + new_term.range_check_u16_r3 * FF(281474976710656UL) +
new_term.range_check_u16_r4 * FF(uint256_t{ 0UL, 1UL, 0UL, 0UL }) +
new_term.range_check_u16_r5 * FF(uint256_t{ 0UL, 65536UL, 0UL, 0UL }) +
new_term.range_check_u16_r6 * FF(uint256_t{ 0UL, 4294967296UL, 0UL, 0UL }) +
new_term.range_check_u16_r7 * FF(uint256_t{ 0UL, 281474976710656UL, 0UL, 0UL }));
const auto range_check_RESULT = range_check_X_0 + range_check_X_1 + range_check_X_2 + range_check_X_3 +
range_check_X_4 + range_check_X_5 + range_check_X_6 + range_check_X_7;
const auto range_check_PX_0 = FF(0);
const auto range_check_R7_0 = new_term.range_check_u16_r7;
const auto range_check_PX_1 = new_term.range_check_u16_r0;
const auto range_check_R7_1 = range_check_R7_0 * FF(65536);
const auto range_check_PX_2 = range_check_PX_1 + new_term.range_check_u16_r1 * FF(65536);
const auto range_check_R7_2 = range_check_R7_1 * FF(65536);
const auto range_check_PX_3 = range_check_PX_2 + new_term.range_check_u16_r2 * FF(4294967296UL);
const auto range_check_R7_3 = range_check_R7_2 * FF(65536);
const auto range_check_PX_4 = range_check_PX_3 + new_term.range_check_u16_r3 * FF(281474976710656UL);
const auto range_check_R7_4 = range_check_R7_3 * FF(65536);
const auto range_check_PX_5 =
range_check_PX_4 + new_term.range_check_u16_r4 * FF(uint256_t{ 0UL, 1UL, 0UL, 0UL });
const auto range_check_R7_5 = range_check_R7_4 * FF(65536);
const auto range_check_PX_6 =
range_check_PX_5 + new_term.range_check_u16_r5 * FF(uint256_t{ 0UL, 65536UL, 0UL, 0UL });
const auto range_check_R7_6 = range_check_R7_5 * FF(65536);
const auto range_check_PX_7 =
range_check_PX_6 + new_term.range_check_u16_r6 * FF(uint256_t{ 0UL, 4294967296UL, 0UL, 0UL });
const auto range_check_R7_7 = range_check_R7_6 * FF(65536);
const auto range_check_RESULT = new_term.range_check_is_lte_u16 * (range_check_PX_0 + range_check_R7_0) +
new_term.range_check_is_lte_u32 * (range_check_PX_1 + range_check_R7_1) +
new_term.range_check_is_lte_u48 * (range_check_PX_2 + range_check_R7_2) +
new_term.range_check_is_lte_u64 * (range_check_PX_3 + range_check_R7_3) +
new_term.range_check_is_lte_u80 * (range_check_PX_4 + range_check_R7_4) +
new_term.range_check_is_lte_u96 * (range_check_PX_5 + range_check_R7_5) +
new_term.range_check_is_lte_u112 * (range_check_PX_6 + range_check_R7_6) +
new_term.range_check_is_lte_u128 * (range_check_PX_7 + range_check_R7_7);
const auto range_check_CUM_LTE_128 = new_term.range_check_is_lte_u128;
const auto range_check_CUM_LTE_112 = new_term.range_check_is_lte_u112 + range_check_CUM_LTE_128;
const auto range_check_CUM_LTE_96 = new_term.range_check_is_lte_u96 + range_check_CUM_LTE_112;
const auto range_check_CUM_LTE_80 = new_term.range_check_is_lte_u80 + range_check_CUM_LTE_96;
const auto range_check_CUM_LTE_64 = new_term.range_check_is_lte_u64 + range_check_CUM_LTE_80;
const auto range_check_CUM_LTE_48 = new_term.range_check_is_lte_u48 + range_check_CUM_LTE_64;
const auto range_check_CUM_LTE_32 = new_term.range_check_is_lte_u32 + range_check_CUM_LTE_48;

{
using Accumulator = typename std::tuple_element_t<0, ContainerOverSubrelations>;
Expand Down Expand Up @@ -159,58 +155,43 @@ template <typename FF_> class range_checkImpl {
}
{
using Accumulator = typename std::tuple_element_t<13, ContainerOverSubrelations>;
auto tmp =
(new_term.range_check_sel_r0_16_bit_rng_lookup -
(new_term.range_check_is_lte_u32 + new_term.range_check_is_lte_u48 + new_term.range_check_is_lte_u64 +
new_term.range_check_is_lte_u80 + new_term.range_check_is_lte_u96 + new_term.range_check_is_lte_u112 +
new_term.range_check_is_lte_u128));
auto tmp = (new_term.range_check_sel_r0_16_bit_rng_lookup - range_check_CUM_LTE_32);
tmp *= scaling_factor;
std::get<13>(evals) += typename Accumulator::View(tmp);
}
{
using Accumulator = typename std::tuple_element_t<14, ContainerOverSubrelations>;
auto tmp = (new_term.range_check_sel_r1_16_bit_rng_lookup -
(new_term.range_check_is_lte_u48 + new_term.range_check_is_lte_u64 +
new_term.range_check_is_lte_u80 + new_term.range_check_is_lte_u96 +
new_term.range_check_is_lte_u112 + new_term.range_check_is_lte_u128));
auto tmp = (new_term.range_check_sel_r1_16_bit_rng_lookup - range_check_CUM_LTE_48);
tmp *= scaling_factor;
std::get<14>(evals) += typename Accumulator::View(tmp);
}
{
using Accumulator = typename std::tuple_element_t<15, ContainerOverSubrelations>;
auto tmp =
(new_term.range_check_sel_r2_16_bit_rng_lookup -
(new_term.range_check_is_lte_u64 + new_term.range_check_is_lte_u80 + new_term.range_check_is_lte_u96 +
new_term.range_check_is_lte_u112 + new_term.range_check_is_lte_u128));
auto tmp = (new_term.range_check_sel_r2_16_bit_rng_lookup - range_check_CUM_LTE_64);
tmp *= scaling_factor;
std::get<15>(evals) += typename Accumulator::View(tmp);
}
{
using Accumulator = typename std::tuple_element_t<16, ContainerOverSubrelations>;
auto tmp = (new_term.range_check_sel_r3_16_bit_rng_lookup -
(new_term.range_check_is_lte_u80 + new_term.range_check_is_lte_u96 +
new_term.range_check_is_lte_u112 + new_term.range_check_is_lte_u128));
auto tmp = (new_term.range_check_sel_r3_16_bit_rng_lookup - range_check_CUM_LTE_80);
tmp *= scaling_factor;
std::get<16>(evals) += typename Accumulator::View(tmp);
}
{
using Accumulator = typename std::tuple_element_t<17, ContainerOverSubrelations>;
auto tmp = (new_term.range_check_sel_r4_16_bit_rng_lookup -
(new_term.range_check_is_lte_u96 + new_term.range_check_is_lte_u112 +
new_term.range_check_is_lte_u128));
auto tmp = (new_term.range_check_sel_r4_16_bit_rng_lookup - range_check_CUM_LTE_96);
tmp *= scaling_factor;
std::get<17>(evals) += typename Accumulator::View(tmp);
}
{
using Accumulator = typename std::tuple_element_t<18, ContainerOverSubrelations>;
auto tmp = (new_term.range_check_sel_r5_16_bit_rng_lookup -
(new_term.range_check_is_lte_u112 + new_term.range_check_is_lte_u128));
auto tmp = (new_term.range_check_sel_r5_16_bit_rng_lookup - range_check_CUM_LTE_112);
tmp *= scaling_factor;
std::get<18>(evals) += typename Accumulator::View(tmp);
}
{
using Accumulator = typename std::tuple_element_t<19, ContainerOverSubrelations>;
auto tmp = (new_term.range_check_sel_r6_16_bit_rng_lookup - new_term.range_check_is_lte_u128);
auto tmp = (new_term.range_check_sel_r6_16_bit_rng_lookup - range_check_CUM_LTE_128);
tmp *= scaling_factor;
std::get<19>(evals) += typename Accumulator::View(tmp);
}
Expand Down

1 comment on commit ce6a5bf

@AztecBot
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'C++ Benchmark'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.05.

Benchmark suite Current: ce6a5bf Previous: 4a8136c Ratio
nativeClientIVCBench/Ambient_17_in_20/6 20161.87518100003 ms/iter 19191.684199999996 ms/iter 1.05
nativeconstruct_proof_ultrahonk_power_of_2/20 4458.2611580000275 ms/iter 4079.7445510000043 ms/iter 1.09
Goblin::merge(t) 149128120 ns/iter 133117939 ns/iter 1.12

This comment was automatically generated by workflow using github-action-benchmark.

CC: @ludamad @codygunton

Please sign in to comment.