From 8678ca57ac772e79f7fda26aeb2404a92bff3be9 Mon Sep 17 00:00:00 2001 From: Al-Kindi-0 <82364884+Al-Kindi-0@users.noreply.github.com> Date: Tue, 14 Jan 2025 18:18:23 +0100 Subject: [PATCH 1/6] feat: improve Falcon signature verification --- assembly/src/ast/instruction/advice.rs | 3 + assembly/src/parser/grammar.lalrpop | 2 + assembly/src/parser/token.rs | 4 + core/src/sys_events.rs | 20 ++ .../operations/sys_ops/sys_event_handlers.rs | 43 ++++ stdlib/asm/crypto/dsa/rpo_falcon512.masm | 218 ++++++++++++------ stdlib/tests/crypto/falcon.rs | 50 +++- 7 files changed, 254 insertions(+), 86 deletions(-) diff --git a/assembly/src/ast/instruction/advice.rs b/assembly/src/ast/instruction/advice.rs index f6bfe807d..8ddcef758 100644 --- a/assembly/src/ast/instruction/advice.rs +++ b/assembly/src/ast/instruction/advice.rs @@ -13,6 +13,7 @@ use vm_core::sys_events::SystemEvent; #[derive(Clone, PartialEq, Eq, Debug)] pub enum SystemEventNode { PushU64Div, + PushFalconDiv, PushExt2intt, PushSmtPeek, PushMapVal, @@ -30,6 +31,7 @@ impl From<&SystemEventNode> for SystemEvent { use SystemEventNode::*; match value { PushU64Div => Self::U64Div, + PushFalconDiv => Self::FalconDiv, PushExt2intt => Self::Ext2Intt, PushSmtPeek => Self::SmtPeek, PushMapVal => Self::MapValueToStack, @@ -56,6 +58,7 @@ impl fmt::Display for SystemEventNode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::PushU64Div => write!(f, "push_u64div"), + Self::PushFalconDiv => write!(f, "push_falcon_div"), Self::PushExt2intt => write!(f, "push_ext2intt"), Self::PushSmtPeek => write!(f, "push_smtpeek"), Self::PushMapVal => write!(f, "push_mapval"), diff --git a/assembly/src/parser/grammar.lalrpop b/assembly/src/parser/grammar.lalrpop index 378ae1ec5..06af15b57 100644 --- a/assembly/src/parser/grammar.lalrpop +++ b/assembly/src/parser/grammar.lalrpop @@ -56,6 +56,7 @@ extern { "push_sig" => Token::PushSig, "push_smtpeek" => Token::PushSmtpeek, "push_u64div" => Token::PushU64Div, + "push_falcon_div" => Token::PushFalconDiv, "and" => Token::And, "assert" => Token::Assert, "assertz" => Token::Assertz, @@ -677,6 +678,7 @@ SystemEvent: Instruction = { "adv" "." "push_sig" "." => Instruction::SysEvent(SystemEventNode::PushSignature { kind }), "adv" "." "push_smtpeek" => Instruction::SysEvent(SystemEventNode::PushSmtPeek), "adv" "." "push_u64div" => Instruction::SysEvent(SystemEventNode::PushU64Div), + "adv" "." "push_falcon_div" => Instruction::SysEvent(SystemEventNode::PushFalconDiv), } #[inline] diff --git a/assembly/src/parser/token.rs b/assembly/src/parser/token.rs index 94c4f73fd..688a19f78 100644 --- a/assembly/src/parser/token.rs +++ b/assembly/src/parser/token.rs @@ -153,6 +153,7 @@ pub enum Token<'input> { PushSmtset, PushSmtget, PushU64Div, + PushFalconDiv, And, Assert, Assertz, @@ -338,6 +339,7 @@ impl fmt::Display for Token<'_> { Token::PushSmtset => write!(f, "push_smtset"), Token::PushSmtget => write!(f, "push_smtget"), Token::PushU64Div => write!(f, "push_u64div"), + Token::PushFalconDiv => write!(f, "push_falcon_div"), Token::And => write!(f, "and"), Token::Assert => write!(f, "assert"), Token::Assertz => write!(f, "assertz"), @@ -531,6 +533,7 @@ impl<'input> Token<'input> { | Token::PushSmtset | Token::PushSmtget | Token::PushU64Div + | Token::PushFalconDiv | Token::And | Token::Assert | Token::Assertz @@ -676,6 +679,7 @@ impl<'input> Token<'input> { ("push_smtset", Token::PushSmtset), ("push_smtget", Token::PushSmtget), ("push_u64div", Token::PushU64Div), + ("push_falcon_div", Token::PushFalconDiv), ("and", Token::And), ("assert", Token::Assert), ("assertz", Token::Assertz), diff --git a/core/src/sys_events.rs b/core/src/sys_events.rs index 0a4c8df2a..d55416358 100644 --- a/core/src/sys_events.rs +++ b/core/src/sys_events.rs @@ -28,6 +28,7 @@ mod constants { pub const EVENT_HDWORD_TO_MAP_WITH_DOMAIN: u32 = 2822590340; pub const EVENT_HPERM_TO_MAP: u32 = 3297060969; pub const EVENT_FALCON_SIG_TO_STACK: u32 = 3419226139; + pub const EVENT_FALCON_DIV: u32 = 3419226155; } /// Defines a set of actions which can be initiated from the VM to inject new data into the advice @@ -119,6 +120,22 @@ pub enum SystemEvent { /// the remainder respectively. U64Div, + /// Pushes the result of divison (both the quotient and the remainder) of a [u64] by the Falcon + /// prime (M = 12289) onto the advice stack. + /// + /// Inputs: + /// Operand stack: [a1, a0, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [a1, a0, ...] + /// Advice stack: [q0, q1, r, ...] + /// + /// Where (a0, a1) are the 32-bit limbs of the dividend (with a0 representing the 32 least + /// significant bits and a1 representing the 32 most significant bits). + /// Similarly, (q0, q1) represent the quotient and r the remainder. + FalconDiv, + /// Given an element in a quadratic extension field on the top of the stack (i.e., a0, b1), /// computes its multiplicative inverse and push the result onto the advice stack. /// @@ -310,6 +327,7 @@ impl SystemEvent { SystemEvent::MapValueToStack => EVENT_MAP_VALUE_TO_STACK, SystemEvent::MapValueToStackN => EVENT_MAP_VALUE_TO_STACK_N, SystemEvent::U64Div => EVENT_U64_DIV, + SystemEvent::FalconDiv => EVENT_FALCON_DIV, SystemEvent::Ext2Inv => EVENT_EXT2_INV, SystemEvent::Ext2Intt => EVENT_EXT2_INTT, SystemEvent::SmtPeek => EVENT_SMT_PEEK, @@ -335,6 +353,7 @@ impl SystemEvent { EVENT_MAP_VALUE_TO_STACK => Some(SystemEvent::MapValueToStack), EVENT_MAP_VALUE_TO_STACK_N => Some(SystemEvent::MapValueToStackN), EVENT_U64_DIV => Some(SystemEvent::U64Div), + EVENT_FALCON_DIV => Some(SystemEvent::FalconDiv), EVENT_EXT2_INV => Some(SystemEvent::Ext2Inv), EVENT_EXT2_INTT => Some(SystemEvent::Ext2Intt), EVENT_SMT_PEEK => Some(SystemEvent::SmtPeek), @@ -367,6 +386,7 @@ impl fmt::Display for SystemEvent { Self::MapValueToStack => write!(f, "map_value_to_stack"), Self::MapValueToStackN => write!(f, "map_value_to_stack_with_len"), Self::U64Div => write!(f, "div_u64"), + Self::FalconDiv => write!(f, "falcon_div"), Self::Ext2Inv => write!(f, "ext2_inv"), Self::Ext2Intt => write!(f, "ext2_intt"), Self::SmtPeek => write!(f, "smt_peek"), diff --git a/processor/src/operations/sys_ops/sys_event_handlers.rs b/processor/src/operations/sys_ops/sys_event_handlers.rs index f37612971..beaf30176 100644 --- a/processor/src/operations/sys_ops/sys_event_handlers.rs +++ b/processor/src/operations/sys_ops/sys_event_handlers.rs @@ -18,6 +18,9 @@ use crate::{ /// The offset of the domain value on the stack in the `hdword_to_map_with_domain` system event. const HDWORD_TO_MAP_WITH_DOMAIN_DOMAIN_OFFSET: usize = 8; +/// Falcon signature prime. +const M: u64 = 12289; + impl Process { pub(super) fn handle_system_event( &self, @@ -38,6 +41,7 @@ impl Process { copy_map_value_to_adv_stack(advice_provider, process_state, true) }, SystemEvent::U64Div => push_u64_div_result(advice_provider, process_state), + SystemEvent::FalconDiv => push_falcon_mod_result(advice_provider, process_state), SystemEvent::Ext2Inv => push_ext2_inv_result(advice_provider, process_state), SystemEvent::Ext2Intt => push_ext2_intt_result(advice_provider, process_state), SystemEvent::SmtPeek => push_smtpeek_result(advice_provider, process_state), @@ -339,6 +343,45 @@ pub fn push_u64_div_result( Ok(()) } +/// Pushes the result of divison (both the quotient and the remainder) of a [u64] by the Falcon +/// prime (M = 12289) onto the advice stack. +/// +/// Inputs: +/// Operand stack: [a1, a0, ...] +/// Advice stack: [...] +/// +/// Outputs: +/// Operand stack: [a1, a0, ...] +/// Advice stack: [q0, q1, r, ...] +/// +/// Where (a0, a1) are the 32-bit limbs of the dividend (with a0 representing the 32 least +/// significant bits and a1 representing the 32 most significant bits). +/// Similarly, (q0, q1) represent the quotient and r the remainder. +/// +/// # Errors +/// Returns an error if the divisor is ZERO. +pub fn push_falcon_mod_result( + advice_provider: &mut impl AdviceProvider, + process: ProcessState, +) -> Result<(), ExecutionError> { + let dividend_hi = process.get_stack_item(0).as_int(); + let dividend_lo = process.get_stack_item(1).as_int(); + let dividend = (dividend_hi << 32) + dividend_lo; + + let quotient = dividend / M; + let remainder = dividend - quotient * M; + + let (q_hi, q_lo) = u64_to_u32_elements(quotient); + let (r_hi, r_lo) = u64_to_u32_elements(remainder); + assert_eq!(r_hi, ZERO); + + advice_provider.push_stack(AdviceSource::Value(r_lo))?; + advice_provider.push_stack(AdviceSource::Value(q_lo))?; + advice_provider.push_stack(AdviceSource::Value(q_hi))?; + + Ok(()) +} + /// Given an element in a quadratic extension field on the top of the stack (i.e., a0, b1), /// computes its multiplicative inverse and push the result onto the advice stack. /// diff --git a/stdlib/asm/crypto/dsa/rpo_falcon512.masm b/stdlib/asm/crypto/dsa/rpo_falcon512.masm index e142df9ac..2493f6a54 100644 --- a/stdlib/asm/crypto/dsa/rpo_falcon512.masm +++ b/stdlib/asm/crypto/dsa/rpo_falcon512.masm @@ -1,64 +1,72 @@ use.std::crypto::hashes::rpo +use.std::math::u64 # CONSTANTS # ================================================================================================= const.J=77321994752 const.M=12289 +const.M_HALF=6144 # (M-1) / 2 const.SQUARE_NORM_BOUND=34034726 # MODULAR REDUCTION FALCON PRIME # ============================================================================================= -#! Given dividend ( i.e. field element a ) on stack top, this routine computes c = a % 12289 +#! Given dividend ( i.e. a u64 given by its lower and higher u32 decomposition ) on the stack, +#! this routine computes c = a % 12289 #! #! Expected stack state #! -#! [a, ...] +#! [a_hi, a_lo, ...] #! #! Output stack state looks like #! #! [c, ...] | c = a % 12289 +#! +#! Cycles: 31 export.mod_12289 - u32split - push.M.0 - - adv.push_u64div + adv.push_falcon_div + # the advice stack contains now [qhi, qlo, r, ...] where q = qhi * 2^32 + qlo is quotient + # and r is remainder adv_push.2 u32assert2 + # => [qlo, qhi, a_hi, a_lo, ...] - swap push.M u32overflowing_mul + # => [overflow, M * qlo % 2^32, qhi, a_hi, a_lo, ...] movup.2 push.M + # => [M, qhi, overflow, M * qlo % 2^32, a_hi, a_lo, ...] u32overflowing_madd + # => [t1, t0, M * qlo % 32, a_hi, a_lo, ...] where t = t1 * 2^32 + t0 and t = M * qhi + overflow + # Note by the bound on x - r = q * M, we are guaranteed that t1 = 0 drop + # => [M * q / 2^32, (M * q) % 2^32, a_hi, a_lo, ...] + # => [res_hi, res_lo, a_hi, a_lo, ...] - adv_push.2 - drop - u32assert + adv_push.1 + dup u32lt.M assert + # => [r, res_hi, res_lo, a_hi, a_lo, ...] dup - movup.3 u32overflowing_add + # => [flag, (res_lo + r) % 2^32, r, res_hi, a_hi, a_lo, ...] where u = uhi * 2^32 + ulo and u = (res_lo + r) / 2^32 movup.3 u32overflowing_add + # => [flag, final_res_hi, final_res_lo, r, a_hi, a_lo, ...] flag should be 0 by the bound on inputs drop - - movup.5 + # => [final_res_hi, final_res_lo, r, a_hi, a_lo, ...] + + movup.3 assert_eq - movup.4 + movup.2 assert_eq - - swap - drop - swap - drop + # => [r, ...] end # HASH-TO-POINT @@ -379,7 +387,7 @@ export.probabilistic_product.16 end -# SQUARE NORM OF Z_q[x]/(phi) POLYNOMIALS +# SQUARE NORM OF Z_M[x]/(phi) POLYNOMIALS # ============================================================================================= #! Normalizes an `e` in [0, q) to be in [-(q-1) << 1, (q-1) << 1) and returns its square norm. @@ -399,20 +407,19 @@ end #! Input: [e, ...] #! Output [norm(e)^2, ...] #! -#! Cycles: 21 +#! Cycles: 20 export.norm_sq dup dup mul - swap - #=> [e, e^2, ...] + #=> [e^2, e, ...] - dup - push.6144 - u32gt - #=> [phi, e, e^2, ...] + push.M_HALF + dup.2 + u32lt + #=> [phi, e^2, e, ...] - swap + movup.2 mul.24578 # 2*q push.151019521 # q^2 sub @@ -422,33 +429,61 @@ export.norm_sq #=> [norm(e)^2, ...] end -#! On input a tuple (u, w, v), the following computes (v - (u + (- w % q) % q) % q). +#! Given a tuple (u, w, v), we want to compute (v - (u + (- w % q) % q) % q), where: +#! +#! 1. v is a field element given by its u32 decomposition i.e., (c_lo, c_hi) such that +#! v = c_hi * 2**32 + c_lo +#! 2. w is a field element representing the (i+512)-th coefficient of the product polynomial +#! pi (i.e., h * s2). We are guaranteed that w is at most 512 * (q-1)^2. +#! 3. u is a field element representing the i-th coefficient of the product polynomial +#! pi (i.e., h * s2). We are guaranteed that u is at most 512 * (q-1)^2. +#! #! We can avoid doing three modular reductions by using the following facts: #! -#! 1. q is much smaller than the Miden prime. Precisely, q * 2^50 < Q -#! 2. The coefficients of the product polynomial, u and w, are less than J := 512 * q^2 -#! 3. The coefficients of c are less than q. +#! 1. q is much smaller than the Miden prime Q. Precisely, q * 2^50 < Q +#! 2. The coefficients of the product polynomial, u and w, are strictly less than J := 512 * q^2. +#! 3. The coefficients of c are at most q - 1. #! -#! This means that we can substitute (v - (u + (- w % q) % q) % q) with v + w + J - u without -#! risking Q-overflow since |v + w + J - u| < 1025 * q^2 +#! This means that we can substitute (v - (u + (- w % q) % q) % q) with v + w + J - u +#! (note J % q= 0) without risking Q-underflow but we can still overflow. +#! For this reason, we use the u32 decomposition of v and perform the addition of +#! v and w + J - u as u64. Note that |w + J - u| <= 1024 * (q - 1)^2 +#! and hence there is the possibility of an overflow when we add v and w + J - u as u64. +#! When there is an overflow, we add 10952, which is equal to 2^32 % q, to the upper u32 limb of +#! the result of (v + (w + J - u)). Note that since |w + J - u| <= 1024 * (q-1)^2 < 2^38, and +#! 10952 < q, we are guaranteed that this final u32 addition to the upper limb will not overflow. #! #! To get the final result we reduce (v + w + J - u) modulo q. #! -#! Input: [v, w, u, ...] +#! Input: [pi0, pi512 + J, c_hi, c_lo, ...] #! Output: [e, ...] #! -#! Cycles: 44 +#! Cycles: 45 export.diff_mod_q - # 1) v + w + J - add push.J add - #=> [v + w + J, u] + # 1) Subtract + sub + #=> [pi512 + J - pi, c_hi, c_lo, ...] - # 2) v + w + J - u - swap sub - #=> [v + w + J - u] + # 2) u32split first u64 + u32split + #=> [tmp_hi, tmp_lo, c_hi, c_lo, ...] + + # 3) Add the two u64-s + exec.u64::overflowing_add + #=> [flag, res_hi, res_lo, ..] + + # 4) Handle potential overflow in the u64 addition + push.10952 # 2^32 mod q + push.0 + #=> [0, 10952, flag, res_hi, res_lo, ..] + swap.2 + #=> [flag, 10952, 0, res_hi, res_lo, ..] + cdrop + add + #=> [res_hi, res_lo, ..] - # 3) Reduce modulo q + # 5) Reduce modulo q exec.mod_12289 #=> [e, ...] end @@ -484,65 +519,89 @@ end #! Input: [pi_ptr, ...] #! Output: [norm_sq(s1), ...] #! -#! Cycles: 59000 +#! Cycles: 40454 export.compute_s1_norm_sq repeat.128 # 1) Load the next 4 * 3 coefficients - # load c_i + # load the next four pi_i padw - dup.4 add.5124 + dup.4 mem_loadw - # load pi_{i+512} + # load the next four pi_{i+512} padw dup.8 add.512 mem_loadw - # load pi_4 + # load the next four c_i padw - dup.12 + dup.12 add.5124 mem_loadw - #=> [PI, PI_{i+512}, C, pi_ptr, ...] + #=> [C, PI_{i+512}, PI, pi_ptr, ...] # 2) Compute the squared norm of (i + 0)-th coefficient of s1 - movup.8 - exec.mod_12289 - movup.5 - #=> [v, w, u, ...] where u is the i-th coefficient of `pi`, v is the i-th - # coefficient of `c` and w is the (512 + i)-th coefficient of `pi` polynomial. + u32split + #=> [c0_hi, c0_lo, c1, c2, c3, PI_{i+512}, PI, pi_ptr, ...] + movup.5 + push.J add + #=> [pi512_0, c0_hi, c0_lo, c1, c2, c3, pi512_1, pi512_2, pi512_3, PI, pi_ptr, ...] + movup.9 + #=> [pi0, pi512_0, c_hi, c_lo, c1, c2, c3, pi512_1, pi512_2, pi512_3, pi1, pi2, pi3, pi_ptr, ...] exec.diff_mod_q - #=> [e, ...] - + #=> [e, c1, c2, c3, pi512_1, pi512_2, pi512_3, pi1, pi2, pi3, pi_ptr, ...] exec.norm_sq #=> [norm(e)^2, ...] # Move the result out of the way so that we can process the remaining coefficients movdn.10 + #=> [c1, c2, c3, pi512_1, pi512_2, pi512_3, pi1, pi2, pi3, pi_ptr, e0, ...] # 3) Compute the squared norm of (i + 1)-th coefficient of s1 - movup.6 - exec.mod_12289 + + u32split + #=> [c1_hi, c1_lo, c2, c3, pi512_1, pi512_2, pi512_3, pi1, pi2, pi3, pi_ptr, e0, ...] movup.4 + #=> [pi512_1, c1_hi, c1_lo, c2, c3, pi512_2, pi512_3, pi2, pi3, pi_ptr, e0, ...] + push.J add + movup.7 + #=> [pi1, pi512_1, c1_hi, c1_lo, c2, c3, pi512_2, pi512_3, pi2, pi3, pi_ptr, e0, ...] exec.diff_mod_q exec.norm_sq + #=> [e, c2, c3, pi512_2, pi512_3, pi2, pi3, pi_ptr, e0, ...] + movdn.7 + #=> [c2, c3, pi512_2, pi512_3, pi2, pi3, pi_ptr, e0, e1, ...] # 4) Compute the squared norm of (i + 2)-th coefficient of s1 - movup.4 - exec.mod_12289 + + u32split + #=> [c2_hi, c2_lo, c3, pi512_2, pi512_3, pi2, pi3, pi_ptr, e0, e1, ...] movup.3 + push.J add + #=> [pi512_2, c2_hi, c2_lo, c3, pi512_3, pi2, pi3, pi_ptr, e0, e1, ...] + movup.5 + #=> [pi2, pi512_2, c2_hi, c2_lo, c3, pi512_3, pi3, pi_ptr, e0, e1, ...] exec.diff_mod_q exec.norm_sq + movdn.4 + #=> [c3, pi512_3, pi3, pi_ptr, e, e, e, ...] # 5) Compute the squared norm of (i + 3)-th coefficient of s1 - movup.2 - exec.mod_12289 - movup.2 + + u32split + #=> [c3_hi, c3_lo, pi512_3, pi3, pi_ptr, e0, e1, e2, ...] + movup.2 push.J add + movup.3 + #=> [pi3, pi512_3, c3_hi, c3_lo, pi_ptr, e0, e1, e2, ...] exec.diff_mod_q + #=> [e3, pi_ptr, e0, e1, e2, ...] exec.norm_sq + #=> [e3, pi_ptr, e0, e1, e2, ...] + swap + #=> [pi_ptr, e3, e0, e1, e2, ...] # 6) Increment the pointer add.4 @@ -561,21 +620,28 @@ end #! Input: [s2_ptr, ...] #! Output: [norm_sq(s2), ...] #! -#! Cycles: ~13500 +#! Cycles: 11137 export.compute_s2_norm_sq repeat.128 padw - dup.4 add.4 swap.5 + dup.4 mem_loadw - repeat.4 - exec.norm_sq - movdn.4 - end - + exec.norm_sq + swap + exec.norm_sq + add + swap + exec.norm_sq + add + swap + exec.norm_sq + add + swap + add.4 end drop - repeat.511 + repeat.127 add end end @@ -592,7 +658,7 @@ end #! Input: [PK, MSG, ...] #! Output: [...] #! -#! Cycles: ~ 90400 +#! Cycles: ~ 70465 export.verify.6660 # 1) Generate a Falcon signature using the secret key associated to PK on message MSG. @@ -639,13 +705,13 @@ export.verify.6660 exec.probabilistic_product #=> [...] (Cycles: 2504) - # 6) Compute the squared norm of s1 := c - h * s2 (in Z_q[x]/(phi)) + # 6) Compute the squared norm of s1 := c - h * s2 (in Z_M[x]/(phi)) locaddr.1024 #=> [pi_ptr, ...] exec.compute_s1_norm_sq - #=> [norm_sq(s1), ...] (Cycles: 58888) + #=> [norm_sq(s1), ...] (Cycles: 40454) # 7) Compute the squared norm of s2 @@ -653,7 +719,7 @@ export.verify.6660 #=> [s2_ptr, norm_sq(s1), ...] exec.compute_s2_norm_sq - #=> [norm_sq(s2), norm_sq(s1), ...] (Cycles: 13322) + #=> [norm_sq(s2), norm_sq(s1), ...] (Cycles: 11137) # 8) Check that ||(s1, s2)||^2 < K diff --git a/stdlib/tests/crypto/falcon.rs b/stdlib/tests/crypto/falcon.rs index 233ad9a7e..88bdf6f0b 100644 --- a/stdlib/tests/crypto/falcon.rs +++ b/stdlib/tests/crypto/falcon.rs @@ -14,9 +14,11 @@ use test_utils::{ MerkleStore, Rpo256, }, expect_exec_error_matches, + proptest::proptest, rand::{rand_value, rand_vector}, FieldElement, QuadFelt, Word, WORD_SIZE, }; +use vm_core::StarkField; /// Modulus used for rpo falcon 512. const M: u64 = 12289; @@ -86,21 +88,49 @@ fn test_falcon512_diff_mod_q() { exec.rpo_falcon512::diff_mod_q end "; - - let u = rand::thread_rng().gen_range(0..J); - let v = rand::thread_rng().gen_range(Q..M); + let v = rand::thread_rng().gen_range(0..Felt::MODULUS); + let (v_lo, v_hi) = (v as u32, v >> 32); let w = rand::thread_rng().gen_range(0..J); + let u = rand::thread_rng().gen_range(0..J); - let test1 = build_test!(source, &[u, v, w]); + let test1 = build_test!(source, &[v_lo as u64, v_hi as u64, w + J, u]); // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. - let expanded_answer = (v as i64 - - (u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64)) - .rem_euclid(M as i64); - let simplified_answer = (v + w + J - u).rem_euclid(M); - assert_eq!(expanded_answer, i64::try_from(simplified_answer).unwrap()); + let expanded_answer = (v as i128 + - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) + .rem_euclid(M as i128); + let simplified_answer = (v as i128 + w as i128 + J as i128 - u as i128).rem_euclid(M as i128); + assert_eq!(expanded_answer, i128::try_from(simplified_answer).unwrap()); + + test1.expect_stack(&[simplified_answer as u64]); +} + +proptest! { + #[test] + fn diff_mod_q_proptest(v in 0..Felt::MODULUS, w in 0..J, u in 0..J) { + + let source = " + use.std::crypto::dsa::rpo_falcon512 + + begin + exec.rpo_falcon512::diff_mod_q + end + "; + + let (v_lo, v_hi) = (v as u32, v >> 32); + + let test1 = build_test!(source, &[v_lo as u64, v_hi as u64, w + J, u]); + + // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. + let expanded_answer = (v as i128 + - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) + .rem_euclid(M as i128); + let simplified_answer = (v as i128 + w as i128 + J as i128 - u as i128).rem_euclid(M as i128); + assert_eq!(expanded_answer, i128::try_from(simplified_answer).unwrap()); + + test1.prop_expect_stack(&[simplified_answer as u64])?; + } - test1.expect_stack(&[simplified_answer]); } #[test] From 49a2ae47eb762333094dfbdfd9b7360f4efd6875 Mon Sep 17 00:00:00 2001 From: Al-Kindi-0 <82364884+Al-Kindi-0@users.noreply.github.com> Date: Sun, 19 Jan 2025 21:58:39 +0100 Subject: [PATCH 2/6] chore: address feedback --- CHANGELOG.md | 1 + stdlib/asm/crypto/dsa/rpo_falcon512.masm | 10 +++++++++- stdlib/tests/crypto/falcon.rs | 23 ++++++++++++++++++++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03a183f36..3c469988a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ - Optimized the computation of the DEEP queries in the recursive verifier (#1594). - Added validity checks for the inputs to the recursive verifier (#1596). - Allow multiple memory reads in the same clock cycle (#1626) +- Improved Falcon signiture verification (#1623). ## 0.11.0 (2024-11-04) diff --git a/stdlib/asm/crypto/dsa/rpo_falcon512.masm b/stdlib/asm/crypto/dsa/rpo_falcon512.masm index 2493f6a54..8bdfb3165 100644 --- a/stdlib/asm/crypto/dsa/rpo_falcon512.masm +++ b/stdlib/asm/crypto/dsa/rpo_falcon512.masm @@ -8,6 +8,7 @@ const.J=77321994752 const.M=12289 const.M_HALF=6144 # (M-1) / 2 const.SQUARE_NORM_BOUND=34034726 +const.MEMORY_POINTER_OFFSET_OF_HASH_TO_POINT_POLY_FROM_PRODUCT_POLY=1281 # MODULAR REDUCTION FALCON PRIME # ============================================================================================= @@ -535,7 +536,7 @@ export.compute_s1_norm_sq # load the next four c_i padw - dup.12 add.5124 + dup.12 add.MEMORY_POINTER_OFFSET_OF_HASH_TO_POINT_POLY_FROM_PRODUCT_POLY mem_loadw #=> [C, PI_{i+512}, PI, pi_ptr, ...] @@ -626,21 +627,28 @@ export.compute_s2_norm_sq padw dup.4 mem_loadw + # => [c3, c2, c1, c0, s2_ptr, ...] where ci are coefficients of s2 exec.norm_sq swap + # => [c2, norm_sq(c3), c1, c0, s2_ptr, ...] exec.norm_sq add swap + # => [c1, norm_sq(c2) + norm_sq(c3), c0, s2_ptr, ...] exec.norm_sq add swap + # => [c0, norm_sq(c1) + norm_sq(c2) + norm_sq(c3), s2_ptr, ...] exec.norm_sq add + # => [norm_sq(c0) + norm_sq(c1) + norm_sq(c2) + norm_sq(c3), s2_ptr, ...] swap add.4 + # => [s2_ptr + 1, norm_sq(c0) + norm_sq(c1) + norm_sq(c2) + norm_sq(c3), ...] end drop + # (512 / 4) - 1 = 127 additions are needed repeat.127 add end diff --git a/stdlib/tests/crypto/falcon.rs b/stdlib/tests/crypto/falcon.rs index 88bdf6f0b..0fbc46273 100644 --- a/stdlib/tests/crypto/falcon.rs +++ b/stdlib/tests/crypto/falcon.rs @@ -88,10 +88,12 @@ fn test_falcon512_diff_mod_q() { exec.rpo_falcon512::diff_mod_q end "; - let v = rand::thread_rng().gen_range(0..Felt::MODULUS); + let v = Felt::MODULUS - 1; let (v_lo, v_hi) = (v as u32, v >> 32); - let w = rand::thread_rng().gen_range(0..J); - let u = rand::thread_rng().gen_range(0..J); + + // test largest possible value given v + let w = J - 1; + let u = 0; let test1 = build_test!(source, &[v_lo as u64, v_hi as u64, w + J, u]); @@ -103,6 +105,21 @@ fn test_falcon512_diff_mod_q() { assert_eq!(expanded_answer, i128::try_from(simplified_answer).unwrap()); test1.expect_stack(&[simplified_answer as u64]); + + // test smallest possible value given v + let w = 0; + let u = J - 1; + + let test2 = build_test!(source, &[v_lo as u64, v_hi as u64, w + J, u]); + + // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. + let expanded_answer = (v as i128 + - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) + .rem_euclid(M as i128); + let simplified_answer = (v as i128 + w as i128 + J as i128 - u as i128).rem_euclid(M as i128); + assert_eq!(expanded_answer, i128::try_from(simplified_answer).unwrap()); + + test2.expect_stack(&[simplified_answer as u64]); } proptest! { From 3e31b3548f1c8c245cb0793b674cddacc2eadd37 Mon Sep 17 00:00:00 2001 From: Al-Kindi-0 <82364884+Al-Kindi-0@users.noreply.github.com> Date: Sun, 19 Jan 2025 22:20:17 +0100 Subject: [PATCH 3/6] fix: clippy --- stdlib/tests/crypto/falcon.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stdlib/tests/crypto/falcon.rs b/stdlib/tests/crypto/falcon.rs index 0fbc46273..1996b1f8c 100644 --- a/stdlib/tests/crypto/falcon.rs +++ b/stdlib/tests/crypto/falcon.rs @@ -95,14 +95,14 @@ fn test_falcon512_diff_mod_q() { let w = J - 1; let u = 0; - let test1 = build_test!(source, &[v_lo as u64, v_hi as u64, w + J, u]); + let test1 = build_test!(source, &[v_lo as u64, v_hi, w + J, u]); // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. let expanded_answer = (v as i128 - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) .rem_euclid(M as i128); let simplified_answer = (v as i128 + w as i128 + J as i128 - u as i128).rem_euclid(M as i128); - assert_eq!(expanded_answer, i128::try_from(simplified_answer).unwrap()); + assert_eq!(expanded_answer, simplified_answer); test1.expect_stack(&[simplified_answer as u64]); @@ -110,14 +110,14 @@ fn test_falcon512_diff_mod_q() { let w = 0; let u = J - 1; - let test2 = build_test!(source, &[v_lo as u64, v_hi as u64, w + J, u]); + let test2 = build_test!(source, &[v_lo as u64, v_hi, w + J, u]); // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. let expanded_answer = (v as i128 - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) .rem_euclid(M as i128); let simplified_answer = (v as i128 + w as i128 + J as i128 - u as i128).rem_euclid(M as i128); - assert_eq!(expanded_answer, i128::try_from(simplified_answer).unwrap()); + assert_eq!(expanded_answer, simplified_answer); test2.expect_stack(&[simplified_answer as u64]); } @@ -136,14 +136,14 @@ proptest! { let (v_lo, v_hi) = (v as u32, v >> 32); - let test1 = build_test!(source, &[v_lo as u64, v_hi as u64, w + J, u]); + let test1 = build_test!(source, &[v_lo as u64, v_hi, w + J, u]); // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. let expanded_answer = (v as i128 - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) .rem_euclid(M as i128); let simplified_answer = (v as i128 + w as i128 + J as i128 - u as i128).rem_euclid(M as i128); - assert_eq!(expanded_answer, i128::try_from(simplified_answer).unwrap()); + assert_eq!(expanded_answer, simplified_answer); test1.prop_expect_stack(&[simplified_answer as u64])?; } From 1d8ff5e17f7feb1e88aafac79e04c1358440f52e Mon Sep 17 00:00:00 2001 From: Al-Kindi-0 <82364884+Al-Kindi-0@users.noreply.github.com> Date: Sun, 26 Jan 2025 13:32:54 +0100 Subject: [PATCH 4/6] chore: address feedback --- .../operations/sys_ops/sys_event_handlers.rs | 5 +- stdlib/asm/crypto/dsa/rpo_falcon512.masm | 80 ++++++++++--------- stdlib/tests/crypto/falcon.rs | 14 ++-- 3 files changed, 52 insertions(+), 47 deletions(-) diff --git a/processor/src/operations/sys_ops/sys_event_handlers.rs b/processor/src/operations/sys_ops/sys_event_handlers.rs index beaf30176..7c78a15da 100644 --- a/processor/src/operations/sys_ops/sys_event_handlers.rs +++ b/processor/src/operations/sys_ops/sys_event_handlers.rs @@ -352,7 +352,7 @@ pub fn push_u64_div_result( /// /// Outputs: /// Operand stack: [a1, a0, ...] -/// Advice stack: [q0, q1, r, ...] +/// Advice stack: [q1, q0, r, ...] /// /// Where (a0, a1) are the 32-bit limbs of the dividend (with a0 representing the 32 least /// significant bits and a1 representing the 32 most significant bits). @@ -368,8 +368,7 @@ pub fn push_falcon_mod_result( let dividend_lo = process.get_stack_item(1).as_int(); let dividend = (dividend_hi << 32) + dividend_lo; - let quotient = dividend / M; - let remainder = dividend - quotient * M; + let (quotient, remainder) = (dividend / M, dividend % M); let (q_hi, q_lo) = u64_to_u32_elements(quotient); let (r_hi, r_lo) = u64_to_u32_elements(remainder); diff --git a/stdlib/asm/crypto/dsa/rpo_falcon512.masm b/stdlib/asm/crypto/dsa/rpo_falcon512.masm index 8bdfb3165..fd2222430 100644 --- a/stdlib/asm/crypto/dsa/rpo_falcon512.masm +++ b/stdlib/asm/crypto/dsa/rpo_falcon512.masm @@ -7,6 +7,8 @@ use.std::math::u64 const.J=77321994752 const.M=12289 const.M_HALF=6144 # (M-1) / 2 +const.M_DOUBLE=24578 +const.M_SQUARE=151019521 const.SQUARE_NORM_BOUND=34034726 const.MEMORY_POINTER_OFFSET_OF_HASH_TO_POINT_POLY_FROM_PRODUCT_POLY=1281 @@ -14,7 +16,7 @@ const.MEMORY_POINTER_OFFSET_OF_HASH_TO_POINT_POLY_FROM_PRODUCT_POLY=1281 # ============================================================================================= #! Given dividend ( i.e. a u64 given by its lower and higher u32 decomposition ) on the stack, -#! this routine computes c = a % 12289 +#! this routine computes c = a % M where M = 12289 #! #! Expected stack state #! @@ -22,9 +24,9 @@ const.MEMORY_POINTER_OFFSET_OF_HASH_TO_POINT_POLY_FROM_PRODUCT_POLY=1281 #! #! Output stack state looks like #! -#! [c, ...] | c = a % 12289 +#! [c, ...] | c = a % M #! -#! Cycles: 31 +#! Cycles: 27 export.mod_12289 adv.push_falcon_div # the advice stack contains now [qhi, qlo, r, ...] where q = qhi * 2^32 + qlo is quotient @@ -49,7 +51,10 @@ export.mod_12289 # => [res_hi, res_lo, a_hi, a_lo, ...] adv_push.1 - dup u32lt.M assert + dup + u32overflowing_sub.M + # => [underflow_bit, r - M, r, res_hi, res_lo, a_hi, a_lo, ...] + assert drop # => [r, res_hi, res_lo, a_hi, a_lo, ...] dup @@ -391,15 +396,15 @@ end # SQUARE NORM OF Z_M[x]/(phi) POLYNOMIALS # ============================================================================================= -#! Normalizes an `e` in [0, q) to be in [-(q-1) << 1, (q-1) << 1) and returns its square norm. +#! Normalizes an `e` in [0, M) to be in [-(M-1) << 1, (M-1) << 1) and returns its square norm. #! #! We use the following formula to do so: -#! normalize(e) = e^2 - phi * (2*q*e - q^2) where phi := (e > (q - 1)/2) +#! normalize(e) = e^2 - phi * (2*M*e - M^2) where phi := (e > (M - 1)/2) #! #! The formula implements: #! -#! if e > (q-1)/2: -#! return (q - e)^2 +#! if e > (M-1)/2: +#! return (M - e)^2 #! else: #! return e^2 #! @@ -421,16 +426,17 @@ export.norm_sq #=> [phi, e^2, e, ...] movup.2 - mul.24578 # 2*q - push.151019521 # q^2 + push.M_DOUBLE # 2*M + mul + push.M_SQUARE # M^2 sub - #=> [2*q*e - q^2, phi, e^2, ...] + #=> [2*M*e - M^2, phi, e^2, ...] mul sub #=> [norm(e)^2, ...] end -#! Given a tuple (u, w, v), we want to compute (v - (u + (- w % q) % q) % q), where: +#! Given a tuple (u, w, v), we want to compute (v - (u + (- w % M) % M) % M), where: #! #! 1. v is a field element given by its u32 decomposition i.e., (c_lo, c_hi) such that #! v = c_hi * 2**32 + c_lo @@ -441,26 +447,26 @@ end #! #! We can avoid doing three modular reductions by using the following facts: #! -#! 1. q is much smaller than the Miden prime Q. Precisely, q * 2^50 < Q -#! 2. The coefficients of the product polynomial, u and w, are strictly less than J := 512 * q^2. -#! 3. The coefficients of c are at most q - 1. +#! 1. M is much smaller than the Miden prime Q. Precisely, M * 2^50 < Q +#! 2. The coefficients of the product polynomial, u and w, are strictly less than J := 512 * M^2. +#! 3. The coefficients of c are at most M - 1. #! -#! This means that we can substitute (v - (u + (- w % q) % q) % q) with v + w + J - u -#! (note J % q= 0) without risking Q-underflow but we can still overflow. +#! This means that we can substitute (v - (u + (- w % M) % M) % M) with v + w + J - u +#! (note J % M = 0) without risking Q-underflow but we can still overflow. #! For this reason, we use the u32 decomposition of v and perform the addition of -#! v and w + J - u as u64. Note that |w + J - u| <= 1024 * (q - 1)^2 +#! v and w + J - u as u64. Note that |w + J - u| <= 1024 * (M - 1)^2 #! and hence there is the possibility of an overflow when we add v and w + J - u as u64. -#! When there is an overflow, we add 10952, which is equal to 2^32 % q, to the upper u32 limb of -#! the result of (v + (w + J - u)). Note that since |w + J - u| <= 1024 * (q-1)^2 < 2^38, and -#! 10952 < q, we are guaranteed that this final u32 addition to the upper limb will not overflow. +#! When there is an overflow, we add 10952, which is equal to 2^32 % M, to the upper u32 limb of +#! the result of (v + (w + J - u)). Note that since |w + J - u| <= 1024 * (M-1)^2 < 2^38, and +#! 10952 < M, we are guaranteed that this final u32 addition to the upper limb will not overflow. #! -#! To get the final result we reduce (v + w + J - u) modulo q. +#! To get the final result we reduce (v + w + J - u) modulo M. #! #! Input: [pi0, pi512 + J, c_hi, c_lo, ...] #! Output: [e, ...] #! #! Cycles: 45 -export.diff_mod_q +export.diff_mod_M # 1) Subtract sub @@ -475,7 +481,7 @@ export.diff_mod_q #=> [flag, res_hi, res_lo, ..] # 4) Handle potential overflow in the u64 addition - push.10952 # 2^32 mod q + push.10952 # 2^32 mod M push.0 #=> [0, 10952, flag, res_hi, res_lo, ..] swap.2 @@ -484,30 +490,30 @@ export.diff_mod_q add #=> [res_hi, res_lo, ..] - # 5) Reduce modulo q + # 5) Reduce modulo M exec.mod_12289 #=> [e, ...] end #! Takes a pointer to a polynomial pi of degree less than 1024 with coefficients in Z_Q and #! a polynomial c of degree 512 with coefficients also in Z_Q, where Q is the Miden prime. -#! The goal is to compute s1 = c - pi = c - h * s2 in Z_q[x]/(phi) where q is the Falcon prime. +#! The goal is to compute s1 = c - pi = c - h * s2 in Z_M[x]/(phi) where M is the Falcon prime. #! The pointer pi_ptr points both to pi and c through the relation c_ptr = pi_ptr + offset #! where offset := 1281. #! The naive way to compute s1 would be to first reduce the polynomial pi modulo the Falcon -#! prime q and then modulo the irreducible polynomial phi = x^512 + 1. Then we would need to negate -#! the coefficients of pi modulo q and only then can we add these coefficients to the coefficients -#! of c and then reduce the result modulo q one more time. +#! prime M and then modulo the irreducible polynomial phi = x^512 + 1. Then we would need to negate +#! the coefficients of pi modulo M and only then can we add these coefficients to the coefficients +#! of c and then reduce the result modulo M one more time. #! Knowing that the end goal of computing c is to compute its norm squared, we can do better. #! -#! We can compute s1 in a single pass by delaying the q-modular reduction til the end. This can +#! We can compute s1 in a single pass by delaying the M-modular reduction til the end. This can #! be achieved through a careful analysis of the computation of the difference between pi and c. #! #! The i-th coefficient s1_i of s1 is equal to c_i - (pi_i - pi_{512 + i}) which is equal to #! c_i + pi_{512 + i} - pi_i. Now, we know that the size of the pi_i coefficients is bounded by -#! J := 512 * q^2 and this means that J + pi_{512 + i} - pi_i does not Q-underflow and since -#! J = 0 modulo q, the addition of J does not affect the final result. It is also important to -#! note that adding J does not Q-overflow by virtue of q * 2^50 < Q. +#! J := 512 * M^2 and this means that J + pi_{512 + i} - pi_i does not Q-underflow and since +#! J = 0 mod M, the addition of J does not affect the final result. It is also important to +#! note that adding J does not Q-overflow by virtue of M * 2^50 < Q. #! All of the above implies that we can compute s1_i with only one modular reduction at the end, #! in addition to one modular reduction applied to c_i. #! Moreover, since we are only interested in the square norm of s1_i, we do not have to store @@ -549,7 +555,7 @@ export.compute_s1_norm_sq #=> [pi512_0, c0_hi, c0_lo, c1, c2, c3, pi512_1, pi512_2, pi512_3, PI, pi_ptr, ...] movup.9 #=> [pi0, pi512_0, c_hi, c_lo, c1, c2, c3, pi512_1, pi512_2, pi512_3, pi1, pi2, pi3, pi_ptr, ...] - exec.diff_mod_q + exec.diff_mod_M #=> [e, c1, c2, c3, pi512_1, pi512_2, pi512_3, pi1, pi2, pi3, pi_ptr, ...] exec.norm_sq #=> [norm(e)^2, ...] @@ -567,7 +573,7 @@ export.compute_s1_norm_sq push.J add movup.7 #=> [pi1, pi512_1, c1_hi, c1_lo, c2, c3, pi512_2, pi512_3, pi2, pi3, pi_ptr, e0, ...] - exec.diff_mod_q + exec.diff_mod_M exec.norm_sq #=> [e, c2, c3, pi512_2, pi512_3, pi2, pi3, pi_ptr, e0, ...] @@ -583,7 +589,7 @@ export.compute_s1_norm_sq #=> [pi512_2, c2_hi, c2_lo, c3, pi512_3, pi2, pi3, pi_ptr, e0, e1, ...] movup.5 #=> [pi2, pi512_2, c2_hi, c2_lo, c3, pi512_3, pi3, pi_ptr, e0, e1, ...] - exec.diff_mod_q + exec.diff_mod_M exec.norm_sq movdn.4 @@ -596,7 +602,7 @@ export.compute_s1_norm_sq movup.2 push.J add movup.3 #=> [pi3, pi512_3, c3_hi, c3_lo, pi_ptr, e0, e1, e2, ...] - exec.diff_mod_q + exec.diff_mod_M #=> [e3, pi_ptr, e0, e1, e2, ...] exec.norm_sq #=> [e3, pi_ptr, e0, e1, e2, ...] diff --git a/stdlib/tests/crypto/falcon.rs b/stdlib/tests/crypto/falcon.rs index 1996b1f8c..56b79ceef 100644 --- a/stdlib/tests/crypto/falcon.rs +++ b/stdlib/tests/crypto/falcon.rs @@ -69,7 +69,7 @@ fn test_falcon512_norm_sq() { end "; - // normalize(e) = e^2 - phi * (2*q*e - q^2) where phi := (e > (q - 1)/2) + // normalize(e) = e^2 - phi * (2*M*e - M^2) where phi := (e > (M - 1)/2) let upper = rand::thread_rng().gen_range(Q + 1..M); let test_upper = build_test!(source, &[upper]); test_upper.expect_stack(&[(M - upper) * (M - upper)]); @@ -85,7 +85,7 @@ fn test_falcon512_diff_mod_q() { use.std::crypto::dsa::rpo_falcon512 begin - exec.rpo_falcon512::diff_mod_q + exec.rpo_falcon512::diff_mod_M end "; let v = Felt::MODULUS - 1; @@ -97,7 +97,7 @@ fn test_falcon512_diff_mod_q() { let test1 = build_test!(source, &[v_lo as u64, v_hi, w + J, u]); - // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. + // Calculating (v - (u + (- w % M) % M) % M) should be the same as (v + w + J - u) % M. let expanded_answer = (v as i128 - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) .rem_euclid(M as i128); @@ -112,7 +112,7 @@ fn test_falcon512_diff_mod_q() { let test2 = build_test!(source, &[v_lo as u64, v_hi, w + J, u]); - // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. + // Calculating (v - (u + (- w % M) % M) % M) should be the same as (v + w + J - u) % M. let expanded_answer = (v as i128 - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) .rem_euclid(M as i128); @@ -124,13 +124,13 @@ fn test_falcon512_diff_mod_q() { proptest! { #[test] - fn diff_mod_q_proptest(v in 0..Felt::MODULUS, w in 0..J, u in 0..J) { + fn diff_mod_m_proptest(v in 0..Felt::MODULUS, w in 0..J, u in 0..J) { let source = " use.std::crypto::dsa::rpo_falcon512 begin - exec.rpo_falcon512::diff_mod_q + exec.rpo_falcon512::diff_mod_M end "; @@ -138,7 +138,7 @@ proptest! { let test1 = build_test!(source, &[v_lo as u64, v_hi, w + J, u]); - // Calculating (v - (u + (- w % q) % q) % q) should be the same as (v + w + J - u) % q. + // Calculating (v - (u + (- w % M) % M) % M) should be the same as (v + w + J - u) % M. let expanded_answer = (v as i128 - ((u as i64 + -(w as i64).rem_euclid(M as i64)).rem_euclid(M as i64) as i128)) .rem_euclid(M as i128); From 278e285a191f7784cc219132a80600243bd7c33d Mon Sep 17 00:00:00 2001 From: Al-Kindi-0 <82364884+Al-Kindi-0@users.noreply.github.com> Date: Sun, 26 Jan 2025 18:26:30 +0100 Subject: [PATCH 5/6] chore: fix after rebase --- stdlib/asm/crypto/dsa/rpo_falcon512.masm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/asm/crypto/dsa/rpo_falcon512.masm b/stdlib/asm/crypto/dsa/rpo_falcon512.masm index fd2222430..1141281ab 100644 --- a/stdlib/asm/crypto/dsa/rpo_falcon512.masm +++ b/stdlib/asm/crypto/dsa/rpo_falcon512.masm @@ -10,7 +10,7 @@ const.M_HALF=6144 # (M-1) / 2 const.M_DOUBLE=24578 const.M_SQUARE=151019521 const.SQUARE_NORM_BOUND=34034726 -const.MEMORY_POINTER_OFFSET_OF_HASH_TO_POINT_POLY_FROM_PRODUCT_POLY=1281 +const.MEMORY_POINTER_OFFSET_OF_HASH_TO_POINT_POLY_FROM_PRODUCT_POLY=5124 # MODULAR REDUCTION FALCON PRIME # ============================================================================================= From 80cd4ceda589517034e4bb66de3a7027f9d939cb Mon Sep 17 00:00:00 2001 From: Bobbin Threadbare Date: Mon, 3 Feb 2025 11:51:34 -0800 Subject: [PATCH 6/6] chore: fix typos --- assembly/README.md | 2 +- docs/src/user_docs/assembly/code_organization.md | 2 +- processor/README.md | 2 +- stdlib/asm/crypto/hashes/rpo.masm | 4 ++-- stdlib/docs/crypto/hashes/rpo.md | 4 ++-- stdlib/docs/crypto/stark/public_inputs.md | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/assembly/README.md b/assembly/README.md index 2814bb79b..06420b6fb 100644 --- a/assembly/README.md +++ b/assembly/README.md @@ -44,7 +44,7 @@ let program = assembler.assemble_program(&Path::new("./example.masm")).unwrap(); As noted above, the default assembler is instantiated with nothing in it but the source code you provide. If you want to support more complex programs, you will want to factor code into libraries and modules, and then link all of them -together at once. This can be acheived using a set of builder methods of the +together at once. This can be achieved using a set of builder methods of the `Assembler` struct, e.g. `with_kernel_from_module`, `with_library`, etc. We'll look at a few of these in more detail below. See the module documentation diff --git a/docs/src/user_docs/assembly/code_organization.md b/docs/src/user_docs/assembly/code_organization.md index 626bb58b2..5dd961926 100644 --- a/docs/src/user_docs/assembly/code_organization.md +++ b/docs/src/user_docs/assembly/code_organization.md @@ -181,7 +181,7 @@ Miden assembly supports constant declarations. These constants are scoped to the Constants must be declared right after module imports and before any procedures or program bodies. A constant's name must start with an upper-case letter and can contain any combination of numbers, upper-case ASCII letters, and underscores (`_`). The number of characters in a constant name cannot exceed 100. -A constant's value must be in a decimal or hexidecimal form and be in the range between $0$ and $2^{64} - 2^{32}$ (both inclusive). Value can be defined by an arithmetic expression using `+`, `-`, `*`, `/`, `//`, `(`, `)` operators and references to the previously defined constants if it uses only decimal numbers. Here `/` is a field division and `//` is an integer division. Note that the arithmetic expression cannot contain spaces. +A constant's value must be in a decimal or hexadecimal form and be in the range between $0$ and $2^{64} - 2^{32}$ (both inclusive). Value can be defined by an arithmetic expression using `+`, `-`, `*`, `/`, `//`, `(`, `)` operators and references to the previously defined constants if it uses only decimal numbers. Here `/` is a field division and `//` is an integer division. Note that the arithmetic expression cannot contain spaces. ``` use.std::math::u64 diff --git a/processor/README.md b/processor/README.md index 92cebdc72..e59178bac 100644 --- a/processor/README.md +++ b/processor/README.md @@ -52,7 +52,7 @@ The processor is organized into several components: * The range-checker, which is responsible for checking whether values can fit into 16 bits. * The chiplets module, which contains specialized chiplets responsible for handling complex computations (e.g., hashing) as well as random access memory. -These components are connected via two busses: +These components are connected via two buses: * The range-checker bus, which links stack and chiplets modules with the range-checker. * The chiplet bus, which links stack and the decoder with the chiplets module. diff --git a/stdlib/asm/crypto/hashes/rpo.masm b/stdlib/asm/crypto/hashes/rpo.masm index f6a132e27..4494948f1 100644 --- a/stdlib/asm/crypto/hashes/rpo.masm +++ b/stdlib/asm/crypto/hashes/rpo.masm @@ -4,7 +4,7 @@ #! consume an amount of data which is a multiple of the rate (2 words). #! #! Input: [] -#! Ouptut: [PERM, PERM, PERM, ...] +#! Output: [PERM, PERM, PERM, ...] #! #! Cycles: 12 export.init_no_padding @@ -14,7 +14,7 @@ end #! Given the hasher state, returns the hash output. #! #! Input: [C, B, A, ...] -#! Ouptut: [HASH, ...] +#! Output: [HASH, ...] #! #! Where : #! - `A` is the capacity word that will be used by the hashing function. diff --git a/stdlib/docs/crypto/hashes/rpo.md b/stdlib/docs/crypto/hashes/rpo.md index da22de54c..85b998935 100644 --- a/stdlib/docs/crypto/hashes/rpo.md +++ b/stdlib/docs/crypto/hashes/rpo.md @@ -1,8 +1,8 @@ -Prepares the top of the stack with the hasher initial state.

This procedures does not handle padding, therefore, the user is expected to
consume an amount of data which is a multiple of the rate (2 words).

Input: []
Ouptut: [PERM, PERM, PERM, ...]
Cycles: 12
+Prepares the top of the stack with the hasher initial state.

This procedures does not handle padding, therefore, the user is expected to
consume an amount of data which is a multiple of the rate (2 words).

Input: []
Output: [PERM, PERM, PERM, ...]
Cycles: 12
## std::crypto::hashes::rpo | Procedure | Description | | ----------- | ------------- | -| squeeze_digest | Given the hasher state, returns the hash output.

Input: [C, B, A, ...]
Ouptut: [HASH, ...]
where: For the native RPO hasher HASH is B.
Cycles: 9
| +| squeeze_digest | Given the hasher state, returns the hash output.

Input: [C, B, A, ...]
Output: [HASH, ...]
where: For the native RPO hasher HASH is B.
Cycles: 9
| | absorb_double_words_from_memory | Hashes the memory `start_addr` to `end_addr` given an RPO state specified by 3 words.

This requires that `end_addr=start_addr + 2n + 1`, otherwise the procedure will enter an infinite
loop. `end_addr` is not inclusive.

Stack transition:
Input: [C, B, A, start_addr, end_addr, ...]
Output: [C', B', A', end_addr, end_addr ...]
Cycles: 4 + 3 * words, where `words` is the `start_addr - end_addr - 1`

Where `A` is the capacity word that will be used by the hashing function, and `B'` the hash output.
| | hash_memory_words | Hashes the memory `start_addr` to `end_addr`, handles odd number of elements.

Requires `start_addr < end_addr`, `end_addr` is not inclusive.

Stack transition:
Input: [start_addr, end_addr, ...]
Output: [H, ...]
Cycles:
even words: 49 cycles + 3 * words
odd words: 61 cycles + 3 * words
| | hash_memory | Computes hash of Felt values starting at the specified memory address.

This procedure divides the hashing process into two parts: hashing pairs of words using
`absorb_double_words_from_memory` procedure and hashing the remaining values using the `hperm`
instruction.

Inputs: [ptr, num_elements]
Outputs: [HASH]
Cycles:
- If number of elements divides by 8: 47 cycles + 3 * words
- Else: 180 cycles + 3 * words

Panics if number of inputs equals 0.
| diff --git a/stdlib/docs/crypto/stark/public_inputs.md b/stdlib/docs/crypto/stark/public_inputs.md index 96b67699f..cb6cfb298 100644 --- a/stdlib/docs/crypto/stark/public_inputs.md +++ b/stdlib/docs/crypto/stark/public_inputs.md @@ -2,4 +2,4 @@ ## std::crypto::stark::public_inputs | Procedure | Description | | ----------- | ------------- | -| load | Load the public inputs in memory starting from the address referenced by `public_inputs_ptr`.
In parallel, compute the hash of the public inputs being loaded. The hashing starts with
capacity registers of the hash function set to `C` resulting from hashing the proof context.
The ouptut D is the digest of the hashing.

Input: [public_inputs_ptr, C]
Output: [D]
Cycles: 38
| +| load | Load the public inputs in memory starting from the address referenced by `public_inputs_ptr`.
In parallel, compute the hash of the public inputs being loaded. The hashing starts with
capacity registers of the hash function set to `C` resulting from hashing the proof context.
The output D is the digest of the hashing.

Input: [public_inputs_ptr, C]
Output: [D]
Cycles: 38
|