0xWOLAND · 0xWOLAND · Dec 20, 2023 · Dec 20, 2023 · Dec 20, 2023
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -17,4 +17,6 @@ jobs:
     - name: Build
       run: cargo build --verbose
     - name: Run tests
-      run: cargo test --verbose
+      run: cargo test --verbose
+    - name: Run Parallel tests
+      run: cargo test --verbose --features=parallel
diff --git a/BENCHMARKS.md b/BENCHMARKS.md
@@ -1,58 +0,0 @@
-# Benchmarks
-
-## Table of Contents
-
-- [Overview](#overview)
-- [Benchmark Results](#benchmark-results)
-    - [Number-Theoretic Transform Benchmarks](#number-theoretic-transform-benchmarks)
-    - [Polynomial Multiplication Benchmarks](#polynomial-multiplication-benchmarks)
-
-## Overview
-
-This benchmark comparison report shows the difference in performance between parallel, NTT-based and serial, brute-force 
-polynomial multiplication algorithms. Each row entry in the first table is an n-degree forward NTT and each row entry in the second table represents an n-degree polynomial multiplication.
-
-Computer Stats:
-
-```
-CPU(s):                          16
-Thread(s) per core:              2
-Core(s) per socket:              8
-Socket(s):                       1
-```
-
-## Benchmark Results
-
-### Number-Theoretic Transform Benchmarks
-
-|             | `NTT`                      |
-|:------------|:-------------------------- |
-| **`64`**    | `187.17 us` (✅ **1.00x**)  |
-| **`128`**   | `231.50 us` (✅ **1.00x**)  |
-| **`256`**   | `333.26 us` (✅ **1.00x**)  |
-| **`512`**   | `623.88 us` (✅ **1.00x**)  |
-| **`1024`**  | `951.62 us` (✅ **1.00x**)  |
-| **`2048`**  | `1.48 ms` (✅ **1.00x**)    |
-| **`4096`**  | `2.78 ms` (✅ **1.00x**)    |
-| **`8192`**  | `5.48 ms` (✅ **1.00x**)    |
-| **`16384`** | `11.09 ms` (✅ **1.00x**)   |
-| **`32768`** | `23.08 ms` (✅ **1.00x**)   |
-
-### Polynomial Multiplication Benchmarks
-
-|             | `NTT-Based`               | `Brute-Force`                      |
-|:------------|:--------------------------|:---------------------------------- |
-| **`64`**    | `818.69 us` (✅ **1.00x**) | `494.52 us` (✅ **1.66x faster**)   |
-| **`128`**   | `1.12 ms` (✅ **1.00x**)   | `1.93 ms` (❌ *1.72x slower*)       |
-| **`256`**   | `1.74 ms` (✅ **1.00x**)   | `7.78 ms` (❌ *4.48x slower*)       |
-| **`512`**   | `2.69 ms` (✅ **1.00x**)   | `30.35 ms` (❌ *11.30x slower*)     |
-| **`1024`**  | `4.33 ms` (✅ **1.00x**)   | `121.49 ms` (❌ *28.05x slower*)    |
-| **`2048`**  | `7.47 ms` (✅ **1.00x**)   | `493.59 ms` (❌ *66.07x slower*)    |
-| **`4096`**  | `14.23 ms` (✅ **1.00x**)  | `1.98 s` (❌ *139.11x slower*)      |
-| **`8192`**  | `31.60 ms` (✅ **1.00x**)  | `7.88 s` (❌ *249.28x slower*)      |
-| **`16384`** | `65.51 ms` (✅ **1.00x**)  | `31.46 s` (❌ *480.32x slower*)     |
-| **`32768`** | `141.24 ms` (✅ **1.00x**) | `126.02 s` (❌ *892.30x slower*)    |
-
----
-Made with [criterion-table](https://github.com/nu11ptr/criterion-table)
-

diff --git a/src/ntt.rs b/src/ntt.rs
@@ -1,76 +1,79 @@
-use std::ops::Add;
-
-use crate::{numbers::BigInt, prime::is_prime};
+use crate::{numbers::BigInt, polynomial::PolynomialFieldElement, prime::is_prime};
 use crypto_bigint::Invert;
 use itertools::Itertools;
 use rayon::prelude::*;
 
 #[derive(Debug, Clone)]
-pub struct Constants {
-    pub N: BigInt,
-    pub w: BigInt,
+pub struct Constants<T: PolynomialFieldElement> {
+    pub N: T,
+    pub w: T,
 }
 
-fn prime_factors(a: BigInt) -> Vec<BigInt> {
-    let mut ans: Vec<BigInt> = Vec::new();
-    let mut x = BigInt::from(2);
+fn prime_factors<T: PolynomialFieldElement>(a: T) -> Vec<T> {
+    let mut ans: Vec<T> = Vec::new();
+    let ZERO = T::from(0);
+    let ONE = T::from(1);
+    let mut x = T::from(2);
     while x * x <= a {
-        if a.rem(x) == 0 {
+        if a.rem(x) == ZERO {
             ans.push(x);
         }
-        x += 1;
+        x += ONE;
     }
     ans
 }
 
 #[cfg(feature = "parallel")]
-fn is_primitive_root(a: BigInt, deg: BigInt, N: BigInt) -> bool {
+fn is_primitive_root<T: PolynomialFieldElement>(a: T, deg: T, N: T) -> bool {
     let lhs = a.mod_exp(deg, N);
-    let lhs = lhs == 1;
+    let ONE = T::from(1);
+    let lhs = lhs == ONE;
     let rhs = prime_factors(deg)
         .par_iter()
-        .map(|&x| a.mod_exp(deg / x, N) != 1)
+        .map(|&x| a.mod_exp(deg / x, N) != ONE)
         .all(|x| x);
     lhs && rhs
 }
 
 #[cfg(not(feature = "parallel"))]
-fn is_primitive_root(a: BigInt, deg: BigInt, N: BigInt) -> bool {
+fn is_primitive_root<T: PolynomialFieldElement>(a: T, deg: T, N: T) -> bool {
     let lhs = a.mod_exp(deg, N);
-    let lhs = lhs == 1;
+    let ONE = T::from(1);
+    let lhs = lhs == ONE;
     let rhs = prime_factors(deg)
         .iter()
-        .map(|&x| a.mod_exp(deg / x, N) != 1)
+        .map(|&x| a.mod_exp(deg / x, N) != ONE)
         .all(|x| x);
     lhs && rhs
 }
 
-pub fn working_modulus(n: BigInt, M: BigInt) -> Constants {
-    let ONE = BigInt::from(1);
+pub fn working_modulus<T: PolynomialFieldElement>(n: T, M: T) -> Constants<T> {
+    let ZERO = T::from(0);
+    let ONE = T::from(1);
     let mut N = M;
     if N >= ONE {
-        N = N * n + 1;
+        N = N * n + ONE;
         while !is_prime(N) {
             N += n;
         }
     }
     let totient = N - ONE;
     assert!(N >= M);
-    let mut gen = BigInt::from(0);
-    let mut g = BigInt::from(2);
+    let mut gen = T::from(0);
+    let mut g = T::from(2);
     while g < N {
         if is_primitive_root(g, totient, N) {
             gen = g;
             break;
         }
         g += ONE;
     }
-    assert!(gen > 0);
+    assert!(gen > ZERO);
     let w = gen.mod_exp(totient / n, N);
     Constants { N, w }
 }
 
-fn order_reverse(inp: &mut Vec<BigInt>) {
+fn order_reverse<T: PolynomialFieldElement>(inp: &mut Vec<T>) {
     let mut j = 0;
     let n = inp.len();
     (1..n).for_each(|i| {
@@ -88,19 +91,19 @@ fn order_reverse(inp: &mut Vec<BigInt>) {
 }
 
 #[cfg(feature = "parallel")]
-fn fft(inp: Vec<BigInt>, c: &Constants, w: BigInt) -> Vec<BigInt> {
+fn fft<T: PolynomialFieldElement>(inp: Vec<T>, c: &Constants<T>, w: T) -> Vec<T> {
     assert!(inp.len().is_power_of_two());
     let mut inp = inp.clone();
     let N = inp.len();
-    let MOD = BigInt::from(c.N);
-    let ONE = BigInt::from(1);
-    let mut pre: Vec<BigInt> = vec![ONE; N / 2];
+    let MOD = T::from(c.N);
+    let ONE = T::from(1);
+    let mut pre: Vec<T> = vec![ONE; N / 2];
     let CHUNK_COUNT = 128;
-    let chunk_count = BigInt::from(CHUNK_COUNT);
+    let chunk_count = T::from(CHUNK_COUNT);
 
     pre.par_chunks_mut(CHUNK_COUNT)
         .enumerate()
-        .for_each(|(i, arr)| arr[0] = w.mod_exp(BigInt::from(i) * chunk_count, MOD));
+        .for_each(|(i, arr)| arr[0] = w.mod_exp(T::from(i) * chunk_count, MOD));
     pre.par_chunks_mut(CHUNK_COUNT).for_each(|x| {
         (1..x.len()).for_each(|y| {
             let _x = x.to_vec();
@@ -139,19 +142,19 @@ fn fft(inp: Vec<BigInt>, c: &Constants, w: BigInt) -> Vec<BigInt> {
 }
 
 #[cfg(not(feature = "parallel"))]
-fn fft(inp: Vec<BigInt>, c: &Constants, w: BigInt) -> Vec<BigInt> {
+fn fft<T: PolynomialFieldElement>(inp: Vec<T>, c: &Constants<T>, w: T) -> Vec<T> {
     assert!(inp.len().is_power_of_two());
     let mut inp = inp.clone();
     let N = inp.len();
-    let MOD = BigInt::from(c.N);
-    let ONE = BigInt::from(1);
-    let mut pre: Vec<BigInt> = vec![ONE; N / 2];
+    let MOD = T::from(c.N);
+    let ONE = T::from(1);
+    let mut pre: Vec<T> = vec![ONE; N / 2];
     let CHUNK_COUNT = 128;
-    let chunk_count = BigInt::from(CHUNK_COUNT);
+    let chunk_count = T::from(CHUNK_COUNT);
 
     pre.chunks_mut(CHUNK_COUNT)
         .enumerate()
-        .for_each(|(i, arr)| arr[0] = w.mod_exp(BigInt::from(i) * chunk_count, MOD));
+        .for_each(|(i, arr)| arr[0] = w.mod_exp(T::from(i) * chunk_count, MOD));
     pre.chunks_mut(CHUNK_COUNT).for_each(|x| {
         (1..x.len()).for_each(|y| {
             let _x = x.to_vec();
@@ -189,13 +192,13 @@ fn fft(inp: Vec<BigInt>, c: &Constants, w: BigInt) -> Vec<BigInt> {
     inp
 }
 
-pub fn forward(inp: Vec<BigInt>, c: &Constants) -> Vec<BigInt> {
+pub fn forward<T: PolynomialFieldElement>(inp: Vec<T>, c: &Constants<T>) -> Vec<T> {
     fft(inp, c, c.w)
 }
 
 #[cfg(feature = "parallel")]
-pub fn inverse(inp: Vec<BigInt>, c: &Constants) -> Vec<BigInt> {
-    let mut inv = BigInt::from(inp.len());
+pub fn inverse<T: PolynomialFieldElement>(inp: Vec<T>, c: &Constants<T>) -> Vec<T> {
+    let mut inv = T::from(inp.len());
     let _ = inv.set_mod(c.N);
     let inv = inv.invert();
     let w = c.w.invert();
@@ -205,8 +208,8 @@ pub fn inverse(inp: Vec<BigInt>, c: &Constants) -> Vec<BigInt> {
 }
 
 #[cfg(not(feature = "parallel"))]
-pub fn inverse(inp: Vec<BigInt>, c: &Constants) -> Vec<BigInt> {
-    let mut inv = BigInt::from(inp.len());
+pub fn inverse<T: PolynomialFieldElement>(inp: Vec<T>, c: &Constants<T>) -> Vec<T> {
+    let mut inv = T::from(inp.len());
     let _ = inv.set_mod(c.N);
     let inv = inv.invert();
     let w = c.w.invert();

diff --git a/src/numbers.rs b/src/numbers.rs
@@ -18,13 +18,26 @@ use crypto_bigint::{
 use itertools::Itertools;
 use rand::{thread_rng, Error, Rng};
 
+use crate::polynomial::PolynomialFieldElement;
+
 pub enum BigIntType {
     U16(u16),
     U32(u32),
     U64(u64),
     U128(u128),
 }
 
+pub trait NttFieldElement {
+    // all operations should be under the modular group `M`
+    fn set_mod(&mut self, M: Self) -> Result<(), String>;
+    fn rem(&self, M: Self) -> Self;
+    fn pow(&self, n: u128) -> Self;
+    fn mod_exp(&self, exp: Self, M: Self) -> Self;
+    fn is_even(&self) -> bool;
+    fn is_zero(&self) -> bool;
+    fn to_bigint(&self) -> BigInt;
+}
+
 #[derive(Debug, Clone, Copy)]
 pub struct BigInt {
     pub v: DynResidue<4>,
@@ -130,6 +143,74 @@ impl BigInt {
     }
 }
 
+impl NttFieldElement for BigInt {
+    fn set_mod(&mut self, M: Self) -> Result<(), String> {
+        if M.is_even() {
+            return Err("modulus must be odd".to_string());
+        }
+        let params = DynResidueParams::new(&(U256::from(M.v.retrieve())));
+        self.v = DynResidue::new(&self.v.retrieve(), params);
+        Ok(())
+    }
+
+    fn rem(&self, M: Self) -> BigInt {
+        let mut res = self.clone();
+        if res < M {
+            return res;
+        }
+        res.v = DynResidue::new(
+            &res.v.retrieve().rem(&NonZero::from_uint(M.v.retrieve())),
+            res.params(),
+        );
+        res
+    }
+
+    fn pow(&self, n: u128) -> BigInt {
+        BigInt {
+            v: self.v.pow(&Uint::<4>::from_u128(n)),
+        }
+    }
+
+    fn mod_exp(&self, exp: BigInt, M: BigInt) -> BigInt {
+        let mut res: BigInt = if !exp.is_even() {
+            self.clone()
+        } else {
+            BigInt::from(1)
+        };
+        let mut b = self.clone();
+        let mut e = exp.clone();
+        res.set_mod(M);
+        b.set_mod(M);
+        while e > 0 {
+            e >>= 1;
+            b = b * b;
+            if M.is_even() {
+                b = b.rem(M);
+            }
+            if !e.is_even() && !e.is_zero() {
+                res = b * res;
+                if M.is_even() {
+                    res = res.rem(M);
+                }
+            }
+        }
+        res
+    }
+
+    fn is_zero(&self) -> bool {
+        self.v.retrieve().bits() == 0
+    }
+
+    fn is_even(&self) -> bool {
+        let is_odd: bool = self.v.retrieve().bit(0).into();
+        !is_odd
+    }
+
+    fn to_bigint(&self) -> BigInt {
+        *self
+    }
+}
+
 impl From<u16> for BigInt {
     fn from(value: u16) -> Self {
         BigInt::new(BigIntType::U16(value))
@@ -711,6 +792,8 @@ impl Display for BigInt {
     }
 }
 
+impl PolynomialFieldElement for BigInt {}
+
 #[cfg(test)]
 mod tests {
     use crate::numbers::BigInt;