Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt to migrate away from bn_mul_mont into Rust #1278

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ include = [
"crypto/fipsmodule/ec/ecp_nistz.h",
"crypto/fipsmodule/ec/ecp_nistz384.h",
"crypto/fipsmodule/ec/ecp_nistz384.inl",
"crypto/fipsmodule/ec/gfp_p256.c",
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

\o/

"crypto/fipsmodule/ec/gfp_p384.c",
"crypto/fipsmodule/ec/p256.c",
"crypto/fipsmodule/ec/p256-x86_64-table.h",
Expand Down
1 change: 0 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ const RING_SRCS: &[(&[&str], &str)] = &[
(&[AARCH64, ARM, X86_64, X86], "crypto/crypto.c"),
(&[AARCH64, ARM, X86_64, X86], "crypto/curve25519/curve25519.c"),
(&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/ecp_nistz.c"),
(&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/gfp_p256.c"),
(&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/gfp_p384.c"),
(&[AARCH64, ARM, X86_64, X86], "crypto/fipsmodule/ec/p256.c"),

Expand Down
50 changes: 0 additions & 50 deletions crypto/fipsmodule/ec/gfp_p256.c

This file was deleted.

18 changes: 0 additions & 18 deletions crypto/fipsmodule/ec/gfp_p384.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,6 @@ static void elem_div_by_2(Elem r, const Elem a) {
copy_conditional(r, adjusted, is_odd);
}

static inline void elem_mul_mont(Elem r, const Elem a, const Elem b) {
static const BN_ULONG Q_N0[] = {
BN_MONT_CTX_N0(0x1, 0x1)
};
/* XXX: Not (clearly) constant-time; inefficient.*/
bn_mul_mont(r, a, b, Q, Q_N0, P384_LIMBS);
}

static inline void elem_mul_by_2(Elem r, const Elem a) {
LIMBS_shl_mod(r, a, Q, P384_LIMBS);
}
Expand Down Expand Up @@ -201,16 +193,6 @@ void p384_elem_neg(Elem r, const Elem a) {
}


void p384_scalar_mul_mont(ScalarMont r, const ScalarMont a,
const ScalarMont b) {
static const BN_ULONG N_N0[] = {
BN_MONT_CTX_N0(0x6ed46089, 0xe88fdc45)
};
/* XXX: Inefficient. TODO: Add dedicated multiplication routine. */
bn_mul_mont(r, a, b, N, N_N0, P384_LIMBS);
}


/* TODO(perf): Optimize this. */

static void p384_point_select_w5(P384_POINT *out,
Expand Down
4 changes: 2 additions & 2 deletions src/arithmetic/bigint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1166,7 +1166,7 @@ fn greater_than(a: &Nonnegative, b: &Nonnegative) -> bool {

#[derive(Clone)]
#[repr(transparent)]
struct N0([Limb; 2]);
pub(crate) struct N0(pub(crate) [Limb; 2]);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of making the inner field pub(crate), we should replace the From<u64> for N0 with a const fn from(n0: u64) function so that we can use it instead.


const N0_LIMBS_USED: usize = 64 / LIMB_BITS;

Expand All @@ -1186,7 +1186,7 @@ impl From<u64> for N0 {
}

/// r *= a
fn limbs_mont_mul(r: &mut [Limb], a: &[Limb], m: &[Limb], n0: &N0) {
pub(crate) fn limbs_mont_mul(r: &mut [Limb], a: &[Limb], m: &[Limb], n0: &N0) {
debug_assert_eq!(r.len(), m.len());
debug_assert_eq!(a.len(), m.len());

Expand Down
10 changes: 5 additions & 5 deletions src/ec/suite_b/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ pub struct CommonOps {
pub b: Elem<R>,

// In all cases, `r`, `a`, and `b` may all alias each other.
elem_mul_mont: unsafe extern "C" fn(r: *mut Limb, a: *const Limb, b: *const Limb),
elem_mul_mont: fn(r: &mut [Limb], a: &[Limb], b: &[Limb]),
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll have to leave this as it was so that the P256 implementation can directly call the assembly/C code without any additional overhead.

elem_sqr_mont: unsafe extern "C" fn(r: *mut Limb, a: *const Limb),

point_add_jacobian_impl: unsafe extern "C" fn(r: *mut Limb, a: *const Limb, b: *const Limb),
Expand Down Expand Up @@ -225,9 +225,9 @@ impl PublicKeyOps {
// TODO: do something about this.
unsafe {
(self.common.elem_mul_mont)(
r.limbs.as_mut_ptr(),
parsed.limbs.as_ptr(),
self.common.q.rr.as_ptr(),
&mut r.limbs,
&parsed.limbs,
&self.common.q.rr,
)
}
Ok(r)
Expand All @@ -240,7 +240,7 @@ pub struct ScalarOps {
pub common: &'static CommonOps,

scalar_inv_to_mont_impl: fn(a: &Scalar) -> Scalar<R>,
scalar_mul_mont: unsafe extern "C" fn(r: *mut Limb, a: *const Limb, b: *const Limb),
scalar_mul_mont: fn(r: &mut [Limb], a: &[Limb], b: &[Limb]),
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto: We'll have to leave this as it was so that the P256 implementation can directly call the assembly/C code without any additional overhead.

}

impl ScalarOps {
Expand Down
10 changes: 5 additions & 5 deletions src/ec/suite_b/ops/elem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl<M, E: Encoding> Elem<M, E> {

#[inline]
pub fn mul_mont<M, EA: Encoding, EB: Encoding>(
f: unsafe extern "C" fn(r: *mut Limb, a: *const Limb, b: *const Limb),
f: fn(r: &mut[Limb], a: &[Limb], b: &[Limb]),
a: &Elem<M, EA>,
b: &Elem<M, EB>,
) -> Elem<M, <(EA, EB) as ProductEncoding>::Output>
Expand All @@ -61,7 +61,7 @@ where
// let r = f(a, b); return r;
#[inline]
pub fn binary_op<M, EA: Encoding, EB: Encoding, ER: Encoding>(
f: unsafe extern "C" fn(r: *mut Limb, a: *const Limb, b: *const Limb),
f: fn(r: &mut[Limb], a: &[Limb], b: &[Limb]),
a: &Elem<M, EA>,
b: &Elem<M, EB>,
) -> Elem<M, ER> {
Expand All @@ -70,18 +70,18 @@ pub fn binary_op<M, EA: Encoding, EB: Encoding, ER: Encoding>(
m: PhantomData,
encoding: PhantomData,
};
unsafe { f(r.limbs.as_mut_ptr(), a.limbs.as_ptr(), b.limbs.as_ptr()) }
f(&mut r.limbs, &a.limbs, &b.limbs);
r
}

// a := f(a, b);
#[inline]
pub fn binary_op_assign<M, EA: Encoding, EB: Encoding>(
f: unsafe extern "C" fn(r: *mut Limb, a: *const Limb, b: *const Limb),
f: fn(r: &mut[Limb], a: &[Limb], b: &[Limb]),
a: &mut Elem<M, EA>,
b: &Elem<M, EB>,
) {
unsafe { f(a.limbs.as_mut_ptr(), a.limbs.as_ptr(), b.limbs.as_ptr()) }
f(&mut a.limbs, &a.limbs, &b.limbs);
}

// let r = f(a); return r;
Expand Down
37 changes: 22 additions & 15 deletions src/ec/suite_b/ops/p256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use super::{
elem::{binary_op, binary_op_assign},
elem_sqr_mul, elem_sqr_mul_acc, Modulus, *,
};
use crate::arithmetic::bigint::{limbs_mont_mul, N0};
use core::marker::PhantomData;

macro_rules! p256_limbs {
Expand Down Expand Up @@ -64,7 +65,7 @@ pub static COMMON_OPS: CommonOps = CommonOps {
encoding: PhantomData, // R
},

elem_mul_mont: p256_mul_mont,
elem_mul_mont: p256_mul_mont_rs,
elem_sqr_mont: p256_sqr_mont,

point_add_jacobian_impl: p256_point_add,
Expand Down Expand Up @@ -190,22 +191,22 @@ fn p256_scalar_inv_to_mont(a: &Scalar<Unencoded>) -> Scalar<R> {
#[inline]
fn sqr(a: &Scalar<R>) -> Scalar<R> {
let mut tmp = Scalar::zero();
unsafe { p256_scalar_sqr_rep_mont(tmp.limbs.as_mut_ptr(), a.limbs.as_ptr(), 1) }
p256_scalar_sqr_rep_mont(&mut tmp.limbs, &a.limbs, 1);
tmp
}

// Returns (`a` squared `squarings` times) * `b`.
fn sqr_mul(a: &Scalar<R>, squarings: Limb, b: &Scalar<R>) -> Scalar<R> {
debug_assert!(squarings >= 1);
let mut tmp = Scalar::zero();
unsafe { p256_scalar_sqr_rep_mont(tmp.limbs.as_mut_ptr(), a.limbs.as_ptr(), squarings) }
p256_scalar_sqr_rep_mont(&mut tmp.limbs, &a.limbs, squarings);
mul(&tmp, b)
}

// Sets `acc` = (`acc` squared `squarings` times) * `b`.
fn sqr_mul_acc(acc: &mut Scalar<R>, squarings: Limb, b: &Scalar<R>) {
debug_assert!(squarings >= 1);
unsafe { p256_scalar_sqr_rep_mont(acc.limbs.as_mut_ptr(), acc.limbs.as_ptr(), squarings) }
p256_scalar_sqr_rep_mont(&mut acc.limbs, &acc.limbs, squarings);
binary_op_assign(p256_scalar_mul_mont, acc, b);
}

Expand Down Expand Up @@ -297,6 +298,23 @@ fn p256_scalar_inv_to_mont(a: &Scalar<Unencoded>) -> Scalar<R> {
acc
}

const N_N0: N0 = N0([0xccd1c8aa, 0xee00bc4f]);
fn p256_scalar_mul_mont(r: &mut [Limb], a: &[Limb], b: &[Limb]) {
limbs_mont_mul(r, a, b, &N_N0)
}

fn p256_scalar_sqr_rep_mont(r: &mut [Limb], a: &[Limb], rep: Limb) {
debug_assert!(rep >= 1);
p256_scalar_mul_mont(r, a, a);
for i in 1..rep {
p256_scalar_mul_mont(r, r, r);
}
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing this would regress the performance on x86-64 and AArch64 targets that have an optimized implementation of this.


fn p256_mul_mont_rs(r: &mut [Limb], a: &[Limb], b: &[Limb]) {
unsafe { p256_mul_mont(r.as_mut_ptr(), a.as_ptr(), b.as_ptr()) }
}

prefixed_extern! {
pub(super) fn p256_mul_mont(
r: *mut Limb, // [COMMON_OPS.num_limbs]
Expand All @@ -319,15 +337,4 @@ prefixed_extern! {
p_x: *const Limb, // [COMMON_OPS.num_limbs]
p_y: *const Limb, // [COMMON_OPS.num_limbs]
);

fn p256_scalar_mul_mont(
r: *mut Limb, // [COMMON_OPS.num_limbs]
a: *const Limb, // [COMMON_OPS.num_limbs]
b: *const Limb, // [COMMON_OPS.num_limbs]
);
fn p256_scalar_sqr_rep_mont(
r: *mut Limb, // [COMMON_OPS.num_limbs]
a: *const Limb, // [COMMON_OPS.num_limbs]
rep: Limb,
);
}
24 changes: 12 additions & 12 deletions src/ec/suite_b/ops/p384.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use crate::arithmetic::bigint::{limbs_mont_mul, N0};

use super::{
elem::{binary_op, binary_op_assign},
elem_sqr_mul, elem_sqr_mul_acc, Modulus, *,
Expand Down Expand Up @@ -337,13 +339,17 @@ const N_RR_LIMBS: [Limb; MAX_LIMBS] = p384_limbs![
0x28266895, 0x3fb05b7a, 0x2b39bf21, 0x0c84ee01
];

prefixed_extern! {
fn p384_elem_mul_mont(
r: *mut Limb, // [COMMON_OPS.num_limbs]
a: *const Limb, // [COMMON_OPS.num_limbs]
b: *const Limb, // [COMMON_OPS.num_limbs]
);
const Q_N0: N0 = N0([0x1, 0x1]);
fn elem_mul_mont(r: &mut [Limb], a: &[Limb], b: &[Limb]) {
limbs_mont_mul(r, a, b, Q_N0);
}

const N_N0: N0 = N0([0x6ed46089, 0xe88fdc45]);
fn p384_scalar_mul_mont(r: &mut [Limb], a: &[Limb], b: &[Limb]) {
limbs_mont_mul(r, a, b, N_N0);
}

prefixed_extern! {
fn nistz384_point_add(
r: *mut Limb, // [3][COMMON_OPS.num_limbs]
a: *const Limb, // [3][COMMON_OPS.num_limbs]
Expand All @@ -355,10 +361,4 @@ prefixed_extern! {
p_x: *const Limb, // [COMMON_OPS.num_limbs]
p_y: *const Limb, // [COMMON_OPS.num_limbs]
);

fn p384_scalar_mul_mont(
r: *mut Limb, // [COMMON_OPS.num_limbs]
a: *const Limb, // [COMMON_OPS.num_limbs]
b: *const Limb, // [COMMON_OPS.num_limbs]
);
}