Skip to content

Commit

Permalink
[EC] Unify point addition for P-256/384/521 (aws#1602)
Browse files Browse the repository at this point in the history
Implement and use a single version of point addition
for implementations of NIST curves P-384, P-521, and
Fiat-crypto based implementation of P-256. The change
does not affect performance.
  • Loading branch information
dkostic authored Jun 12, 2024
1 parent 5f78ef3 commit 37ba0e2
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 334 deletions.
151 changes: 150 additions & 1 deletion crypto/fipsmodule/ec/ec_nistp.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
//
// | op | P-521 | P-384 | P-256 |
// |----------------------------|
// | 1. | | | |
// | 1. | x | x | x* |
// | 2. | x | x | x* |
// | 3. | | | |
// | 4. | | | |
Expand All @@ -36,6 +36,18 @@
#endif
typedef ec_nistp_felem_limb ec_nistp_felem[NISTP_FELEM_MAX_NUM_OF_LIMBS];

// cmovznz performs a constant-time conditional copy of |num_limbs| limbs into
// |out|: every limb is taken from |z| when |t| is zero and from |nz| otherwise
// (out = t == 0 ? z : nz). The selection is done with a mask derived from |t|
// instead of a branch on |t|.
static void cmovznz(ec_nistp_felem_limb *out,
                    size_t num_limbs,
                    ec_nistp_felem_limb t,
                    const ec_nistp_felem_limb *z,
                    const ec_nistp_felem_limb *nz) {
  // Derive the selection mask once; it is reused for every limb.
  const ec_nistp_felem_limb t_is_zero = constant_time_is_zero_w(t);
  size_t idx = 0;
  while (idx < num_limbs) {
    out[idx] = constant_time_select_w(t_is_zero, z[idx], nz[idx]);
    idx++;
  }
}

// Group operations
// ----------------
//
Expand Down Expand Up @@ -110,3 +122,140 @@ void ec_nistp_point_double(const ec_nistp_felem_meth *ctx,
ctx->add(gamma, gamma, gamma);
ctx->sub(y_out, y_out, gamma);
}

// ec_nistp_point_add calculates (x1, y1, z1) + (x2, y2, z2) and writes the
// result to (x3, y3, z3).
//
// The method is taken from:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
//
// Coq transcription and correctness proof:
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
//
// This function includes a branch for checking whether the two input points
// are equal, (while not equal to the point at infinity). This case should
// never happen during single point multiplication, so there is no timing leak
// for ECDH and ECDSA.
//
// Parameters:
//   ctx        - per-curve field methods (add, sub, mul, sqr, nz) and the
//                number of limbs in a field element.
//   x3, y3, z3 - output point.
//   x1, y1, z1 - first input point.
//   mixed      - when nonzero, z2 is assumed to be 1 and the work involving
//                z2 is replaced by plain copies; z2 = 0 is still handled by
//                the final conditional copies.
//   x2, y2, z2 - second input point.
//
// All intermediate values are kept in local temporaries; the outputs are only
// written by the conditional copies at the very end, or by
// |ec_nistp_point_double| in the doubling branch.
void ec_nistp_point_add(const ec_nistp_felem_meth *ctx,
ec_nistp_felem_limb *x3,
ec_nistp_felem_limb *y3,
ec_nistp_felem_limb *z3,
const ec_nistp_felem_limb *x1,
const ec_nistp_felem_limb *y1,
const ec_nistp_felem_limb *z1,
const int mixed,
const ec_nistp_felem_limb *x2,
const ec_nistp_felem_limb *y2,
const ec_nistp_felem_limb *z2) {
ec_nistp_felem x_out, y_out, z_out;

// Record whether each input has a zero z coordinate. These values drive the
// doubling check and the final output selection.
// NOTE(review): presumably ctx->nz returns zero iff its argument is zero --
// confirm against the per-curve implementations.
ec_nistp_felem_limb z1nz = ctx->nz(z1);
ec_nistp_felem_limb z2nz = ctx->nz(z2);

// z1z1 = z1**2
ec_nistp_felem z1z1;
ctx->sqr(z1z1, z1);

ec_nistp_felem u1, s1, two_z1z2;
if (!mixed) {
// z2z2 = z2**2
ec_nistp_felem z2z2;
ctx->sqr(z2z2, z2);

// u1 = x1*z2z2
ctx->mul(u1, x1, z2z2);

// two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
ctx->add(two_z1z2, z1, z2);
ctx->sqr(two_z1z2, two_z1z2);
ctx->sub(two_z1z2, two_z1z2, z1z1);
ctx->sub(two_z1z2, two_z1z2, z2z2);

// s1 = y1 * z2**3
ctx->mul(s1, z2, z2z2);
ctx->mul(s1, s1, y1);
} else {
// We'll assume z2 = 1 (special case z2 = 0 is handled later).

// u1 = x1*z2z2, which is just x1 when z2 = 1
OPENSSL_memcpy(u1, x1, ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb));
// two_z1z2 = 2z1z2, which is z1 + z1 when z2 = 1
ctx->add(two_z1z2, z1, z1);
// s1 = y1 * z2**3, which is just y1 when z2 = 1
OPENSSL_memcpy(s1, y1, ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb));
}

// u2 = x2*z1z1
ec_nistp_felem u2;
ctx->mul(u2, x2, z1z1);

// h = u2 - u1
ec_nistp_felem h;
ctx->sub(h, u2, u1);

// xneq records whether h is nonzero; used by the doubling check below.
ec_nistp_felem_limb xneq = ctx->nz(h);

// z_out = two_z1z2 * h
ctx->mul(z_out, h, two_z1z2);

// z1z1z1 = z1 * z1z1
ec_nistp_felem z1z1z1;
ctx->mul(z1z1z1, z1, z1z1);

// s2 = y2 * z1**3
ec_nistp_felem s2;
ctx->mul(s2, y2, z1z1z1);

// r = (s2 - s1)*2
ec_nistp_felem r;
ctx->sub(r, s2, s1);
ctx->add(r, r, r);

// yneq records whether r is nonzero; used by the doubling check below.
ec_nistp_felem_limb yneq = ctx->nz(r);

// The inputs are the same point, and not the point at infinity, when both h
// and r are zero while neither z1 nor z2 is zero. The addition formulas below
// do not cover that case, so it is delegated to |ec_nistp_point_double|.
// This case will never occur in the constant-time |ec_GFp_mont_mul|.
ec_nistp_felem_limb is_nontrivial_double =
constant_time_is_zero_w(xneq | yneq) &
~constant_time_is_zero_w(z1nz) &
~constant_time_is_zero_w(z2nz);
// NOTE(review): presumably constant_time_declassify_w marks the value as safe
// to branch on for constant-time analysis tooling -- confirm.
if (constant_time_declassify_w(is_nontrivial_double)) {
ec_nistp_point_double(ctx, x3, y3, z3, x1, y1, z1);
return;
}

// I = (2h)**2
ec_nistp_felem i;
ctx->add(i, h, h);
ctx->sqr(i, i);

// J = h * I
ec_nistp_felem j;
ctx->mul(j, h, i);

// V = U1 * I
ec_nistp_felem v;
ctx->mul(v, u1, i);

// x_out = r**2 - J - 2V
ctx->sqr(x_out, r);
ctx->sub(x_out, x_out, j);
ctx->sub(x_out, x_out, v);
ctx->sub(x_out, x_out, v);

// y_out = r(V-x_out) - 2 * s1 * J
ctx->sub(y_out, v, x_out);
ctx->mul(y_out, y_out, r);
ec_nistp_felem s1j;
ctx->mul(s1j, s1, j);
ctx->sub(y_out, y_out, s1j);
ctx->sub(y_out, y_out, s1j);

// Select the final result without branching on the inputs:
//  - if z1nz is zero, the candidate result becomes (x2, y2, z2);
//  - if z2nz is zero, the output is (x1, y1, z1);
//  - otherwise the output is the candidate (x_out, y_out, z_out).
cmovznz(x_out, ctx->felem_num_limbs, z1nz, x2, x_out);
cmovznz(y_out, ctx->felem_num_limbs, z1nz, y2, y_out);
cmovznz(z_out, ctx->felem_num_limbs, z1nz, z2, z_out);
cmovznz(x3, ctx->felem_num_limbs, z2nz, x1, x_out);
cmovznz(y3, ctx->felem_num_limbs, z2nz, y1, y_out);
cmovznz(z3, ctx->felem_num_limbs, z2nz, z1, z_out);
}

14 changes: 14 additions & 0 deletions crypto/fipsmodule/ec/ec_nistp.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,12 @@ typedef uint32_t ec_nistp_felem_limb;
// This makes the functions reusable between different curves by simply
// providing an appropriate methods object.
// Table of field-element operations for one curve. The generic point
// routines receive a pointer to this table and call through it instead of
// calling curve-specific functions directly.
typedef struct {
// Number of limbs in one field element; used as the length of limb-wise
// copies and conditional copies.
size_t felem_num_limbs;
// c = a + b in the field.
void (*add)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b);
// c = a - b in the field.
void (*sub)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b);
// c = a * b in the field.
void (*mul)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b);
// c = a**2 in the field.
void (*sqr)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a);
// Non-zero test on a field element; callers compare the result with zero.
// NOTE(review): presumably returns zero iff |a| is zero -- confirm against
// the per-curve implementations.
ec_nistp_felem_limb (*nz)(const ec_nistp_felem_limb *a);
} ec_nistp_felem_meth;

// Accessor for the P-256 field-element method table, which is populated in
// p256.c with the fiat_p256_* routines.
const ec_nistp_felem_meth *p256_felem_methods(void);
Expand All @@ -61,5 +63,17 @@ void ec_nistp_point_double(const ec_nistp_felem_meth *ctx,
const ec_nistp_felem_limb *x_in,
const ec_nistp_felem_limb *y_in,
const ec_nistp_felem_limb *z_in);

// ec_nistp_point_add calculates (x1, y1, z1) + (x2, y2, z2) using the field
// methods in |ctx| and writes the result to (x3, y3, z3). When |mixed| is
// nonzero, z2 is assumed to be 1 (z2 = 0, the point at infinity, is still
// handled).
void ec_nistp_point_add(const ec_nistp_felem_meth *ctx,
ec_nistp_felem_limb *x3,
ec_nistp_felem_limb *y3,
ec_nistp_felem_limb *z3,
const ec_nistp_felem_limb *x1,
const ec_nistp_felem_limb *y1,
const ec_nistp_felem_limb *z1,
const int mixed,
const ec_nistp_felem_limb *x2,
const ec_nistp_felem_limb *y2,
const ec_nistp_felem_limb *z2);
#endif // EC_NISTP_H

123 changes: 3 additions & 120 deletions crypto/fipsmodule/ec/p256.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,12 @@ static void fiat_p256_inv_square(fiat_p256_felem out,
}

// Method table that plugs the Fiat-crypto based P-256 field routines into the
// generic |ec_nistp_felem_meth| interface used by the shared point routines.
DEFINE_METHOD_FUNCTION(ec_nistp_felem_meth, p256_felem_methods) {
  // Field arithmetic.
  out->sqr = fiat_p256_square;
  out->mul = fiat_p256_mul;
  out->sub = fiat_p256_sub;
  out->add = fiat_p256_add;

  // Non-zero test and element size.
  out->nz = fiat_p256_nz;
  out->felem_num_limbs = FIAT_P256_NLIMBS;
}

static void fiat_p256_point_double(fiat_p256_felem x_out,
Expand All @@ -183,133 +185,14 @@ static void fiat_p256_point_double(fiat_p256_felem x_out,
ec_nistp_point_double(p256_felem_methods(), x_out, y_out, z_out, x_in, y_in, z_in);
}

// fiat_p256_point_add calculates (x1, y1, z1) + (x2, y2, z2) and stores the
// result in (x3, y3, z3).
//
// This is a thin wrapper: the arithmetic is performed by
// |ec_nistp_point_add|, the point addition shared by the NIST curve
// implementations, called with the P-256 field methods returned by
// |p256_felem_methods|. |mixed| is forwarded unchanged; when it is nonzero
// the shared routine assumes z2 = 1 (z2 = 0, the point at infinity, is still
// handled there).
//
// The addition must be carried out exactly once, by the shared routine.
// Computing the sum locally first and then delegating would make the second
// computation read inputs that the first one may already have overwritten
// whenever an output buffer aliases an input buffer.
static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
                                fiat_p256_felem z3, const fiat_p256_felem x1,
                                const fiat_p256_felem y1,
                                const fiat_p256_felem z1, const int mixed,
                                const fiat_p256_felem x2,
                                const fiat_p256_felem y2,
                                const fiat_p256_felem z2) {
  ec_nistp_point_add(p256_felem_methods(), x3, y3, z3, x1, y1, z1, mixed, x2,
                     y2, z2);
}

#include "./p256_table.h"
Expand Down
Loading

0 comments on commit 37ba0e2

Please sign in to comment.