Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EC] Unify point addition for P-256/384/521 #1602

Merged
merged 3 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 150 additions & 1 deletion crypto/fipsmodule/ec/ec_nistp.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
//
// | op | P-521 | P-384 | P-256 |
// |----------------------------|
// | 1. | | | |
// | 1. | x | x | x* |
// | 2. | x | x | x* |
// | 3. | | | |
// | 4. | | | |
Expand All @@ -36,6 +36,18 @@
#endif
typedef ec_nistp_felem_limb ec_nistp_felem[NISTP_FELEM_MAX_NUM_OF_LIMBS];

// Constant-time conditional copy over |num_limbs| limbs:
//   out = (t == 0) ? z : nz
// The choice is made with a mask derived from |t| rather than with a branch,
// and limbs are processed from index 0 upwards, one at a time.
static void cmovznz(ec_nistp_felem_limb *out,
                    size_t num_limbs,
                    ec_nistp_felem_limb t,
                    const ec_nistp_felem_limb *z,
                    const ec_nistp_felem_limb *nz) {
  // Selector mask computed once from |t| and reused for every limb.
  const ec_nistp_felem_limb pick_z = constant_time_is_zero_w(t);
  size_t limb = 0;
  while (limb < num_limbs) {
    out[limb] = constant_time_select_w(pick_z, z[limb], nz[limb]);
    limb++;
  }
}

// Group operations
// ----------------
//
Expand Down Expand Up @@ -110,3 +122,140 @@ void ec_nistp_point_double(const ec_nistp_felem_meth *ctx,
ctx->add(gamma, gamma, gamma);
ctx->sub(y_out, y_out, gamma);
}

// ec_nistp_point_add calculates (x1, y1, z1) + (x2, y2, z2)
//
// The method is taken from:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
//
// Coq transcription and correctness proof:
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
//
// This function includes a branch for checking whether the two input points
// are equal, (while not equal to the point at infinity). This case should
// never happen during single point multiplication, so there is no timing leak
// for ECDH and ECDSA.
//
// Parameters:
//   ctx        - per-curve field methods (add/sub/mul/sqr/nz) and the number
//                of limbs in a field element.
//   x3, y3, z3 - receive the coordinates of the resulting point.
//   x1, y1, z1 - coordinates of the first input point.
//   mixed      - when non-zero, the z2**2 and z2**3 multiplications are
//                skipped and z2 is treated as 1; the z2 = 0 case is still
//                handled by the final selection at the end of the function.
//   x2, y2, z2 - coordinates of the second input point.
void ec_nistp_point_add(const ec_nistp_felem_meth *ctx,
ec_nistp_felem_limb *x3,
ec_nistp_felem_limb *y3,
ec_nistp_felem_limb *z3,
const ec_nistp_felem_limb *x1,
const ec_nistp_felem_limb *y1,
const ec_nistp_felem_limb *z1,
const int mixed,
const ec_nistp_felem_limb *x2,
const ec_nistp_felem_limb *y2,
const ec_nistp_felem_limb *z2) {
// The sum is accumulated in these temporaries and only copied to
// (x3, y3, z3) by the final selection, after all reads of the inputs.
ec_nistp_felem x_out, y_out, z_out;

// z1nz / z2nz are used below as selectors: a zero value makes the final
// selection return the other input point unchanged.
// NOTE(review): presumably |nz| returns non-zero iff its argument is a
// non-zero field element -- confirm against the per-curve implementation.
ec_nistp_felem_limb z1nz = ctx->nz(z1);
ec_nistp_felem_limb z2nz = ctx->nz(z2);

// z1z1 = z1**2
ec_nistp_felem z1z1;
ctx->sqr(z1z1, z1);

ec_nistp_felem u1, s1, two_z1z2;
if (!mixed) {
// z2z2 = z2**2
ec_nistp_felem z2z2;
ctx->sqr(z2z2, z2);

// u1 = x1*z2z2
ctx->mul(u1, x1, z2z2);

// two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
ctx->add(two_z1z2, z1, z2);
ctx->sqr(two_z1z2, two_z1z2);
ctx->sub(two_z1z2, two_z1z2, z1z1);
ctx->sub(two_z1z2, two_z1z2, z2z2);

// s1 = y1 * z2**3
ctx->mul(s1, z2, z2z2);
ctx->mul(s1, s1, y1);
} else {
// We'll assume z2 = 1 (special case z2 = 0 is handled later).

// u1 = x1*z2z2
// (with z2 = 1 this is a plain copy of x1)
OPENSSL_memcpy(u1, x1, ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb));
// two_z1z2 = 2z1z2
ctx->add(two_z1z2, z1, z1);
// s1 = y1 * z2**3
// (with z2 = 1 this is a plain copy of y1)
OPENSSL_memcpy(s1, y1, ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb));
}

// u2 = x2*z1z1
ec_nistp_felem u2;
ctx->mul(u2, x2, z1z1);

// h = u2 - u1
ec_nistp_felem h;
ctx->sub(h, u2, u1);

// xneq feeds the doubling check below (zero is taken to mean h == 0).
ec_nistp_felem_limb xneq = ctx->nz(h);

// z_out = two_z1z2 * h
ctx->mul(z_out, h, two_z1z2);

// z1z1z1 = z1 * z1z1
ec_nistp_felem z1z1z1;
ctx->mul(z1z1z1, z1, z1z1);

// s2 = y2 * z1**3
ec_nistp_felem s2;
ctx->mul(s2, y2, z1z1z1);

// r = (s2 - s1)*2
ec_nistp_felem r;
ctx->sub(r, s2, s1);
ctx->add(r, r, r);

// yneq feeds the doubling check below (zero is taken to mean r == 0).
ec_nistp_felem_limb yneq = ctx->nz(r);

// This case will never occur in the constant-time |ec_GFp_mont_mul|.
// The flag is set only when xneq and yneq are both zero while z1nz and z2nz
// are both non-zero. It is passed through constant_time_declassify_w before
// being branched on; in that case the doubling routine writes directly to
// (x3, y3, z3) and the function returns early.
ec_nistp_felem_limb is_nontrivial_double =
constant_time_is_zero_w(xneq | yneq) &
~constant_time_is_zero_w(z1nz) &
~constant_time_is_zero_w(z2nz);
if (constant_time_declassify_w(is_nontrivial_double)) {
ec_nistp_point_double(ctx, x3, y3, z3, x1, y1, z1);
return;
}

// I = (2h)**2
ec_nistp_felem i;
ctx->add(i, h, h);
ctx->sqr(i, i);

// J = h * I
ec_nistp_felem j;
ctx->mul(j, h, i);

// V = U1 * I
ec_nistp_felem v;
ctx->mul(v, u1, i);

// x_out = r**2 - J - 2V
ctx->sqr(x_out, r);
ctx->sub(x_out, x_out, j);
ctx->sub(x_out, x_out, v);
ctx->sub(x_out, x_out, v);

// y_out = r(V-x_out) - 2 * s1 * J
ctx->sub(y_out, v, x_out);
ctx->mul(y_out, y_out, r);
ec_nistp_felem s1j;
ctx->mul(s1j, s1, j);
ctx->sub(y_out, y_out, s1j);
ctx->sub(y_out, y_out, s1j);

// Final selection via cmovznz (out = selector == 0 ? first : second):
//   1. if z1nz is zero, (x_out, y_out, z_out) is replaced by (x2, y2, z2);
//   2. if z2nz is zero, the result is (x1, y1, z1); otherwise it is
//      (x_out, y_out, z_out) as left by step 1.
cmovznz(x_out, ctx->felem_num_limbs, z1nz, x2, x_out);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be slightly more efficient to keep the original order as, for example, x_out would remain in registers/cache before sending it or x1 to x_3.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure it can make any difference in performance because: 1) the data is so small that everything is in cache anyway; 2) cmovznz is a function call so if not inlined it can't reuse values in registers. I'd like to keep this order for clarity if you don't mind?

cmovznz(y_out, ctx->felem_num_limbs, z1nz, y2, y_out);
cmovznz(z_out, ctx->felem_num_limbs, z1nz, z2, z_out);
cmovznz(x3, ctx->felem_num_limbs, z2nz, x1, x_out);
cmovznz(y3, ctx->felem_num_limbs, z2nz, y1, y_out);
cmovznz(z3, ctx->felem_num_limbs, z2nz, z1, z_out);
}

14 changes: 14 additions & 0 deletions crypto/fipsmodule/ec/ec_nistp.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,12 @@ typedef uint32_t ec_nistp_felem_limb;
// This makes the functions reusable between different curves by simply
// providing an appropriate methods object.
typedef struct {
// Number of limbs in one field element of the curve; used as the loop bound
// and copy length when elements are handled generically.
size_t felem_num_limbs;
// c = a + b (callers also pass the same buffer as input and output).
void (*add)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b);
// c = a - b
void (*sub)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b);
// c = a * b
void (*mul)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b);
// c = a**2
void (*sqr)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a);
// Non-zero test on a field element; the result is consumed as a zero /
// non-zero selector by the point-addition code.
// NOTE(review): presumably returns non-zero iff |a| is non-zero -- confirm
// against the per-curve implementations.
ec_nistp_felem_limb (*nz)(const ec_nistp_felem_limb *a);
} ec_nistp_felem_meth;

const ec_nistp_felem_meth *p256_felem_methods(void);
Expand All @@ -61,5 +63,17 @@ void ec_nistp_point_double(const ec_nistp_felem_meth *ctx,
const ec_nistp_felem_limb *x_in,
const ec_nistp_felem_limb *y_in,
const ec_nistp_felem_limb *z_in);

void ec_nistp_point_add(const ec_nistp_felem_meth *ctx,
ec_nistp_felem_limb *x3,
ec_nistp_felem_limb *y3,
ec_nistp_felem_limb *z3,
const ec_nistp_felem_limb *x1,
const ec_nistp_felem_limb *y1,
const ec_nistp_felem_limb *z1,
const int mixed,
const ec_nistp_felem_limb *x2,
const ec_nistp_felem_limb *y2,
const ec_nistp_felem_limb *z2);
#endif // EC_NISTP_H

123 changes: 3 additions & 120 deletions crypto/fipsmodule/ec/p256.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,12 @@ static void fiat_p256_inv_square(fiat_p256_felem out,
}

// Fills in the P-256 instance of |ec_nistp_felem_meth| so that the generic
// ec_nistp point routines can be driven by the fiat_p256_* field functions.
DEFINE_METHOD_FUNCTION(ec_nistp_felem_meth, p256_felem_methods) {
  // Field arithmetic callbacks.
  out->sqr = fiat_p256_square;
  out->mul = fiat_p256_mul;
  out->sub = fiat_p256_sub;
  out->add = fiat_p256_add;

  // Non-zero test and element width in limbs.
  out->nz = fiat_p256_nz;
  out->felem_num_limbs = FIAT_P256_NLIMBS;
}

static void fiat_p256_point_double(fiat_p256_felem x_out,
Expand All @@ -183,133 +185,14 @@ static void fiat_p256_point_double(fiat_p256_felem x_out,
ec_nistp_point_double(p256_felem_methods(), x_out, y_out, z_out, x_in, y_in, z_in);
}

// fiat_p256_point_add calculates (x1, y1, z1) + (x2, y2, z2)
//
// The method is taken from:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205>
//
// This function includes a branch for checking whether the two input points
dkostic marked this conversation as resolved.
Show resolved Hide resolved
// are equal, (while not equal to the point at infinity). This case never
// happens during single point multiplication, so there is no timing leak for
// ECDH or ECDSA signing.
static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
fiat_p256_felem z3, const fiat_p256_felem x1,
const fiat_p256_felem y1,
const fiat_p256_felem z1, const int mixed,
const fiat_p256_felem x2,
const fiat_p256_felem y2,
const fiat_p256_felem z2) {
fiat_p256_felem x_out, y_out, z_out;
fiat_p256_limb_t z1nz = fiat_p256_nz(z1);
fiat_p256_limb_t z2nz = fiat_p256_nz(z2);

// z1z1 = z1z1 = z1**2
fiat_p256_felem z1z1;
fiat_p256_square(z1z1, z1);

fiat_p256_felem u1, s1, two_z1z2;
if (!mixed) {
// z2z2 = z2**2
fiat_p256_felem z2z2;
fiat_p256_square(z2z2, z2);

// u1 = x1*z2z2
fiat_p256_mul(u1, x1, z2z2);

// two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
fiat_p256_add(two_z1z2, z1, z2);
fiat_p256_square(two_z1z2, two_z1z2);
fiat_p256_sub(two_z1z2, two_z1z2, z1z1);
fiat_p256_sub(two_z1z2, two_z1z2, z2z2);

// s1 = y1 * z2**3
fiat_p256_mul(s1, z2, z2z2);
fiat_p256_mul(s1, s1, y1);
} else {
// We'll assume z2 = 1 (special case z2 = 0 is handled later).

// u1 = x1*z2z2
fiat_p256_copy(u1, x1);
// two_z1z2 = 2z1z2
fiat_p256_add(two_z1z2, z1, z1);
// s1 = y1 * z2**3
fiat_p256_copy(s1, y1);
}

// u2 = x2*z1z1
fiat_p256_felem u2;
fiat_p256_mul(u2, x2, z1z1);

// h = u2 - u1
fiat_p256_felem h;
fiat_p256_sub(h, u2, u1);

fiat_p256_limb_t xneq = fiat_p256_nz(h);

// z_out = two_z1z2 * h
fiat_p256_mul(z_out, h, two_z1z2);

// z1z1z1 = z1 * z1z1
fiat_p256_felem z1z1z1;
fiat_p256_mul(z1z1z1, z1, z1z1);

// s2 = y2 * z1**3
fiat_p256_felem s2;
fiat_p256_mul(s2, y2, z1z1z1);

// r = (s2 - s1)*2
fiat_p256_felem r;
fiat_p256_sub(r, s2, s1);
fiat_p256_add(r, r, r);

fiat_p256_limb_t yneq = fiat_p256_nz(r);

fiat_p256_limb_t is_nontrivial_double = constant_time_is_zero_w(xneq | yneq) &
~constant_time_is_zero_w(z1nz) &
~constant_time_is_zero_w(z2nz);
if (constant_time_declassify_w(is_nontrivial_double)) {
fiat_p256_point_double(x3, y3, z3, x1, y1, z1);
return;
}

// I = (2h)**2
fiat_p256_felem i;
fiat_p256_add(i, h, h);
fiat_p256_square(i, i);

// J = h * I
fiat_p256_felem j;
fiat_p256_mul(j, h, i);

// V = U1 * I
fiat_p256_felem v;
fiat_p256_mul(v, u1, i);

// x_out = r**2 - J - 2V
fiat_p256_square(x_out, r);
fiat_p256_sub(x_out, x_out, j);
fiat_p256_sub(x_out, x_out, v);
fiat_p256_sub(x_out, x_out, v);

// y_out = r(V-x_out) - 2 * s1 * J
fiat_p256_sub(y_out, v, x_out);
fiat_p256_mul(y_out, y_out, r);
fiat_p256_felem s1j;
fiat_p256_mul(s1j, s1, j);
fiat_p256_sub(y_out, y_out, s1j);
fiat_p256_sub(y_out, y_out, s1j);

fiat_p256_cmovznz(x_out, z1nz, x2, x_out);
fiat_p256_cmovznz(x3, z2nz, x1, x_out);
fiat_p256_cmovznz(y_out, z1nz, y2, y_out);
fiat_p256_cmovznz(y3, z2nz, y1, y_out);
fiat_p256_cmovznz(z_out, z1nz, z2, z_out);
fiat_p256_cmovznz(z3, z2nz, z1, z_out);
ec_nistp_point_add(p256_felem_methods(), x3, y3, z3, x1, y1, z1, mixed, x2, y2, z2);
}

#include "./p256_table.h"
Expand Down
Loading
Loading