Skip to content

Commit

Permalink
[EC] Use s2n-bignum point doubling for P-384 and P-521 (#2011)
Browse files Browse the repository at this point in the history
Use s2n-bignum point doubling for P-384 and P-521.
This brings the following performance improvements:
```
----|---- P-384 ---| before | after | speedup |
    | ECDH         |  4710  |  5256 |  1.11x  |
 M1 | ECDSA sign   | 12925  | 13322 |  1.03x  |
    | ECDSA verify |  5358  |  5884 |  1.10x  |
----|--------------|--------|-------|---------|
    | ECDH         |  3569  |  3822 |  1.07x  |
GV3 | ECDSA sign   |  9938  |  9915 |  1.00x  |
    | ECDSA verify |  4156  |  4466 |  1.07x  |
----|--------------|--------|-------|---------|
    | ECDH         |  4374  |  4777 |  1.09x  |
x86 | ECDSA sign   | 11982  | 12110 |  1.01x  |
    | ECDSA verify |  5102  |  5693 |  1.11x  |
----|--------------|--------|-------|---------|


----|---- P-521 ---| before | after | speedup |
    | ECDH         |  3406  |  3623 |  1.06x  |
 M1 | ECDSA sign   |  7060  |  7097 |  1.00x  |
    | ECDSA verify |  3199  |  3269 |  1.02x  |
----|--------------|--------|-------|---------|
    | ECDH         |  2297  |  2457 |  1.07x  |
GV3 | ECDSA sign   |  5163  |  5188 |  1.00x  |
    | ECDSA verify |  2346  |  2512 |  1.07x  |
----|--------------|--------|-------|---------|
    | ECDH         |  2981  |  3272 |  1.10x  |
x86 | ECDSA sign   |  6550  |  6575 |  1.00x  |
    | ECDSA verify |  3121  |  3352 |  1.07x  |
----|--------------|--------|-------|---------|
```
The x86 processor is Intel(R) Xeon(R) Platinum 8488C.
  • Loading branch information
dkostic authored Nov 27, 2024
1 parent de9891a commit d1a4768
Show file tree
Hide file tree
Showing 8 changed files with 1,481 additions and 1,731 deletions.
4 changes: 4 additions & 0 deletions crypto/fipsmodule/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR
p384/bignum_montsqr_p384_alt.S
p384/bignum_nonzero_6.S
p384/bignum_littleendian_6.S
p384/p384_montjdouble.S
p384/p384_montjdouble_alt.S

p521/bignum_add_p521.S
p521/bignum_sub_p521.S
Expand All @@ -226,6 +228,8 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR
p521/bignum_sqr_p521_alt.S
p521/bignum_tolebytes_p521.S
p521/bignum_fromlebytes_p521.S
p521/p521_jdouble.S
p521/p521_jdouble_alt.S

curve25519/bignum_mod_n25519.S
curve25519/bignum_neg_p25519.S
Expand Down
22 changes: 22 additions & 0 deletions crypto/fipsmodule/ec/ec_nistp.c
Original file line number Diff line number Diff line change
Expand Up @@ -769,3 +769,25 @@ void ec_nistp_scalar_mul_public(const ec_nistp_meth *ctx,
}
}
}

// ec_nistp_point_to_coordinates unpacks a point stored as one contiguous
// array |xyz_in| of 3 * |num_limbs_per_coord| limbs (x first, then y, then z)
// into the three separate coordinate buffers |x_out|, |y_out| and |z_out|.
// Exactly |num_limbs_per_coord| limbs are written to each output buffer.
void ec_nistp_point_to_coordinates(ec_nistp_felem_limb *x_out,
                                   ec_nistp_felem_limb *y_out,
                                   ec_nistp_felem_limb *z_out,
                                   const ec_nistp_felem_limb *xyz_in,
                                   size_t num_limbs_per_coord) {
  const size_t coord_size = sizeof(ec_nistp_felem_limb) * num_limbs_per_coord;

  // The coordinates sit back to back in the packed input.
  const ec_nistp_felem_limb *x_src = xyz_in;
  const ec_nistp_felem_limb *y_src = x_src + num_limbs_per_coord;
  const ec_nistp_felem_limb *z_src = y_src + num_limbs_per_coord;

  OPENSSL_memcpy(x_out, x_src, coord_size);
  OPENSSL_memcpy(y_out, y_src, coord_size);
  OPENSSL_memcpy(z_out, z_src, coord_size);
}

// ec_nistp_coordinates_to_point packs the three separate coordinate buffers
// |x_in|, |y_in| and |z_in| (each |num_limbs_per_coord| limbs long) into one
// contiguous array |xyz_out| of 3 * |num_limbs_per_coord| limbs, laid out as
// x first, then y, then z.
void ec_nistp_coordinates_to_point(ec_nistp_felem_limb *xyz_out,
                                   const ec_nistp_felem_limb *x_in,
                                   const ec_nistp_felem_limb *y_in,
                                   const ec_nistp_felem_limb *z_in,
                                   size_t num_limbs_per_coord) {
  const size_t coord_size = sizeof(ec_nistp_felem_limb) * num_limbs_per_coord;

  // Destination slots for the coordinates, stored back to back.
  ec_nistp_felem_limb *x_dst = xyz_out;
  ec_nistp_felem_limb *y_dst = x_dst + num_limbs_per_coord;
  ec_nistp_felem_limb *z_dst = y_dst + num_limbs_per_coord;

  OPENSSL_memcpy(x_dst, x_in, coord_size);
  OPENSSL_memcpy(y_dst, y_in, coord_size);
  OPENSSL_memcpy(z_dst, z_in, coord_size);
}
12 changes: 12 additions & 0 deletions crypto/fipsmodule/ec/ec_nistp.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,5 +124,17 @@ void ec_nistp_scalar_mul_public(const ec_nistp_meth *ctx,
const ec_nistp_felem_limb *y_p,
const ec_nistp_felem_limb *z_p,
const EC_SCALAR *p_scalar);

// ec_nistp_point_to_coordinates splits the packed point |xyz_in|, which holds
// 3 * |num_limbs_per_coord| limbs ordered as x, y, z, into the separate
// buffers |x_out|, |y_out| and |z_out|. Each output buffer receives
// |num_limbs_per_coord| limbs.
void ec_nistp_point_to_coordinates(ec_nistp_felem_limb *x_out,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const ec_nistp_felem_limb *xyz_in,
size_t num_limbs_per_coord);

// ec_nistp_coordinates_to_point joins the separate coordinates |x_in|, |y_in|
// and |z_in|, each |num_limbs_per_coord| limbs long, into the packed point
// |xyz_out|, which receives 3 * |num_limbs_per_coord| limbs ordered as x, y, z.
void ec_nistp_coordinates_to_point(ec_nistp_felem_limb *xyz_out,
const ec_nistp_felem_limb *x_in,
const ec_nistp_felem_limb *y_in,
const ec_nistp_felem_limb *z_in,
size_t num_limbs_per_coord);
#endif // EC_NISTP_H

8 changes: 8 additions & 0 deletions crypto/fipsmodule/ec/p384.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,15 @@ static void p384_point_double(p384_felem x_out,
const p384_felem x_in,
const p384_felem y_in,
const p384_felem z_in) {
#if defined(EC_NISTP_USE_S2N_BIGNUM)
ec_nistp_felem_limb in[P384_NLIMBS * 3];
ec_nistp_felem_limb out[P384_NLIMBS * 3];
ec_nistp_coordinates_to_point(in, x_in, y_in, z_in, P384_NLIMBS);
p384_montjdouble_selector(out, in);
ec_nistp_point_to_coordinates(x_out, y_out, z_out, out, P384_NLIMBS);
#else
ec_nistp_point_double(p384_methods(), x_out, y_out, z_out, x_in, y_in, z_in);
#endif
}

// p384_point_add calculates (x1, y1, z1) + (x2, y2, z2)
Expand Down
8 changes: 8 additions & 0 deletions crypto/fipsmodule/ec/p521.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,15 @@ static void p521_point_double(p521_felem x_out,
const p521_felem x_in,
const p521_felem y_in,
const p521_felem z_in) {
#if defined(EC_NISTP_USE_S2N_BIGNUM)
ec_nistp_felem_limb in[P521_NLIMBS * 3];
ec_nistp_felem_limb out[P521_NLIMBS * 3];
ec_nistp_coordinates_to_point(in, x_in, y_in, z_in, P521_NLIMBS);
p521_jdouble_selector(out, in);
ec_nistp_point_to_coordinates(x_out, y_out, z_out, out, P521_NLIMBS);
#else
ec_nistp_point_double(p521_methods(), x_out, y_out, z_out, x_in, y_in, z_in);
#endif
}

// p521_point_add calculates (x1, y1, z1) + (x2, y2, z2)
Expand Down
13 changes: 13 additions & 0 deletions third_party/s2n-bignum/include/s2n-bignum_aws-lc.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ static inline void bignum_tomont_p384_selector(uint64_t z[S2N_BIGNUM_STATIC 6],
if (use_s2n_bignum_alt()) { bignum_tomont_p384_alt(z, x); }
else { bignum_tomont_p384(z, x); }
}
// Point doubling on NIST curve P-384 (NOTE(review): the "mont" in the name
// suggests Montgomery-form coordinates -- confirm against s2n-bignum docs).
// Input p1[18]; output p3[18]. A point is three consecutive 6-digit
// coordinates (x, y, z).
extern void p384_montjdouble(uint64_t p3[S2N_BIGNUM_STATIC 18], uint64_t p1[S2N_BIGNUM_STATIC 18]);
extern void p384_montjdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 18], uint64_t p1[S2N_BIGNUM_STATIC 18]);
// Dispatches to the _alt implementation when use_s2n_bignum_alt() is nonzero,
// and to the default implementation otherwise.
static inline void p384_montjdouble_selector(uint64_t p3[S2N_BIGNUM_STATIC 18], uint64_t p1[S2N_BIGNUM_STATIC 18]) {
  if (!use_s2n_bignum_alt()) {
    p384_montjdouble(p3, p1);
    return;
  }
  p384_montjdouble_alt(p3, p1);
}

// Convert 6-digit (384-bit) bignum from little-endian form
// Input x[6]; output z[6]
Expand Down Expand Up @@ -152,6 +158,13 @@ extern void bignum_fromlebytes_p521(uint64_t z[S2N_BIGNUM_STATIC 9], const uint8
// Convert 9-digit 528-bit bignum to little-endian bytes
extern void bignum_tolebytes_p521(uint8_t z[S2N_BIGNUM_STATIC 66], const uint64_t x[S2N_BIGNUM_STATIC 9]);

// Point doubling on NIST curve P-521.
// Input p1[27]; output p3[27]. A point is three consecutive 9-digit
// coordinates (x, y, z).
//
// The array bounds use S2N_BIGNUM_STATIC rather than a bare `static` so that
// these declarations match the selector below and every other declaration in
// this header.
extern void p521_jdouble(uint64_t p3[S2N_BIGNUM_STATIC 27],uint64_t p1[S2N_BIGNUM_STATIC 27]);
extern void p521_jdouble_alt(uint64_t p3[S2N_BIGNUM_STATIC 27],uint64_t p1[S2N_BIGNUM_STATIC 27]);
// Dispatches to the _alt implementation when use_s2n_bignum_alt() is nonzero,
// and to the default implementation otherwise.
static inline void p521_jdouble_selector(uint64_t p3[S2N_BIGNUM_STATIC 27],uint64_t p1[S2N_BIGNUM_STATIC 27]) {
  if (use_s2n_bignum_alt()) { p521_jdouble_alt(p3, p1); }
  else { p521_jdouble(p3, p1); }
}

// curve25519_x25519_byte and curve25519_x25519_byte_alt computes the x25519
// function specified in https://www.rfc-editor.org/rfc/rfc7748. |scalar| is the
// scalar, |point| is the u-coordinate of the elliptic curve
Expand Down
1 change: 1 addition & 0 deletions util/fipstools/delocate/delocate.peg
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) /
('#' '~'? '(' [0-9] WS? "<<" WS? [0-9] [0-9]? ')' ) /
(('#' / '$') '~'? '0x'? [[0-9A-F]]+ ) /
('$(-' [0-9]+ ')') /
('#(' [0-9]+ ')') /
ARMRegister)
![fb:(+\-]
ARMConstantTweak <- ((([us] "xt" [xwhb]) / "lsl" / "lsr" / "ror" / "asr") (WS '#'? Offset)?)/
Expand Down
Loading

0 comments on commit d1a4768

Please sign in to comment.