From 47a765238473d9539779c72abffcf61e1d7dd7ec Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Tue, 4 Nov 2014 19:16:55 +0700 Subject: [PATCH] Use Co-Z arithmetic for precomputations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Selected Co-Z formulas from "Scalar Multiplication on Weierstraß Elliptic Curves from Co-Z Arithmetic" (Goundar, Joye, et al.) added as group methods with new type secp256k1_coz_t. - Co-Z methods used for A and G point precomputations. - WINDOW_A size increased to 6 since the precomputation is much faster per-point. - DBLU cost: 3M+4S, ZADDU cost: 5M+2S. - Take advantage of z-ratios from Co-Z to speed up table inversion. --- src/ecmult_impl.h | 33 ++++++++------- src/group.h | 23 ++++++++++ src/group_impl.h | 104 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 139 insertions(+), 21 deletions(-) diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 345cfae733..dfe99741a5 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -12,7 +12,7 @@ #include "ecmult.h" /* optimal for 128-bit and 256-bit exponents. */ -#define WINDOW_A 5 +#define WINDOW_A 6 /** larger numbers may result in slightly better performance, at the cost of exponentially larger precomputed tables. */ @@ -24,6 +24,9 @@ #define WINDOW_G 16 #endif +/** The number of entries a table with precomputed multiples needs to have. */ +#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2)) + /** Fill a table 'pre' with precomputed odd multiples of a. W determines the size of the table. * pre will contains the values [1*a,3*a,5*a,...,(2^(w-1)-1)*a], so it needs place for * 2^(w-2) entries. @@ -36,28 +39,26 @@ * To compute a*P + b*G, we use the jacobian version for P, and the affine version for G, as * G is constant, so it only needs to be done once in advance. 
*/ -static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const secp256k1_gej_t *a, int w) { - pre[0] = *a; - secp256k1_gej_t d; secp256k1_gej_double_var(&d, &pre[0]); - for (int i=1; i<(1 << (w-2)); i++) - secp256k1_gej_add_var(&pre[i], &d, &pre[i-1]); +static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *prej, const secp256k1_gej_t *a, int w) { + secp256k1_coz_t d; secp256k1_coz_dblu_var(&d, &prej[0], a); + secp256k1_fe_t zr; + for (int i=1; iz:b->z */ +static void secp256k1_coz_zaddu_var(secp256k1_gej_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, + const secp256k1_gej_t *b); + #endif diff --git a/src/group_impl.h b/src/group_impl.h index fef06df289..8aadade0e0 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -13,6 +13,16 @@ #include "field.h" #include "group.h" +/* TODO Consider whether this should be in the API. */ +static void secp256k1_ge_set_gej_zinv(secp256k1_ge_t *r, const secp256k1_gej_t *a, + const secp256k1_fe_t *zi) { + secp256k1_fe_t zi2; secp256k1_fe_sqr(&zi2, zi); + secp256k1_fe_t zi3; secp256k1_fe_mul(&zi3, &zi2, zi); + secp256k1_fe_mul(&r->x, &a->x, &zi2); + secp256k1_fe_mul(&r->y, &a->y, &zi3); + r->infinity = a->infinity; +} + static void secp256k1_ge_set_infinity(secp256k1_ge_t *r) { r->infinity = 1; } @@ -98,16 +108,26 @@ static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t r[len], cons for (size_t i=0; i= 0) { + secp256k1_fe_mul(&zi, &zi, &zr[i]); + secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi); + } +} + static void secp256k1_gej_set_infinity(secp256k1_gej_t *r) { r->infinity = 1; secp256k1_fe_set_int(&r->x, 0); @@ -402,6 +422,80 @@ static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t * } #endif +static void secp256k1_coz_dblu_impl_var(secp256k1_coz_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, + const secp256k1_gej_t *a) { + secp256k1_fe_t E; secp256k1_fe_sqr(&E, &a->y); + secp256k1_fe_t L; secp256k1_fe_sqr(&L, &E); + secp256k1_fe_t M; secp256k1_fe_sqr(&M, 
&a->x); secp256k1_fe_mul_int(&M, 3); + secp256k1_fe_t *S = &ra->x; secp256k1_fe_mul(S, &a->x, &E); secp256k1_fe_mul_int(S, 4); + secp256k1_fe_normalize_weak(S); + *rzr = a->y; secp256k1_fe_mul_int(rzr, 2); + secp256k1_fe_t t; secp256k1_fe_negate(&t, S, 1); secp256k1_fe_mul_int(&t, 2); + secp256k1_fe_sqr(&r->x, &M); secp256k1_fe_add(&r->x, &t); + secp256k1_fe_negate(&t, &r->x, 5); secp256k1_fe_add(&t, S); + secp256k1_fe_mul(&r->y, &M, &t); + ra->y = L; secp256k1_fe_mul_int(&ra->y, 8); secp256k1_fe_normalize_weak(&ra->y); + secp256k1_fe_negate(&t, &ra->y, 1); secp256k1_fe_add(&r->y, &t); +} + +static void secp256k1_coz_dblu_var(secp256k1_coz_t *r, secp256k1_gej_t *ra, const secp256k1_gej_t *a) { + ra->infinity = a->infinity; + if (a->infinity) { + return; + } + secp256k1_fe_t zr; + secp256k1_coz_dblu_impl_var(r, (secp256k1_coz_t*)ra, &zr, a); + secp256k1_fe_mul(&ra->z, &a->z, &zr); +} + +static void secp256k1_coz_zaddu_var(secp256k1_gej_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, + const secp256k1_gej_t *b) { + VERIFY_CHECK(rzr != &r->z); + /* Note that when b is infinity, ra is also infinity per the co-z definition */ + r->infinity = b->infinity; + if (b->infinity) { + secp256k1_fe_set_int(rzr, 0); + return; + } + + secp256k1_fe_t X1 = ra->x; secp256k1_fe_normalize_weak(&X1); + secp256k1_fe_t Y1 = ra->y; secp256k1_fe_normalize_weak(&Y1); + secp256k1_fe_t X2 = b->x; secp256k1_fe_normalize_weak(&X2); + secp256k1_fe_t Y2 = b->y; secp256k1_fe_normalize_weak(&Y2); + + secp256k1_fe_t dX; secp256k1_fe_negate(&dX, &X2, 1); secp256k1_fe_add(&dX, &X1); + secp256k1_fe_t dY; secp256k1_fe_negate(&dY, &Y1, 1); secp256k1_fe_add(&dY, &Y2); + + if (secp256k1_fe_normalizes_to_zero_var(&dX)) { + if (secp256k1_fe_normalizes_to_zero_var(&dY)) { + secp256k1_coz_dblu_impl_var((secp256k1_coz_t*)r, ra, rzr, b); + secp256k1_fe_mul(&r->z, &b->z, rzr); + } else { + r->infinity = 1; + secp256k1_fe_set_int(rzr, 0); + } + return; + } + + secp256k1_fe_t C; secp256k1_fe_sqr(&C, &dX); + 
secp256k1_fe_t D; secp256k1_fe_sqr(&D, &dY); + + secp256k1_fe_t W1; secp256k1_fe_mul(&W1, &X1, &C); ra->x = W1; + secp256k1_fe_t W2; secp256k1_fe_mul(&W2, &X2, &C); + + secp256k1_fe_negate(&W1, &W1, 1); + secp256k1_fe_negate(&r->x, &W2, 1); secp256k1_fe_add(&r->x, &W1); secp256k1_fe_add(&r->x, &D); + + secp256k1_fe_add(&W2, &W1); + secp256k1_fe_t A1; secp256k1_fe_mul(&A1, &W2, &Y1); secp256k1_fe_negate(&ra->y, &A1, 1); + + r->y = r->x; secp256k1_fe_add(&r->y, &W1); secp256k1_fe_mul(&r->y, &r->y, &dY); + secp256k1_fe_add(&r->y, &A1); + + secp256k1_fe_mul(&r->z, &b->z, &dX); + *rzr = dX; +} + static void secp256k1_ge_start(void) { static const unsigned char secp256k1_ge_consts_g_x[] = { 0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,