From 08d97981696babbd6a54da656ff4cd71929d8ea6 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Wed, 11 Feb 2015 15:08:12 -0800 Subject: [PATCH] Optionally use Co-Z arithmetic for precomputations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Selected Co-Z formulas from "Scalar Multiplication on Weierstraß Elliptic Curves from Co-Z Arithmetic" (Goundar, Joye, et al.) added as group methods with new type secp256k1_coz_t. - Co-Z methods used for A and G point precomputations. - DBLU cost: 3M+4S, ZADDU cost: 5M+2S. Original idea and code by Peter Dettman. Refactored by Pieter Wuille. --- .travis.yml | 5 +-- configure.ac | 10 ++++++ src/ecmult_impl.h | 11 +++++++ src/group.h | 21 +++++++++++++ src/group_impl.h | 78 +++++++++++++++++++++++++++++++++++++++++++++++ src/tests.c | 42 +++++++++++++++++++++++++ 6 files changed, 165 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 40f8dae23f..76cb4fa24b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ install: - if [ -n "$EXTRAPACKAGES" ]; then sudo apt-get update && sudo apt-get install --no-install-recommends --no-upgrade $EXTRAPACKAGES; fi env: global: - - FIELD=auto BIGNUM=auto SCALAR=auto ENDOMORPHISM=no ASM=no BUILD=check EXTRAFLAGS= HOST= EXTRAPACKAGES= + - FIELD=auto BIGNUM=auto SCALAR=auto ENDOMORPHISM=no ASM=no COZ=no BUILD=check EXTRAFLAGS= HOST= EXTRAPACKAGES= matrix: - SCALAR=32bit - SCALAR=64bit @@ -16,6 +16,7 @@ env: - FIELD=64bit ENDOMORPHISM=yes - FIELD=64bit ASM=x86_64 - FIELD=64bit ENDOMORPHISM=yes ASM=x86_64 + - FIELD=64bit ENDOMORPHISM=yes ASM=x86_64 COZ=yes - FIELD=32bit - FIELD=32bit ENDOMORPHISM=yes - BIGNUM=no @@ -28,5 +29,5 @@ before_script: ./autogen.sh script: - if [ -n "$HOST" ]; then export USE_HOST="--host=$HOST"; fi - if [ "x$HOST" = "xi686-linux-gnu" ]; then export CC="$CC -m32"; fi - - ./configure --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR $EXTRAFLAGS 
$USE_HOST && make -j2 $BUILD + - ./configure --enable-endomorphism=$ENDOMORPHISM --enable-coz=$COZ --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR $EXTRAFLAGS $USE_HOST && make -j2 $BUILD os: linux diff --git a/configure.ac b/configure.ac index 3dc1829516..2e63a7b394 100644 --- a/configure.ac +++ b/configure.ac @@ -96,6 +96,11 @@ AC_ARG_ENABLE(endomorphism, [use_endomorphism=$enableval], [use_endomorphism=no]) +AC_ARG_ENABLE(coz, + AS_HELP_STRING([--enable-coz],[enable co-z precomputation (default is no)]), + [use_coz=$enableval], + [use_coz=no]) + AC_ARG_WITH([field], [AS_HELP_STRING([--with-field=64bit|32bit|auto], [Specify Field Implementation. Default is auto])],[req_field=$withval], [req_field=auto]) @@ -307,11 +312,16 @@ fi AC_C_BIGENDIAN() +if test x"$use_coz" = x"yes"; then + AC_DEFINE(USE_COZ, 1, [Define this symbol to use co-z optimization]) +fi + AC_MSG_NOTICE([Using assembly optimizations: $set_asm]) AC_MSG_NOTICE([Using field implementation: $set_field]) AC_MSG_NOTICE([Using bignum implementation: $set_bignum]) AC_MSG_NOTICE([Using scalar implementation: $set_scalar]) AC_MSG_NOTICE([Using endomorphism optimizations: $use_endomorphism]) +AC_MSG_NOTICE([Using co-z precomputation optimization: $use_coz]) AC_CONFIG_HEADERS([src/libsecp256k1-config.h]) AC_CONFIG_FILES([Makefile libsecp256k1.pc]) diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 813be6d184..5c02d10e0b 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -33,6 +33,16 @@ * Prej's Z values are undefined, except for the last value. 
*/ static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, secp256k1_fe_t *zr, const secp256k1_gej_t *a) { +#ifdef USE_COZ + secp256k1_coz_t d; + int i; + + VERIFY_CHECK(!a->infinity); + + secp256k1_coz_dblu_var(&d, &prej[0], a, &zr[0]); + for (i = 1; i < n; i++) + secp256k1_coz_zaddu_var(&prej[i], &d, &zr[i], &prej[i-1]); +#else secp256k1_gej_t d; secp256k1_ge_t a_ge, d_ge; int i; @@ -64,6 +74,7 @@ static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, s * the final point's z coordinate is actually used though, so just update that. */ secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z); +#endif } /** Fill a table 'pre' with precomputed odd multiples of a. diff --git a/src/group.h b/src/group.h index 396cfb948f..a50c4e8413 100644 --- a/src/group.h +++ b/src/group.h @@ -25,6 +25,17 @@ typedef struct { int infinity; /* whether this represents the point at infinity */ } secp256k1_gej_t; +#ifdef USE_COZ +/** A group element of the secp256k1 curve, with an implicit z coordinate (and infinity flag). + * An instance of secp256k1_coz_t is always "co-z" with some instance of secp256k1_gej_t, from + * which it inherits its implied z coordinate and infinity flag. */ +typedef struct { + secp256k1_fe_t x; /* actual X: x/z^2 (z implied) */ + secp256k1_fe_t y; /* actual Y: y/z^3 (z implied) */ +} secp256k1_coz_t; +#endif + +/** Storage form of a group element (coordinates kept as secp256k1_fe_storage_t) */ typedef struct { secp256k1_fe_storage_t x; secp256k1_fe_storage_t y; @@ -127,4 +138,14 @@ static void secp256k1_ge_from_storage(secp256k1_ge_t *r, const secp256k1_ge_stor /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */ static void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r, const secp256k1_ge_storage_t *a, int flag); +#ifdef USE_COZ +/** Set r equal to the double of a, and ra equal to a, such that r is co-z with ra. rzr + * returns the ratio ra->z:a->z. 
*/ +static void secp256k1_coz_dblu_var(secp256k1_coz_t *r, secp256k1_gej_t *ra, const secp256k1_gej_t *a, secp256k1_fe_t *rzr); + +/** Set r equal to the sum of ra and b. ra is initially co-z with b and finally co-z with r. rzr + * returns the ratio r->z:b->z. */ +static void secp256k1_coz_zaddu_var(secp256k1_gej_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, const secp256k1_gej_t *b); +#endif + #endif diff --git a/src/group_impl.h b/src/group_impl.h index 2be61d5b81..39365b7981 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -571,6 +571,84 @@ static SECP256K1_INLINE void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r secp256k1_fe_storage_cmov(&r->y, &a->y, flag); } +#ifdef USE_COZ +static void secp256k1_coz_dblu_impl_var(secp256k1_coz_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, const secp256k1_gej_t *a) { + /* 2 mul, 4 sqr, 3 normalize, 11 mul_int/add/negate */ + secp256k1_fe_t E, L, M, *S, t; + secp256k1_fe_sqr(&E, &a->y); /* E = y1^2 */ + secp256k1_fe_sqr(&L, &E); /* L = y1^4 */ + secp256k1_fe_sqr(&M, &a->x); secp256k1_fe_mul_int(&M, 3); /* M = 3*x1^2 */ + S = &ra->x; secp256k1_fe_mul(S, &a->x, &E); secp256k1_fe_mul_int(S, 4); /* x3 = S = 4*x1*y1^2 */ + secp256k1_fe_normalize_weak(S); + *rzr = a->y; secp256k1_fe_normalize_weak(rzr); secp256k1_fe_mul_int(rzr, 2); /* z2 = 2*y1*z1 (implicitly) */ + secp256k1_fe_negate(&t, S, 1); secp256k1_fe_mul_int(&t, 2); /* t = -8*x1*y1^2 */ + secp256k1_fe_sqr(&r->x, &M); secp256k1_fe_add(&r->x, &t); /* x2 = 9*x1^4 - 8*x1*y1^2 */ + secp256k1_fe_negate(&t, &r->x, 5); secp256k1_fe_add(&t, S); /* t = 12*x1*y1^2 - 9*x1^4 */ + secp256k1_fe_mul(&r->y, &M, &t); /* y2 = 36*x1^3*y1^2 - 27*x1^6 */ + ra->y = L; secp256k1_fe_mul_int(&ra->y, 8); secp256k1_fe_normalize_weak(&ra->y); /* y3 = 8*y1^4 */ + secp256k1_fe_negate(&t, &ra->y, 1); secp256k1_fe_add(&r->y, &t); /* y2 = 36*x1^3*y1^2 - 27*x1^6 - 8*y1^4 */ +} + +static void secp256k1_coz_dblu_var(secp256k1_coz_t *r, secp256k1_gej_t *ra, const secp256k1_gej_t *a, 
secp256k1_fe_t *rzr) { + ra->infinity = a->infinity; + if (a->infinity) { + return; + } + secp256k1_coz_dblu_impl_var(r, (secp256k1_coz_t*)ra, rzr, a); + secp256k1_fe_mul(&ra->z, &a->z, rzr); +} + +static void secp256k1_coz_zaddu_var(secp256k1_gej_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, const secp256k1_gej_t *b) { + /* 5 mul, 2 sqr, 6 normalize, 12 add/negate/mul_int */ + secp256k1_fe_t X1, Y1, X2, Y2, dX, dY, C, D, W1, W2, A1; + + VERIFY_CHECK(rzr != &r->z); + /* Note that when b is infinity, ra is also infinity per the co-z definition */ + r->infinity = b->infinity; + if (b->infinity) { + secp256k1_fe_set_int(rzr, 0); + return; + } + + X1 = ra->x; secp256k1_fe_normalize_weak(&X1); + Y1 = ra->y; secp256k1_fe_normalize_weak(&Y1); + X2 = b->x; secp256k1_fe_normalize_weak(&X2); + Y2 = b->y; secp256k1_fe_normalize_weak(&Y2); + + secp256k1_fe_negate(&dX, &X2, 1); secp256k1_fe_add(&dX, &X1); + secp256k1_fe_negate(&dY, &Y1, 1); secp256k1_fe_add(&dY, &Y2); + + if (secp256k1_fe_normalizes_to_zero_var(&dX)) { + if (secp256k1_fe_normalizes_to_zero_var(&dY)) { + secp256k1_coz_dblu_impl_var((secp256k1_coz_t*)r, ra, rzr, b); + secp256k1_fe_mul(&r->z, &b->z, rzr); + } else { + r->infinity = 1; + secp256k1_fe_set_int(rzr, 0); + } + return; + } + + secp256k1_fe_sqr(&C, &dX); + secp256k1_fe_sqr(&D, &dY); + + secp256k1_fe_mul(&W1, &X1, &C); ra->x = W1; + secp256k1_fe_mul(&W2, &X2, &C); + + secp256k1_fe_negate(&W1, &W1, 1); + secp256k1_fe_negate(&r->x, &W2, 1); secp256k1_fe_add(&r->x, &W1); secp256k1_fe_add(&r->x, &D); + + secp256k1_fe_add(&W2, &W1); + secp256k1_fe_mul(&A1, &W2, &Y1); secp256k1_fe_negate(&ra->y, &A1, 1); + + r->y = r->x; secp256k1_fe_add(&r->y, &W1); secp256k1_fe_mul(&r->y, &r->y, &dY); + secp256k1_fe_add(&r->y, &A1); + + secp256k1_fe_mul(&r->z, &b->z, &dX); + *rzr = dX; +} +#endif + #ifdef USE_ENDOMORPHISM static void secp256k1_ge_mul_lambda(secp256k1_ge_t *r, const secp256k1_ge_t *a) { static const secp256k1_fe_t beta = SECP256K1_FE_CONST( diff --git 
a/src/tests.c b/src/tests.c index 9ff182bc0d..806eb28212 100644 --- a/src/tests.c +++ b/src/tests.c @@ -970,6 +970,29 @@ void test_ge(void) { ge_equals_gej(&ref, &resj); } +#ifdef USE_COZ + /* Test Co-Z gej + ge. */ + if ((i1 == 0) == (i2 == 0)) { + /* ra is initially co-z with b (=gej[i2]). */ + secp256k1_coz_t ra; + secp256k1_fe_t zr2; + secp256k1_fe_mul(&ra.x, &ge[i1].x, &gej[i2].z); secp256k1_fe_mul(&ra.x, &ra.x, &gej[i2].z); + secp256k1_fe_mul(&ra.y, &ge[i1].y, &gej[i2].z); secp256k1_fe_mul(&ra.y, &ra.y, &gej[i2].z); secp256k1_fe_mul(&ra.y, &ra.y, &gej[i2].z); + secp256k1_coz_zaddu_var(&resj, &ra, &zr2, &gej[i2]); + ge_equals_gej(&ref, &resj); /* Check sum */ + if (!secp256k1_gej_is_infinity(&resj)) { + /* Check that ra still represents the same point, but now Co-Z with r. */ + secp256k1_gej_t ra2; + secp256k1_fe_t zz; + ra2.x = ra.x; ra2.y = ra.y; ra2.z = resj.z; ra2.infinity = resj.infinity; + ge_equals_gej(&ge[i1], &ra2); + /* Check that zr * b.z = r.z */ + secp256k1_fe_mul(&zz, &gej[i2].z, &zr2); + CHECK(secp256k1_fe_equal_var(&zz, &resj.z)); + } + } +#endif + /* Test gej + ge (const). */ if (i2 != 0) { /* secp256k1_gej_add_ge does not support its second argument being infinity. */ @@ -980,6 +1003,10 @@ void test_ge(void) { /* Test doubling (var). */ if ((i1 == 0 && i2 == 0) || ((i1 + 3)/4 == (i2 + 3)/4 && ((i1 + 3)%4)/2 == ((i2 + 3)%4)/2)) { secp256k1_fe_t zr2; +#ifdef USE_COZ + secp256k1_gej_t ra; + secp256k1_coz_t r; +#endif /* Normal doubling with Z ratio result. */ secp256k1_gej_double_var(&resj, &gej[i1], &zr2); ge_equals_gej(&ref, &resj); @@ -989,6 +1016,21 @@ void test_ge(void) { /* Normal doubling. */ secp256k1_gej_double_var(&resj, &gej[i2], NULL); ge_equals_gej(&ref, &resj); +#ifdef USE_COZ + /* Co-Z doubling with Z ratio result. */ + secp256k1_coz_dblu_var(&r, &ra, &gej[i1], &zr2); + resj.x = r.x; resj.y = r.y; resj.z = ra.z; resj.infinity = ra.infinity; + ge_equals_gej(&ref, &resj); + ge_equals_gej(&ge[i1], &ra); + /* Check Z ratio. 
*/ + secp256k1_fe_mul(&zr2, &zr2, &gej[i1].z); + CHECK(resj.infinity || secp256k1_fe_equal_var(&zr2, &ra.z)); + /* Co-Z doubling. */ + secp256k1_coz_dblu_var(&r, &ra, &gej[i2], &zr2); + resj.x = r.x; resj.y = r.y; resj.z = ra.z; resj.infinity = ra.infinity; + ge_equals_gej(&ref, &resj); + ge_equals_gej(&ge[i2], &ra); +#endif } /* Test adding opposites. */