From 08d97981696babbd6a54da656ff4cd71929d8ea6 Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Wed, 11 Feb 2015 15:08:12 -0800
Subject: [PATCH]  Optionally use Co-Z arithmetic for precomputations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Selected Co-Z formulas from "Scalar Multiplication on Weierstraß Elliptic Curves from Co-Z Arithmetic" (Goundar, Joye, et al.) added as group methods with new type secp256k1_coz_t.
- Co-Z methods used for A and G point precomputations.
- DBLU cost: 3M+4S, ZADDU cost: 5M+2S.

Original idea and code by Peter Dettman. Refactored by Pieter Wuille.
---
 .travis.yml       |  5 +--
 configure.ac      | 10 ++++++
 src/ecmult_impl.h | 11 +++++++
 src/group.h       | 21 +++++++++++++
 src/group_impl.h  | 78 +++++++++++++++++++++++++++++++++++++++++++++++
 src/tests.c       | 42 +++++++++++++++++++++++++
 6 files changed, 165 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 40f8dae23f..76cb4fa24b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,7 +8,7 @@ install:
   - if [ -n "$EXTRAPACKAGES" ]; then sudo apt-get update && sudo apt-get install --no-install-recommends --no-upgrade $EXTRAPACKAGES; fi
 env:
   global:
-    - FIELD=auto  BIGNUM=auto  SCALAR=auto  ENDOMORPHISM=no  ASM=no  BUILD=check  EXTRAFLAGS= HOST= EXTRAPACKAGES=
+    - FIELD=auto  BIGNUM=auto  SCALAR=auto  ENDOMORPHISM=no  ASM=no  COZ=no  BUILD=check  EXTRAFLAGS= HOST= EXTRAPACKAGES=
   matrix:
     - SCALAR=32bit
     - SCALAR=64bit
@@ -16,6 +16,7 @@ env:
     - FIELD=64bit     ENDOMORPHISM=yes
     - FIELD=64bit                       ASM=x86_64
     - FIELD=64bit     ENDOMORPHISM=yes  ASM=x86_64
+    - FIELD=64bit     ENDOMORPHISM=yes  ASM=x86_64  COZ=yes
     - FIELD=32bit
     - FIELD=32bit     ENDOMORPHISM=yes
     - BIGNUM=no
@@ -28,5 +29,5 @@ before_script: ./autogen.sh
 script:
  - if [ -n "$HOST" ]; then export USE_HOST="--host=$HOST"; fi
  - if [ "x$HOST" = "xi686-linux-gnu" ]; then export CC="$CC -m32"; fi
- - ./configure --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR $EXTRAFLAGS $USE_HOST && make -j2 $BUILD
+ - ./configure --enable-endomorphism=$ENDOMORPHISM --enable-coz=$COZ --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR $EXTRAFLAGS $USE_HOST && make -j2 $BUILD
 os: linux
diff --git a/configure.ac b/configure.ac
index 3dc1829516..2e63a7b394 100644
--- a/configure.ac
+++ b/configure.ac
@@ -96,6 +96,11 @@ AC_ARG_ENABLE(endomorphism,
     [use_endomorphism=$enableval],
     [use_endomorphism=no])
 
+AC_ARG_ENABLE(coz,
+    AS_HELP_STRING([--enable-coz],[enable co-z precomputation (default is no)]),
+    [use_coz=$enableval],
+    [use_coz=no])
+
 AC_ARG_WITH([field], [AS_HELP_STRING([--with-field=64bit|32bit|auto],
 [Specify Field Implementation. Default is auto])],[req_field=$withval], [req_field=auto])
 
@@ -307,11 +312,16 @@ fi
 
 AC_C_BIGENDIAN()
 
+if test x"$use_coz" = x"yes"; then
+  AC_DEFINE(USE_COZ, 1, [Define this symbol to use co-z optimization])
+fi
+
 AC_MSG_NOTICE([Using assembly optimizations: $set_asm])
 AC_MSG_NOTICE([Using field implementation: $set_field])
 AC_MSG_NOTICE([Using bignum implementation: $set_bignum])
 AC_MSG_NOTICE([Using scalar implementation: $set_scalar])
 AC_MSG_NOTICE([Using endomorphism optimizations: $use_endomorphism])
+AC_MSG_NOTICE([Using co-z precomputation optimization: $use_coz])
 
 AC_CONFIG_HEADERS([src/libsecp256k1-config.h])
 AC_CONFIG_FILES([Makefile libsecp256k1.pc])
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 813be6d184..5c02d10e0b 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -33,6 +33,16 @@
  *  Prej's Z values are undefined, except for the last value.
  */
 static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, secp256k1_fe_t *zr, const secp256k1_gej_t *a) {
+#ifdef USE_COZ
+    secp256k1_coz_t d;
+    int i;
+
+    VERIFY_CHECK(!a->infinity);
+
+    secp256k1_coz_dblu_var(&d, &prej[0], a, &zr[0]);
+    for (i = 1; i < n; i++)
+        secp256k1_coz_zaddu_var(&prej[i], &d, &zr[i], &prej[i-1]);
+#else
     secp256k1_gej_t d;
     secp256k1_ge_t a_ge, d_ge;
     int i;
@@ -64,6 +74,7 @@ static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, s
      * the final point's z coordinate is actually used though, so just update that.
      */
     secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z);
+#endif
 }
 
 /** Fill a table 'pre' with precomputed odd multiples of a.
diff --git a/src/group.h b/src/group.h
index 396cfb948f..a50c4e8413 100644
--- a/src/group.h
+++ b/src/group.h
@@ -25,6 +25,17 @@ typedef struct {
     int infinity; /* whether this represents the point at infinity */
 } secp256k1_gej_t;
 
+#ifdef USE_COZ
+/** A group element of the secp256k1 curve, with an implicit z coordinate (and infinity flag).
+ *  An instance of secp256k1_coz_t is always "co-z" with some instance of secp256k1_gej_t, from
+ *  which it inherits its implied z coordinate and infinity flag. */
+typedef struct {
+    secp256k1_fe_t x; /* actual X: x/z^2 (z implied) */
+    secp256k1_fe_t y; /* actual Y: y/z^3 (z implied) */
+} secp256k1_coz_t;
+#endif
+
+/** A group element in storage form: x and y are held as secp256k1_fe_storage_t. */
 typedef struct {
     secp256k1_fe_storage_t x;
     secp256k1_fe_storage_t y;
@@ -127,4 +138,14 @@ static void secp256k1_ge_from_storage(secp256k1_ge_t *r, const secp256k1_ge_stor
 /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
 static void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r, const secp256k1_ge_storage_t *a, int flag);
 
+#ifdef USE_COZ
+/** Set r equal to the double of a, and ra equal to a, such that r is co-z with ra. rzr
+ *  returns the ratio ra->z:a->z. */
+static void secp256k1_coz_dblu_var(secp256k1_coz_t *r, secp256k1_gej_t *ra, const secp256k1_gej_t *a, secp256k1_fe_t *rzr);
+
+/** Set r equal to the sum of ra and b. ra is initially co-z with b and finally co-z with r. rzr
+ *  returns the ratio r->z:b->z, or 0 when r is infinity. */
+static void secp256k1_coz_zaddu_var(secp256k1_gej_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, const secp256k1_gej_t *b);
+#endif
+
 #endif
diff --git a/src/group_impl.h b/src/group_impl.h
index 2be61d5b81..39365b7981 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -571,6 +571,84 @@ static SECP256K1_INLINE void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r
     secp256k1_fe_storage_cmov(&r->y, &a->y, flag);
 }
 
+#ifdef USE_COZ
+static void secp256k1_coz_dblu_impl_var(secp256k1_coz_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, const secp256k1_gej_t *a) {
+    /* Writes r = 2*a, ra = a rescaled as (x1*(2*y1)^2, y1*(2*y1)^3), and *rzr = 2*y1; a->z and a->infinity are not read here. Cost: 2 mul, 4 sqr, 3 normalize, 11 mul_int/add/negate */
+    secp256k1_fe_t E, L, M, *S, t; /* S is a pointer; it is aimed at ra->x below */
+    secp256k1_fe_sqr(&E, &a->y); /* E = y1^2 */
+    secp256k1_fe_sqr(&L, &E); /* L = y1^4 */
+    secp256k1_fe_sqr(&M, &a->x); secp256k1_fe_mul_int(&M, 3); /* M = 3*x1^2 */
+    S = &ra->x; secp256k1_fe_mul(S, &a->x, &E); secp256k1_fe_mul_int(S, 4); /* x3 = S = 4*x1*y1^2 (S points at ra->x, so the value lands directly in ra) */
+    secp256k1_fe_normalize_weak(S); /* weak-normalize ra->x before it is fed to the negate below */
+    *rzr = a->y; secp256k1_fe_normalize_weak(rzr); secp256k1_fe_mul_int(rzr, 2); /* z2 = 2*y1*z1 (implicitly): only the ratio 2*y1 is stored, the caller multiplies by z1 */
+    secp256k1_fe_negate(&t, S, 1); secp256k1_fe_mul_int(&t, 2); /* t = -8*x1*y1^2 */
+    secp256k1_fe_sqr(&r->x, &M); secp256k1_fe_add(&r->x, &t); /* x2 = 9*x1^4 - 8*x1*y1^2 */
+    secp256k1_fe_negate(&t, &r->x, 5); secp256k1_fe_add(&t, S); /* t = 12*x1*y1^2 - 9*x1^4 */
+    secp256k1_fe_mul(&r->y, &M, &t); /* y2 = 36*x1^3*y1^2 - 27*x1^6 */
+    ra->y = L; secp256k1_fe_mul_int(&ra->y, 8); secp256k1_fe_normalize_weak(&ra->y); /* y3 = 8*y1^4 */
+    secp256k1_fe_negate(&t, &ra->y, 1); secp256k1_fe_add(&r->y, &t); /* y2 = 36*x1^3*y1^2 - 27*x1^6 - 8*y1^4 */
+}
+
+static void secp256k1_coz_dblu_var(secp256k1_coz_t *r, secp256k1_gej_t *ra, const secp256k1_gej_t *a, secp256k1_fe_t *rzr) {
+    ra->infinity = a->infinity; /* ra is set to the same point as a, so it takes a's infinity flag */
+    if (a->infinity) {
+        return; /* infinity: r, *rzr and ra's coordinates are left unwritten */
+    }
+    secp256k1_coz_dblu_impl_var(r, (secp256k1_coz_t*)ra, rzr, a); /* fills r, ra->x, ra->y and *rzr. NOTE(review): the cast assumes secp256k1_gej_t starts with x, y like secp256k1_coz_t - confirm */
+    secp256k1_fe_mul(&ra->z, &a->z, rzr); /* ra->z = a->z * rzr; this is also the implied z of r */
+}
+
+static void secp256k1_coz_zaddu_var(secp256k1_gej_t *r, secp256k1_coz_t *ra, secp256k1_fe_t *rzr, const secp256k1_gej_t *b) {
+    /* Co-Z addition with update: r = ra + b, and ra is rewritten to share r's z. Cost: 5 mul, 2 sqr, 6 normalize, 12 add/negate/mul_int */
+    secp256k1_fe_t X1, Y1, X2, Y2, dX, dY, C, D, W1, W2, A1;
+
+    VERIFY_CHECK(rzr != &r->z); /* both *rzr and r->z are written below, so they must be distinct objects */
+    /* Note that when b is infinity, ra is also infinity per the co-z definition */
+    r->infinity = b->infinity;
+    if (b->infinity) {
+        secp256k1_fe_set_int(rzr, 0); /* no meaningful z ratio: report 0 */
+        return;
+    }
+
+    X1 = ra->x; secp256k1_fe_normalize_weak(&X1); /* work on weak-normalized copies; ra and b are not modified by this */
+    Y1 = ra->y; secp256k1_fe_normalize_weak(&Y1);
+    X2 = b->x; secp256k1_fe_normalize_weak(&X2);
+    Y2 = b->y; secp256k1_fe_normalize_weak(&Y2);
+
+    secp256k1_fe_negate(&dX, &X2, 1); secp256k1_fe_add(&dX, &X1); /* dX = X1 - X2 */
+    secp256k1_fe_negate(&dY, &Y1, 1); secp256k1_fe_add(&dY, &Y2); /* dY = Y2 - Y1 */
+
+    if (secp256k1_fe_normalizes_to_zero_var(&dX)) { /* X1 == X2: the generic formula below is not used */
+        if (secp256k1_fe_normalizes_to_zero_var(&dY)) { /* Y1 == Y2 as well: ra and b are the same point, so double b */
+            secp256k1_coz_dblu_impl_var((secp256k1_coz_t*)r, ra, rzr, b); /* r's x, y = 2*b; ra = b rescaled to be co-z with r */
+            secp256k1_fe_mul(&r->z, &b->z, rzr); /* r->z = b->z * rzr */
+        } else {
+            r->infinity = 1; /* same X but different Y: the sum is reported as infinity */
+            secp256k1_fe_set_int(rzr, 0);
+        }
+        return;
+    }
+
+    secp256k1_fe_sqr(&C, &dX); /* C = dX^2 */
+    secp256k1_fe_sqr(&D, &dY); /* D = dY^2 */
+
+    secp256k1_fe_mul(&W1, &X1, &C); ra->x = W1; /* W1 = X1*C, which is also ra's updated x */
+    secp256k1_fe_mul(&W2, &X2, &C); /* W2 = X2*C */
+
+    secp256k1_fe_negate(&W1, &W1, 1); /* W1 = -X1*C from here on */
+    secp256k1_fe_negate(&r->x, &W2, 1); secp256k1_fe_add(&r->x, &W1); secp256k1_fe_add(&r->x, &D); /* r->x = D - X1*C - X2*C */
+
+    secp256k1_fe_add(&W2, &W1); /* W2 = (X2 - X1)*C */
+    secp256k1_fe_mul(&A1, &W2, &Y1); secp256k1_fe_negate(&ra->y, &A1, 1); /* A1 = Y1*(X2 - X1)*C; ra's updated y = -A1 */
+
+    r->y = r->x; secp256k1_fe_add(&r->y, &W1); secp256k1_fe_mul(&r->y, &r->y, &dY); /* r->y = (r->x - X1*C)*dY */
+    secp256k1_fe_add(&r->y, &A1); /* r->y = (r->x - X1*C)*dY + A1 */
+
+    secp256k1_fe_mul(&r->z, &b->z, &dX); /* r->z = b->z * dX */
+    *rzr = dX; /* z ratio r->z:b->z is dX (stored as computed, not normalized here) */
+}
+#endif
+
 #ifdef USE_ENDOMORPHISM
 static void secp256k1_ge_mul_lambda(secp256k1_ge_t *r, const secp256k1_ge_t *a) {
     static const secp256k1_fe_t beta = SECP256K1_FE_CONST(
diff --git a/src/tests.c b/src/tests.c
index 9ff182bc0d..806eb28212 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -970,6 +970,29 @@ void test_ge(void) {
                 ge_equals_gej(&ref, &resj);
             }
 
+#ifdef USE_COZ
+            /* Test Co-Z gej + ge. */
+            if ((i1 == 0) == (i2 == 0)) {
+                /* ra is initially co-z with b (=gej[i2]). */
+                secp256k1_coz_t ra;
+                secp256k1_fe_t zr2;
+                secp256k1_fe_mul(&ra.x, &ge[i1].x, &gej[i2].z); secp256k1_fe_mul(&ra.x, &ra.x, &gej[i2].z);
+                secp256k1_fe_mul(&ra.y, &ge[i1].y, &gej[i2].z); secp256k1_fe_mul(&ra.y, &ra.y, &gej[i2].z); secp256k1_fe_mul(&ra.y, &ra.y, &gej[i2].z);
+                secp256k1_coz_zaddu_var(&resj, &ra, &zr2, &gej[i2]);
+                ge_equals_gej(&ref, &resj); /* Check sum */
+                if (!secp256k1_gej_is_infinity(&resj)) {
+                    /* Check that ra still represents the same point, but now Co-Z with r. */
+                    secp256k1_gej_t ra2;
+                    secp256k1_fe_t zz;
+                    ra2.x = ra.x; ra2.y = ra.y; ra2.z = resj.z; ra2.infinity = resj.infinity;
+                    ge_equals_gej(&ge[i1], &ra2);
+                    /* Check that zr * b.z = r.z */
+                    secp256k1_fe_mul(&zz, &gej[i2].z, &zr2);
+                    CHECK(secp256k1_fe_equal_var(&zz, &resj.z));
+                }
+            }
+#endif
+
             /* Test gej + ge (const). */
             if (i2 != 0) {
                 /* secp256k1_gej_add_ge does not support its second argument being infinity. */
@@ -980,6 +1003,10 @@ void test_ge(void) {
             /* Test doubling (var). */
             if ((i1 == 0 && i2 == 0) || ((i1 + 3)/4 == (i2 + 3)/4 && ((i1 + 3)%4)/2 == ((i2 + 3)%4)/2)) {
                 secp256k1_fe_t zr2;
+#ifdef USE_COZ
+                secp256k1_gej_t ra;
+                secp256k1_coz_t r;
+#endif
                 /* Normal doubling with Z ratio result. */
                 secp256k1_gej_double_var(&resj, &gej[i1], &zr2);
                 ge_equals_gej(&ref, &resj);
@@ -989,6 +1016,21 @@ void test_ge(void) {
                 /* Normal doubling. */
                 secp256k1_gej_double_var(&resj, &gej[i2], NULL);
                 ge_equals_gej(&ref, &resj);
+#ifdef USE_COZ
+                /* Co-Z doubling with Z ratio result. */
+                secp256k1_coz_dblu_var(&r, &ra, &gej[i1], &zr2);
+                resj.x = r.x; resj.y = r.y; resj.z = ra.z; resj.infinity = ra.infinity;
+                ge_equals_gej(&ref, &resj);
+                ge_equals_gej(&ge[i1], &ra);
+                /* Check Z ratio. */
+                secp256k1_fe_mul(&zr2, &zr2, &gej[i1].z);
+                CHECK(resj.infinity || secp256k1_fe_equal_var(&zr2, &ra.z));
+                /* Co-Z doubling. */
+                secp256k1_coz_dblu_var(&r, &ra, &gej[i2], &zr2);
+                resj.x = r.x; resj.y = r.y; resj.z = ra.z; resj.infinity = ra.infinity;
+                ge_equals_gej(&ref, &resj);
+                ge_equals_gej(&ge[i2], &ra);
+#endif
             }
 
             /* Test adding opposites. */