diff --git a/.cirrus.yml b/.cirrus.yml
index 9399fbda47..506a860336 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -1,6 +1,5 @@
env:
WIDEMUL: auto
- BIGNUM: auto
STATICPRECOMPUTATION: yes
ECMULTGENPRECISION: auto
ASM: no
@@ -59,9 +58,8 @@ task:
- env: {WIDEMUL: int128, RECOVERY: yes, EXPERIMENTAL: yes, SCHNORRSIG: yes}
- env: {WIDEMUL: int128, ECDH: yes, EXPERIMENTAL: yes, SCHNORRSIG: yes}
- env: {WIDEMUL: int128, ASM: x86_64}
- - env: {BIGNUM: no}
- - env: {BIGNUM: no, RECOVERY: yes, EXPERIMENTAL: yes, SCHNORRSIG: yes}
- - env: {BIGNUM: no, STATICPRECOMPUTATION: no}
+ - env: { RECOVERY: yes, EXPERIMENTAL: yes, SCHNORRSIG: yes}
+ - env: { STATICPRECOMPUTATION: no}
- env: {BUILD: distcheck, WITH_VALGRIND: no, CTIMETEST: no, BENCH: no}
- env: {CPPFLAGS: -DDETERMINISTIC}
- env: {CFLAGS: -O0, CTIMETEST: no}
@@ -69,7 +67,6 @@ task:
CFLAGS: "-fsanitize=undefined -fno-omit-frame-pointer"
LDFLAGS: "-fsanitize=undefined -fno-omit-frame-pointer"
UBSAN_OPTIONS: "print_stacktrace=1:halt_on_error=1"
- BIGNUM: no
ASM: x86_64
ECDH: yes
RECOVERY: yes
@@ -80,7 +77,6 @@ task:
- env: { ECMULTGENPRECISION: 8 }
- env:
RUN_VALGRIND: yes
- BIGNUM: no
ASM: x86_64
ECDH: yes
RECOVERY: yes
@@ -115,12 +111,6 @@ task:
CC: i686-linux-gnu-gcc
- env:
CC: clang --target=i686-pc-linux-gnu -isystem /usr/i686-linux-gnu/include
- matrix:
- - env:
- BIGNUM: gmp
- - env:
- BIGNUM: no
- << : *MERGE_BASE
test_script:
- ./ci/cirrus.sh
<< : *CAT_LOGS
@@ -178,7 +168,7 @@ task:
# If we haven't restored from cached (and just run brew install), this is a no-op.
- brew link valgrind
brew_script:
- - brew install automake libtool gmp gcc@9
+ - brew install automake libtool gcc@9
<< : *MERGE_BASE
test_script:
- ./ci/cirrus.sh
@@ -195,7 +185,6 @@ task:
HOST: s390x-linux-gnu
BUILD:
WITH_VALGRIND: no
- BIGNUM: no
ECDH: yes
RECOVERY: yes
EXPERIMENTAL: yes
diff --git a/Makefile.am b/Makefile.am
index 023fa6067f..58c9635e53 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -14,8 +14,6 @@ noinst_HEADERS += src/scalar_8x32_impl.h
noinst_HEADERS += src/scalar_low_impl.h
noinst_HEADERS += src/group.h
noinst_HEADERS += src/group_impl.h
-noinst_HEADERS += src/num_gmp.h
-noinst_HEADERS += src/num_gmp_impl.h
noinst_HEADERS += src/ecdsa.h
noinst_HEADERS += src/ecdsa_impl.h
noinst_HEADERS += src/eckey.h
@@ -26,14 +24,16 @@ noinst_HEADERS += src/ecmult_const.h
noinst_HEADERS += src/ecmult_const_impl.h
noinst_HEADERS += src/ecmult_gen.h
noinst_HEADERS += src/ecmult_gen_impl.h
-noinst_HEADERS += src/num.h
-noinst_HEADERS += src/num_impl.h
noinst_HEADERS += src/field_10x26.h
noinst_HEADERS += src/field_10x26_impl.h
noinst_HEADERS += src/field_5x52.h
noinst_HEADERS += src/field_5x52_impl.h
noinst_HEADERS += src/field_5x52_int128_impl.h
noinst_HEADERS += src/field_5x52_asm_impl.h
+noinst_HEADERS += src/modinv32.h
+noinst_HEADERS += src/modinv32_impl.h
+noinst_HEADERS += src/modinv64.h
+noinst_HEADERS += src/modinv64_impl.h
noinst_HEADERS += src/assumptions.h
noinst_HEADERS += src/util.h
noinst_HEADERS += src/scratch.h
diff --git a/README.md b/README.md
index 9918678e20..197a56fff8 100644
--- a/README.md
+++ b/README.md
@@ -34,11 +34,11 @@ Implementation details
* Optimized implementation of arithmetic modulo the curve's field size (2^256 - 0x1000003D1).
* Using 5 52-bit limbs (including hand-optimized assembly for x86_64, by Diederik Huys).
* Using 10 26-bit limbs (including hand-optimized assembly for 32-bit ARM, by Wladimir J. van der Laan).
- * Field inverses and square roots using a sliding window over blocks of 1s (by Peter Dettman).
* Scalar operations
* Optimized implementation without data-dependent branches of arithmetic modulo the curve's order.
* Using 4 64-bit limbs (relying on __int128 support in the compiler).
* Using 8 32-bit limbs.
+* Modular inverses (both field elements and scalars) based on [safegcd](https://gcd.cr.yp.to/index.html) with some modifications, and a variable-time variant (by Peter Dettman).
* Group operations
* Point addition formula specifically simplified for the curve equation (y^2 = x^3 + 7).
* Use addition between points in Jacobian and affine coordinates where possible.
diff --git a/build-aux/m4/bitcoin_secp.m4 b/build-aux/m4/bitcoin_secp.m4
index 7b48a5e587..e57888ca18 100644
--- a/build-aux/m4/bitcoin_secp.m4
+++ b/build-aux/m4/bitcoin_secp.m4
@@ -75,19 +75,6 @@ if test x"$has_libcrypto" = x"yes" && test x"$has_openssl_ec" = x; then
fi
])
-dnl
-AC_DEFUN([SECP_GMP_CHECK],[
-if test x"$has_gmp" != x"yes"; then
- CPPFLAGS_TEMP="$CPPFLAGS"
- CPPFLAGS="$GMP_CPPFLAGS $CPPFLAGS"
- LIBS_TEMP="$LIBS"
- LIBS="$GMP_LIBS $LIBS"
- AC_CHECK_HEADER(gmp.h,[AC_CHECK_LIB(gmp, __gmpz_init,[has_gmp=yes; GMP_LIBS="$GMP_LIBS -lgmp"; AC_DEFINE(HAVE_LIBGMP,1,[Define this symbol if libgmp is installed])])])
- CPPFLAGS="$CPPFLAGS_TEMP"
- LIBS="$LIBS_TEMP"
-fi
-])
-
AC_DEFUN([SECP_VALGRIND_CHECK],[
if test x"$has_valgrind" != x"yes"; then
CPPFLAGS_TEMP="$CPPFLAGS"
diff --git a/ci/cirrus.sh b/ci/cirrus.sh
index f223a91ca0..f26ca98d1d 100755
--- a/ci/cirrus.sh
+++ b/ci/cirrus.sh
@@ -14,7 +14,7 @@ valgrind --version || true
./configure \
--enable-experimental="$EXPERIMENTAL" \
- --with-test-override-wide-multiply="$WIDEMUL" --with-bignum="$BIGNUM" --with-asm="$ASM" \
+ --with-test-override-wide-multiply="$WIDEMUL" --with-asm="$ASM" \
--enable-ecmult-static-precomputation="$STATICPRECOMPUTATION" --with-ecmult-gen-precision="$ECMULTGENPRECISION" \
--enable-module-ecdh="$ECDH" --enable-module-recovery="$RECOVERY" \
--enable-module-schnorrsig="$SCHNORRSIG" \
diff --git a/ci/linux-debian.Dockerfile b/ci/linux-debian.Dockerfile
index 201ace4f69..5967cf8b31 100644
--- a/ci/linux-debian.Dockerfile
+++ b/ci/linux-debian.Dockerfile
@@ -8,6 +8,6 @@ RUN apt-get update
RUN apt-get install --no-install-recommends --no-upgrade -y \
git ca-certificates \
make automake libtool pkg-config dpkg-dev valgrind qemu-user \
- gcc clang libc6-dbg libgmp-dev \
- gcc-i686-linux-gnu libc6-dev-i386-cross libc6-dbg:i386 libgmp-dev:i386 \
+ gcc clang libc6-dbg \
+ gcc-i686-linux-gnu libc6-dev-i386-cross libc6-dbg:i386 \
gcc-s390x-linux-gnu libc6-dev-s390x-cross libc6-dbg:s390x
diff --git a/configure.ac b/configure.ac
index fd15d34139..e84005edf4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -48,17 +48,12 @@ case $host_os in
# in expected paths because they may conflict with system files. Ask
# Homebrew where each one is located, then adjust paths accordingly.
openssl_prefix=`$BREW --prefix openssl 2>/dev/null`
- gmp_prefix=`$BREW --prefix gmp 2>/dev/null`
valgrind_prefix=`$BREW --prefix valgrind 2>/dev/null`
if test x$openssl_prefix != x; then
PKG_CONFIG_PATH="$openssl_prefix/lib/pkgconfig:$PKG_CONFIG_PATH"
export PKG_CONFIG_PATH
CRYPTO_CPPFLAGS="-I$openssl_prefix/include"
fi
- if test x$gmp_prefix != x; then
- GMP_CPPFLAGS="-I$gmp_prefix/include"
- GMP_LIBS="-L$gmp_prefix/lib"
- fi
if test x$valgrind_prefix != x; then
VALGRIND_CPPFLAGS="-I$valgrind_prefix/include"
fi
@@ -164,9 +159,6 @@ AC_ARG_ENABLE(external_default_callbacks,
# Legal values are int64 (for [u]int64_t), int128 (for [unsigned] __int128), and auto (the default).
AC_ARG_WITH([test-override-wide-multiply], [] ,[set_widemul=$withval], [set_widemul=auto])
-AC_ARG_WITH([bignum], [AS_HELP_STRING([--with-bignum=gmp|no|auto],
-[bignum implementation to use [default=auto]])],[req_bignum=$withval], [req_bignum=auto])
-
AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|arm|no|auto],
[assembly optimizations to useĀ (experimental: arm) [default=auto]])],[req_asm=$withval], [req_asm=auto])
@@ -245,32 +237,6 @@ else
esac
fi
-if test x"$req_bignum" = x"auto"; then
- SECP_GMP_CHECK
- if test x"$has_gmp" = x"yes"; then
- set_bignum=gmp
- fi
-
- if test x"$set_bignum" = x; then
- set_bignum=no
- fi
-else
- set_bignum=$req_bignum
- case $set_bignum in
- gmp)
- SECP_GMP_CHECK
- if test x"$has_gmp" != x"yes"; then
- AC_MSG_ERROR([gmp bignum explicitly requested but libgmp not available])
- fi
- ;;
- no)
- ;;
- *)
- AC_MSG_ERROR([invalid bignum implementation selection])
- ;;
- esac
-fi
-
# Select assembly optimization
use_external_asm=no
@@ -308,24 +274,6 @@ auto)
;;
esac
-# Select bignum implementation
-case $set_bignum in
-gmp)
- AC_DEFINE(HAVE_LIBGMP, 1, [Define this symbol if libgmp is installed])
- AC_DEFINE(USE_NUM_GMP, 1, [Define this symbol to use the gmp implementation for num])
- AC_DEFINE(USE_FIELD_INV_NUM, 1, [Define this symbol to use the num-based field inverse implementation])
- AC_DEFINE(USE_SCALAR_INV_NUM, 1, [Define this symbol to use the num-based scalar inverse implementation])
- ;;
-no)
- AC_DEFINE(USE_NUM_NONE, 1, [Define this symbol to use no num implementation])
- AC_DEFINE(USE_FIELD_INV_BUILTIN, 1, [Define this symbol to use the native field inverse implementation])
- AC_DEFINE(USE_SCALAR_INV_BUILTIN, 1, [Define this symbol to use the native scalar inverse implementation])
- ;;
-*)
- AC_MSG_ERROR([invalid bignum implementation])
- ;;
-esac
-
# Set ecmult window size
if test x"$req_ecmult_window" = x"auto"; then
set_ecmult_window=15
@@ -390,11 +338,6 @@ else
enable_openssl_tests=no
fi
-if test x"$set_bignum" = x"gmp"; then
- SECP_LIBS="$SECP_LIBS $GMP_LIBS"
- SECP_INCLUDES="$SECP_INCLUDES $GMP_CPPFLAGS"
-fi
-
if test x"$enable_valgrind" = x"yes"; then
SECP_INCLUDES="$SECP_INCLUDES $VALGRIND_CPPFLAGS"
fi
@@ -571,7 +514,6 @@ echo " module extrakeys = $enable_module_extrakeys"
echo " module schnorrsig = $enable_module_schnorrsig"
echo
echo " asm = $set_asm"
-echo " bignum = $set_bignum"
echo " ecmult window size = $set_ecmult_window"
echo " ecmult gen prec. bits = $set_ecmult_gen_precision"
# Hide test-only options unless they're used.
diff --git a/doc/safegcd_implementation.md b/doc/safegcd_implementation.md
new file mode 100644
index 0000000000..8346d22e57
--- /dev/null
+++ b/doc/safegcd_implementation.md
@@ -0,0 +1,750 @@
+# The safegcd implementation in libsecp256k1 explained
+
+This document explains the modular inverse implementation in the `src/modinv*.h` files. It is based
+on the paper
+["Fast constant-time gcd computation and modular inversion"](https://gcd.cr.yp.to/papers.html#safegcd)
+by Daniel J. Bernstein and Bo-Yin Yang. The references below are for the Date: 2019.04.13 version.
+
+The actual implementation is in C of course, but for demonstration purposes Python3 is used here.
+Most implementation aspects and optimizations are explained, except those that depend on the specific
+number representation used in the C code.
+
+## 1. Computing the Greatest Common Divisor (GCD) using divsteps
+
+The algorithm from the paper (section 11), at a very high level, is this:
+
+```python
+def gcd(f, g):
+ """Compute the GCD of an odd integer f and another integer g."""
+ assert f & 1 # require f to be odd
+ delta = 1 # additional state variable
+ while g != 0:
+ assert f & 1 # f will be odd in every iteration
+ if delta > 0 and g & 1:
+ delta, f, g = 1 - delta, g, (g - f) // 2
+ elif g & 1:
+ delta, f, g = 1 + delta, f, (g + f) // 2
+ else:
+ delta, f, g = 1 + delta, f, (g ) // 2
+ return abs(f)
+```
+
+It computes the greatest common divisor of an odd integer *f* and any integer *g*. Its inner loop
+keeps rewriting the variables *f* and *g* alongside a state variable *δ* that starts at *1*, until
+*g=0* is reached. At that point, *|f|* gives the GCD. Each of the transitions in the loop is called a
+"division step" (referred to as divstep in what follows).
+
+For example, *gcd(21, 14)* would be computed as:
+- Start with *δ=1 f=21 g=14*
+- Take the third branch: *δ=2 f=21 g=7*
+- Take the first branch: *δ=-1 f=7 g=-7*
+- Take the second branch: *δ=0 f=7 g=0*
+- The answer *|f| = 7*.
+
+Why it works:
+- Divsteps can be decomposed into two steps (see paragraph 8.2 in the paper):
+ - (a) If *g* is odd, replace *(f,g)* with *(g,g-f)* or (f,g+f), resulting in an even *g*.
+ - (b) Replace *(f,g)* with *(f,g/2)* (where *g* is guaranteed to be even).
+- Neither of those two operations change the GCD:
+ - For (a), assume *gcd(f,g)=c*, then it must be the case that *f=a c* and *g=b c* for some integers *a*
+ and *b*. As *(g,g-f)=(b c,(b-a)c)* and *(f,f+g)=(a c,(a+b)c)*, the result clearly still has
+ common factor *c*. Reasoning in the other direction shows that no common factor can be added by
+ doing so either.
+ - For (b), we know that *f* is odd, so *gcd(f,g)* clearly has no factor *2*, and we can remove
+ it from *g*.
+- The algorithm will eventually converge to *g=0*. This is proven in the paper (see theorem G.3).
+- It follows that eventually we find a final value *f'* for which *gcd(f,g) = gcd(f',0)*. As the
+ gcd of *f'* and *0* is *|f'|* by definition, that is our answer.
+
+Compared to more [traditional GCD algorithms](https://en.wikipedia.org/wiki/Euclidean_algorithm), this one has the property of only ever looking at
+the low-order bits of the variables to decide the next steps, and being easy to make
+constant-time (in more low-level languages than Python). The *δ* parameter is necessary to
+guide the algorithm towards shrinking the numbers' magnitudes without explicitly needing to look
+at high order bits.
+
+Properties that will become important later:
+- Performing more divsteps than needed is not a problem, as *f* does not change anymore after *g=0*.
+- Only even numbers are divided by *2*. This means that when reasoning about it algebraically we
+ do not need to worry about rounding.
+- At every point during the algorithm's execution the next *N* steps only depend on the bottom *N*
+ bits of *f* and *g*, and on *δ*.
+
+
+## 2. From GCDs to modular inverses
+
+We want an algorithm to compute the inverse *a* of *x* modulo *M*, i.e. the number a such that *a x=1
+mod M*. This inverse only exists if the GCD of *x* and *M* is *1*, but that is always the case if *M* is
+prime and *0 < x < M*. In what follows, assume that the modular inverse exists.
+It turns out this inverse can be computed as a side effect of computing the GCD by keeping track
+of how the internal variables can be written as linear combinations of the inputs at every step
+(see the [extended Euclidean algorithm](https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm)).
+Since the GCD is *1*, such an algorithm will compute numbers *a* and *b* such that a x + b M = 1*.
+Taking that expression *mod M* gives *a x mod M = 1*, and we see that *a* is the modular inverse of *x
+mod M*.
+
+A similar approach can be used to calculate modular inverses using the divsteps-based GCD
+algorithm shown above, if the modulus *M* is odd. To do so, compute *gcd(f=M,g=x)*, while keeping
+track of extra variables *d* and *e*, for which at every step *d = f/x (mod M)* and *e = g/x (mod M)*.
+*f/x* here means the number which multiplied with *x* gives *f mod M*. As *f* and *g* are initialized to *M*
+and *x* respectively, *d* and *e* just start off being *0* (*M/x mod M = 0/x mod M = 0*) and *1* (*x/x mod M
+= 1*).
+
+```python
+def div2(M, x):
+ """Helper routine to compute x/2 mod M (where M is odd)."""
+ assert M & 1
+ if x & 1: # If x is odd, make it even by adding M.
+ x += M
+ # x must be even now, so a clean division by 2 is possible.
+ return x // 2
+
+def modinv(M, x):
+ """Compute the inverse of x mod M (given that it exists, and M is odd)."""
+ assert M & 1
+ delta, f, g, d, e = 1, M, x, 0, 1
+ while g != 0:
+ # Note that while division by two for f and g is only ever done on even inputs, this is
+ # not true for d and e, so we need the div2 helper function.
+ if delta > 0 and g & 1:
+ delta, f, g, d, e = 1 - delta, g, (g - f) // 2, e, div2(M, e - d)
+ elif g & 1:
+ delta, f, g, d, e = 1 + delta, f, (g + f) // 2, d, div2(M, e + d)
+ else:
+ delta, f, g, d, e = 1 + delta, f, (g ) // 2, d, div2(M, e )
+ # Verify that the invariants d=f/x mod M, e=g/x mod M are maintained.
+ assert f % M == (d * x) % M
+ assert g % M == (e * x) % M
+ assert f == 1 or f == -1 # |f| is the GCD, it must be 1
+ # Because of invariant d = f/x (mod M), 1/x = d/f (mod M). As |f|=1, d/f = d*f.
+ return (d * f) % M
+```
+
+Also note that this approach to track *d* and *e* throughout the computation to determine the inverse
+is different from the paper. There (see paragraph 12.1 in the paper) a transition matrix for the
+entire computation is determined (see section 3 below) and the inverse is computed from that.
+The approach here avoids the need for 2x2 matrix multiplications of various sizes, and appears to
+be faster at the level of optimization we're able to do in C.
+
+
+## 3. Batching multiple divsteps
+
+Every divstep can be expressed as a matrix multiplication, applying a transition matrix *(1/2 t)*
+to both vectors *[f, g]* and *[d, e]* (see paragraph 8.1 in the paper):
+
+```
+ t = [ u, v ]
+ [ q, r ]
+
+ [ out_f ] = (1/2 * t) * [ in_f ]
+ [ out_g ] = [ in_g ]
+
+ [ out_d ] = (1/2 * t) * [ in_d ] (mod M)
+ [ out_e ] [ in_e ]
+```
+
+where *(u, v, q, r)* is *(0, 2, -1, 1)*, *(2, 0, 1, 1)*, or *(2, 0, 0, 1)*, depending on which branch is
+taken. As above, the resulting *f* and *g* are always integers.
+
+Performing multiple divsteps corresponds to a multiplication with the product of all the
+individual divsteps' transition matrices. As each transition matrix consists of integers
+divided by *2*, the product of these matrices will consist of integers divided by *2N* (see also
+theorem 9.2 in the paper). These divisions are expensive when updating *d* and *e*, so we delay
+them: we compute the integer coefficients of the combined transition matrix scaled by *2N*, and
+do one division by *2N* as a final step:
+
+```python
+def divsteps_n_matrix(delta, f, g):
+ """Compute delta and transition matrix t after N divsteps (multiplied by 2^N)."""
+ u, v, q, r = 1, 0, 0, 1 # start with identity matrix
+ for _ in range(N):
+ if delta > 0 and g & 1:
+ delta, f, g, u, v, q, r = 1 - delta, g, (g - f) // 2, 2*q, 2*r, q-u, r-v
+ elif g & 1:
+ delta, f, g, u, v, q, r = 1 + delta, f, (g + f) // 2, 2*u, 2*v, q+u, r+v
+ else:
+ delta, f, g, u, v, q, r = 1 + delta, f, (g ) // 2, 2*u, 2*v, q , r
+ return delta, (u, v, q, r)
+```
+
+As the branches in the divsteps are completely determined by the bottom *N* bits of *f* and *g*, this
+function to compute the transition matrix only needs to see those bottom bits. Furthermore all
+intermediate results and outputs fit in *(N+1)*-bit numbers (unsigned for *f* and *g*; signed for *u*, *v*,
+*q*, and *r*) (see also paragraph 8.3 in the paper). This means that an implementation using 64-bit
+integers could set *N=62* and compute the full transition matrix for 62 steps at once without any
+big integer arithmetic at all. This is the reason why this algorithm is efficient: it only needs
+to update the full-size *f*, *g*, *d*, and *e* numbers once every *N* steps.
+
+We still need functions to compute:
+
+```
+ [ out_f ] = (1/2^N * [ u, v ]) * [ in_f ]
+ [ out_g ] ( [ q, r ]) [ in_g ]
+
+ [ out_d ] = (1/2^N * [ u, v ]) * [ in_d ] (mod M)
+ [ out_e ] ( [ q, r ]) [ in_e ]
+```
+
+Because the divsteps transformation only ever divides even numbers by two, the result of *t [f,g]* is always even. When *t* is a composition of *N* divsteps, it follows that the resulting *f*
+and *g* will be multiple of *2N*, and division by *2N* is simply shifting them down:
+
+```python
+def update_fg(f, g, t):
+ """Multiply matrix t/2^N with [f, g]."""
+ u, v, q, r = t
+ cf, cg = u*f + v*g, q*f + r*g
+ # (t / 2^N) should cleanly apply to [f,g] so the result of t*[f,g] should have N zero
+ # bottom bits.
+ assert cf % 2**N == 0
+ assert cg % 2**N == 0
+ return cf >> N, cg >> N
+```
+
+The same is not true for *d* and *e*, and we need an equivalent of the `div2` function for division by *2N mod M*.
+This is easy if we have precomputed *1/M mod 2N* (which always exists for odd *M*):
+
+```python
+def div2n(M, Mi, x):
+ """Compute x/2^N mod M, given Mi = 1/M mod 2^N."""
+ assert (M * Mi) % 2**N == 1
+ # Find a factor m such that m*M has the same bottom N bits as x. We want:
+ # (m * M) mod 2^N = x mod 2^N
+ # <=> m mod 2^N = (x / M) mod 2^N
+ # <=> m mod 2^N = (x * Mi) mod 2^N
+ m = (Mi * x) % 2**N
+ # Subtract that multiple from x, cancelling its bottom N bits.
+ x -= m * M
+ # Now a clean division by 2^N is possible.
+ assert x % 2**N == 0
+ return (x >> N) % M
+
+def update_de(d, e, t, M, Mi):
+ """Multiply matrix t/2^N with [d, e], modulo M."""
+ u, v, q, r = t
+ cd, ce = u*d + v*e, q*d + r*e
+ return div2n(M, Mi, cd), div2n(M, Mi, ce)
+```
+
+With all of those, we can write a version of `modinv` that performs *N* divsteps at once:
+
+```python3
+def modinv(M, Mi, x):
+ """Compute the modular inverse of x mod M, given Mi=1/M mod 2^N."""
+ assert M & 1
+ delta, f, g, d, e = 1, M, x, 0, 1
+ while g != 0:
+ # Compute the delta and transition matrix t for the next N divsteps (this only needs
+ # (N+1)-bit signed integer arithmetic).
+ delta, t = divsteps_n_matrix(delta, f % 2**N, g % 2**N)
+ # Apply the transition matrix t to [f, g]:
+ f, g = update_fg(f, g, t)
+ # Apply the transition matrix t to [d, e]:
+ d, e = update_de(d, e, t, M, Mi)
+ return (d * f) % M
+```
+
+This means that in practice we'll always perform a multiple of *N* divsteps. This is not a problem
+because once *g=0*, further divsteps do not affect *f*, *g*, *d*, or *e* anymore (only *δ* keeps
+increasing). For variable time code such excess iterations will be mostly optimized away in
+section 6.
+
+
+## 4. Avoiding modulus operations
+
+So far, there are two places where we compute a remainder of big numbers modulo *M*: at the end of
+`div2n` in every `update_de`, and at the very end of `modinv` after potentially negating *d* due to the
+sign of *f*. These are relatively expensive operations when done generically.
+
+To deal with the modulus operation in `div2n`, we simply stop requiring *d* and *e* to be in range
+*[0,M)* all the time. Let's start by inlining `div2n` into `update_de`, and dropping the modulus
+operation at the end:
+
+```python
+def update_de(d, e, t, M, Mi):
+ """Multiply matrix t/2^N with [d, e] mod M, given Mi=1/M mod 2^N."""
+ u, v, q, r = t
+ cd, ce = u*d + v*e, q*d + r*e
+ # Cancel out bottom N bits of cd and ce.
+ md = -((Mi * cd) % 2**N)
+ me = -((Mi * ce) % 2**N)
+ cd += md * M
+ ce += me * M
+ # And cleanly divide by 2**N.
+ return cd >> N, ce >> N
+```
+
+Let's look at bounds on the ranges of these numbers. It can be shown that *|u|+|v|* and *|q|+|r|*
+never exceed *2N* (see paragraph 8.3 in the paper), and thus a multiplication with *t* will have
+outputs whose absolute values are at most *2N* times the maximum absolute input value. In case the
+inputs *d* and *e* are in *(-M,M)*, which is certainly true for the initial values *d=0* and *e=1* assuming
+*M > 1*, the multiplication results in numbers in range *(-2NM,2NM)*. Subtracting less than *2N*
+times *M* to cancel out *N* bits brings that up to *(-2N+1M,2NM)*, and
+dividing by *2N* at the end takes it to *(-2M,M)*. Another application of `update_de` would take that
+to *(-3M,2M)*, and so forth. This progressive expansion of the variables' ranges can be
+counteracted by incrementing *d* and *e* by *M* whenever they're negative:
+
+```python
+ ...
+ if d < 0:
+ d += M
+ if e < 0:
+ e += M
+ cd, ce = u*d + v*e, q*d + r*e
+ # Cancel out bottom N bits of cd and ce.
+ ...
+```
+
+With inputs in *(-2M,M)*, they will first be shifted into range *(-M,M)*, which means that the
+output will again be in *(-2M,M)*, and this remains the case regardless of how many `update_de`
+invocations there are. In what follows, we will try to make this more efficient.
+
+Note that increasing *d* by *M* is equal to incrementing *cd* by *u M* and *ce* by *q M*. Similarly,
+increasing *e* by *M* is equal to incrementing *cd* by *v M* and *ce* by *r M*. So we could instead write:
+
+```python
+ ...
+ cd, ce = u*d + v*e, q*d + r*e
+ # Perform the equivalent of incrementing d, e by M when they're negative.
+ if d < 0:
+ cd += u*M
+ ce += q*M
+ if e < 0:
+ cd += v*M
+ ce += r*M
+ # Cancel out bottom N bits of cd and ce.
+ md = -((Mi * cd) % 2**N)
+ me = -((Mi * ce) % 2**N)
+ cd += md * M
+ ce += me * M
+ ...
+```
+
+Now note that we have two steps of corrections to *cd* and *ce* that add multiples of *M*: this
+increment, and the decrement that cancels out bottom bits. The second one depends on the first
+one, but they can still be efficiently combined by only computing the bottom bits of *cd* and *ce*
+at first, and using that to compute the final *md*, *me* values:
+
+```python
+def update_de(d, e, t, M, Mi):
+ """Multiply matrix t/2^N with [d, e], modulo M."""
+ u, v, q, r = t
+ md, me = 0, 0
+ # Compute what multiples of M to add to cd and ce.
+ if d < 0:
+ md += u
+ me += q
+ if e < 0:
+ md += v
+ me += r
+ # Compute bottom N bits of t*[d,e] + M*[md,me].
+ cd, ce = (u*d + v*e + md*M) % 2**N, (q*d + r*e + me*M) % 2**N
+ # Correct md and me such that the bottom N bits of t*[d,e] + M*[md,me] are zero.
+ md -= (Mi * cd) % 2**N
+ me -= (Mi * ce) % 2**N
+ # Do the full computation.
+ cd, ce = u*d + v*e + md*M, q*d + r*e + me*M
+ # And cleanly divide by 2**N.
+ return cd >> N, ce >> N
+```
+
+One last optimization: we can avoid the *md M* and *me M* multiplications in the bottom bits of *cd*
+and *ce* by moving them to the *md* and *me* correction:
+
+```python
+ ...
+ # Compute bottom N bits of t*[d,e].
+ cd, ce = (u*d + v*e) % 2**N, (q*d + r*e) % 2**N
+ # Correct md and me such that the bottom N bits of t*[d,e]+M*[md,me] are zero.
+ # Note that this is not the same as {md = (-Mi * cd) % 2**N} etc. That would also result in N
+ # zero bottom bits, but isn't guaranteed to be a reduction of [0,2^N) compared to the
+ # previous md and me values, and thus would violate our bounds analysis.
+ md -= (Mi*cd + md) % 2**N
+ me -= (Mi*ce + me) % 2**N
+ ...
+```
+
+The resulting function takes *d* and *e* in range *(-2M,M)* as inputs, and outputs values in the same
+range. That also means that the *d* value at the end of `modinv` will be in that range, while we want
+a result in *[0,M)*. To do that, we need a normalization function. It's easy to integrate the
+conditional negation of *d* (based on the sign of *f*) into it as well:
+
+```python
+def normalize(sign, v, M):
+ """Compute sign*v mod M, where v is in range (-2*M,M); output in [0,M)."""
+ assert sign == 1 or sign == -1
+ # v in (-2*M,M)
+ if v < 0:
+ v += M
+ # v in (-M,M). Now multiply v with sign (which can only be 1 or -1).
+ if sign == -1:
+ v = -v
+ # v in (-M,M)
+ if v < 0:
+ v += M
+ # v in [0,M)
+ return v
+```
+
+And calling it in `modinv` is simply:
+
+```python
+ ...
+ return normalize(f, d, M)
+```
+
+
+## 5. Constant-time operation
+
+The primary selling point of the algorithm is fast constant-time operation. What code flow still
+depends on the input data so far?
+
+- the number of iterations of the while *g ≠ 0* loop in `modinv`
+- the branches inside `divsteps_n_matrix`
+- the sign checks in `update_de`
+- the sign checks in `normalize`
+
+To make the while loop in `modinv` constant time it can be replaced with a constant number of
+iterations. The paper proves (Theorem 11.2) that *741* divsteps are sufficient for any *256*-bit
+inputs, and [safegcd-bounds](https://github.com/sipa/safegcd-bounds) shows that the slightly better bound *724* is
+sufficient even. Given that every loop iteration performs *N* divsteps, it will run a total of
+*⌈724/N⌉* times.
+
+To deal with the branches in `divsteps_n_matrix` we will replace them with constant-time bitwise
+operations (and hope the C compiler isn't smart enough to turn them back into branches; see
+`valgrind_ctime_test.c` for automated tests that this isn't the case). To do so, observe that a
+divstep can be written instead as (compare to the inner loop of `gcd` in section 1).
+
+```python
+ x = -f if delta > 0 else f # set x equal to (input) -f or f
+ if g & 1:
+ g += x # set g to (input) g-f or g+f
+ if delta > 0:
+ delta = -delta
+ f += g # set f to (input) g (note that g was set to g-f before)
+ delta += 1
+ g >>= 1
+```
+
+To convert the above to bitwise operations, we rely on a trick to negate conditionally: per the
+definition of negative numbers in two's complement, (*-v == ~v + 1*) holds for every number *v*. As
+*-1* in two's complement is all *1* bits, bitflipping can be expressed as xor with *-1*. It follows
+that *-v == (v ^ -1) - (-1)*. Thus, if we have a variable *c* that takes on values *0* or *-1*, then
+*(v ^ c) - c* is *v* if *c=0* and *-v* if *c=-1*.
+
+Using this we can write:
+
+```python
+ x = -f if delta > 0 else f
+```
+
+in constant-time form as:
+
+```python
+ c1 = (-delta) >> 63
+ # Conditionally negate f based on c1:
+ x = (f ^ c1) - c1
+```
+
+To use that trick, we need a helper mask variable *c1* that resolves the condition *δ>0* to *-1*
+(if true) or *0* (if false). We compute *c1* using right shifting, which is equivalent to dividing by
+the specified power of *2* and rounding down (in Python, and also in C under the assumption of a typical two's complement system; see
+`assumptions.h` for tests that this is the case). Right shifting by *63* thus maps all
+numbers in range *[-263,0)* to *-1*, and numbers in range *[0,263)* to *0*.
+
+Using the facts that *x&0=0* and *x&(-1)=x* (on two's complement systems again), we can write:
+
+```python
+ if g & 1:
+ g += x
+```
+
+as:
+
+```python
+ # Compute c2=0 if g is even and c2=-1 if g is odd.
+ c2 = -(g & 1)
+ # This masks out x if g is even, and leaves x be if g is odd.
+ g += x & c2
+```
+
+Using the conditional negation trick again we can write:
+
+```python
+ if g & 1:
+ if delta > 0:
+ delta = -delta
+```
+
+as:
+
+```python
+ # Compute c3=-1 if g is odd and delta>0, and 0 otherwise.
+ c3 = c1 & c2
+ # Conditionally negate delta based on c3:
+ delta = (delta ^ c3) - c3
+```
+
+Finally:
+
+```python
+ if g & 1:
+ if delta > 0:
+ f += g
+```
+
+becomes:
+
+```python
+ f += g & c3
+```
+
+It turns out that this can be implemented more efficiently by applying the substitution
+*η=-δ*. In this representation, negating *δ* corresponds to negating *η*, and incrementing
+*δ* corresponds to decrementing *η*. This allows us to remove the negation in the *c1*
+computation:
+
+```python
+ # Compute a mask c1 for eta < 0, and compute the conditional negation x of f:
+ c1 = eta >> 63
+ x = (f ^ c1) - c1
+ # Compute a mask c2 for odd g, and conditionally add x to g:
+ c2 = -(g & 1)
+ g += x & c2
+ # Compute a mask c for (eta < 0) and odd (input) g, and use it to conditionally negate eta,
+ # and add g to f:
+ c3 = c1 & c2
+ eta = (eta ^ c3) - c3
+ f += g & c3
+ # Incrementing delta corresponds to decrementing eta.
+ eta -= 1
+ g >>= 1
+```
+
+By replacing the loop in `divsteps_n_matrix` with a variant of the divstep code above (extended to
+also apply all *f* operations to *u*, *v* and all *g* operations to *q*, *r*), a constant-time version of
+`divsteps_n_matrix` is obtained. The full code will be in section 7.
+
+These bit fiddling tricks can also be used to make the conditional negations and additions in
+`update_de` and `normalize` constant-time.
+
+
+## 6. Variable-time optimizations
+
+In section 5, we modified the `divsteps_n_matrix` function (and a few others) to be constant time.
+Constant time operations are only necessary when computing modular inverses of secret data. In
+other cases, it slows down calculations unnecessarily. In this section, we will construct a
+faster non-constant time `divsteps_n_matrix` function.
+
+To do so, first consider yet another way of writing the inner loop of divstep operations in
+`gcd` from section 1. This decomposition is also explained in the paper in section 8.2.
+
+```python
+for _ in range(N):
+ if g & 1 and eta < 0:
+ eta, f, g = -eta, g, -f
+ if g & 1:
+ g += f
+ eta -= 1
+ g >>= 1
+```
+
+Whenever *g* is even, the loop only shifts *g* down and decreases *η*. When *g* ends in multiple zero
+bits, these iterations can be consolidated into one step. This requires counting the bottom zero
+bits efficiently, which is possible on most platforms; it is abstracted here as the function
+`count_trailing_zeros`.
+
+```python
+def count_trailing_zeros(v):
+ """For a non-zero value v, find z such that v=(d<>= zeros
+ i -= zeros
+ if i == 0:
+ break
+ # We know g is odd now
+ if eta < 0:
+ eta, f, g = -eta, g, -f
+ g += f
+ # g is even now, and the eta decrement and g shift will happen in the next loop.
+```
+
+We can now remove multiple bottom *0* bits from *g* at once, but still need a full iteration whenever
+there is a bottom *1* bit. In what follows, we will get rid of multiple *1* bits simultaneously as
+well.
+
+Observe that as long as *η ≥ 0*, the loop does not modify *f*. Instead, it cancels out bottom
+bits of *g* and shifts them out, and decreases *η* and *i* accordingly - interrupting only when *η*
+becomes negative, or when *i* reaches *0*. Combined, this is equivalent to adding a multiple of *f* to
+*g* to cancel out multiple bottom bits, and then shifting them out.
+
+It is easy to find what that multiple is: we want a number *w* such that *g+w f* has a few bottom
+zero bits. If that number of bits is *L*, we want *g+w f mod 2L = 0*, or *w = -g/f mod 2L*. Since *f*
+is odd, such a *w* exists for any *L*. *L* cannot be more than *i* steps (as we'd finish the loop before
+doing more) or more than *η+1* steps (as we'd run `eta, f, g = -eta, g, f` at that point), but
+apart from that, we're only limited by the complexity of computing *w*.
+
+This code demonstrates how to cancel up to 4 bits per step:
+
+```python
+NEGINV16 = [15, 5, 3, 9, 7, 13, 11, 1] # NEGINV16[n//2] = (-n)^-1 mod 16, for odd n
+i = N
+while True:
+ zeros = min(i, count_trailing_zeros(g))
+ eta -= zeros
+ g >>= zeros
+ i -= zeros
+ if i == 0:
+ break
+ # We know g is odd now
+ if eta < 0:
+ eta, f, g = -eta, g, f
+ # Compute limit on number of bits to cancel
+ limit = min(min(eta + 1, i), 4)
+ # Compute w = -g/f mod 2**limit, using the table value for -1/f mod 2**4. Note that f is
+ # always odd, so its inverse modulo a power of two always exists.
+ w = (g * NEGINV16[(f & 15) // 2]) % (2**limit)
+ # As w = -g/f mod (2**limit), g+w*f mod 2**limit = 0 mod 2**limit.
+ g += w * f
+ assert g % (2**limit) == 0
+ # The next iteration will now shift out at least limit bottom zero bits from g.
+```
+
+By using a bigger table more bits can be cancelled at once. The table can also be implemented
+as a formula. Several formulas are known for computing modular inverses modulo powers of two;
+some can be found in Hacker's Delight second edition by Henry S. Warren, Jr. pages 245-247.
+Here we need the negated modular inverse, which is a simple transformation of those:
+
+- Instead of a 3-bit table:
+ - *-f* or *f ^ 6*
+- Instead of a 4-bit table:
+ - *1 - f(f + 1)*
+ - *-(f + (((f + 1) & 4) << 1))*
+- For larger tables the following technique can be used: if *w=-1/f mod 2L*, then *w(w f+2)* is
+ *-1/f mod 22L*. This allows extending the previous formulas (or tables). In particular we
+ have this 6-bit function (based on the 3-bit function above):
+ - *f(f2 - 2)*
+
+This loop, again extended to also handle *u*, *v*, *q*, and *r* alongside *f* and *g*, placed in
+`divsteps_n_matrix`, gives a significantly faster, but non-constant time version.
+
+
+## 7. Final Python version
+
+All together we need the following functions:
+
+- A way to compute the transition matrix in constant time, using the `divsteps_n_matrix` function
+ from section 2, but with its loop replaced by a variant of the constant-time divstep from
+ section 5, extended to handle *u*, *v*, *q*, *r*:
+
+```python
+def divsteps_n_matrix(eta, f, g):
+ """Compute eta and transition matrix t after N divsteps (multiplied by 2^N)."""
+ u, v, q, r = 1, 0, 0, 1 # start with identity matrix
+ for _ in range(N):
+ c1 = eta >> 63
+ # Compute x, y, z as conditionally-negated versions of f, u, v.
+ x, y, z = (f ^ c1) - c1, (u ^ c1) - c1, (v ^ c1) - c1
+ c2 = -(g & 1)
+ # Conditionally add x, y, z to g, q, r.
+ g, q, r = g + (x & c2), q + (y & c2), r + (z & c2)
+ c1 &= c2 # reusing c1 here for the earlier c3 variable
+ eta = (eta ^ c1) - (c1 + 1) # inlining the unconditional eta decrement here
+ # Conditionally add g, q, r to f, u, v.
+ f, u, v = f + (g & c1), u + (q & c1), v + (r & c1)
+ # When shifting g down, don't shift q, r, as we construct a transition matrix multiplied
+ # by 2^N. Instead, shift f's coefficients u and v up.
+ g, u, v = g >> 1, u << 1, v << 1
+ return eta, (u, v, q, r)
+```
+
+- The functions to update *f* and *g*, and *d* and *e*, from section 2 and section 4, with the constant-time
+ changes to `update_de` from section 5:
+
+```python
+def update_fg(f, g, t):
+ """Multiply matrix t/2^N with [f, g]."""
+ u, v, q, r = t
+ cf, cg = u*f + v*g, q*f + r*g
+ return cf >> N, cg >> N
+
+def update_de(d, e, t, M, Mi):
+ """Multiply matrix t/2^N with [d, e], modulo M."""
+ u, v, q, r = t
+ d_sign, e_sign = d >> 257, e >> 257
+ md, me = (u & d_sign) + (v & e_sign), (q & d_sign) + (r & e_sign)
+ cd, ce = (u*d + v*e) % 2**N, (q*d + r*e) % 2**N
+ md -= (Mi*cd + md) % 2**N
+ me -= (Mi*ce + me) % 2**N
+ cd, ce = u*d + v*e + Mi*md, q*d + r*e + Mi*me
+ return cd >> N, ce >> N
+```
+
+- The `normalize` function from section 4, made constant time as well:
+
+```python
+def normalize(sign, v, M):
+ """Compute sign*v mod M, where v in (-2*M,M); output in [0,M)."""
+ v_sign = v >> 257
+ # Conditionally add M to v.
+ v += M & v_sign
+ c = (sign - 1) >> 1
+ # Conditionally negate v.
+ v = (v ^ c) - c
+ v_sign = v >> 257
+ # Conditionally add M to v again.
+ v += M & v_sign
+ return v
+```
+
+- And finally the `modinv` function too, adapted to use *η* instead of *δ*, and using the fixed
+ iteration count from section 5:
+
+```python
+def modinv(M, Mi, x):
+ """Compute the modular inverse of x mod M, given Mi=1/M mod 2^N."""
+ eta, f, g, d, e = -1, M, x, 0, 1
+ for _ in range((724 + N - 1) // N):
+ eta, t = divsteps_n_matrix(-eta, f % 2**N, g % 2**N)
+ f, g = update_fg(f, g, t)
+ d, e = update_de(d, e, t, M, Mi)
+ return normalize(f, d, M)
+```
+
+- To get a variable time version, replace the `divsteps_n_matrix` function with one that uses the
+ divsteps loop from section 5, and a `modinv` version that calls it without the fixed iteration
+ count:
+
+```python
+NEGINV16 = [15, 5, 3, 9, 7, 13, 11, 1] # NEGINV16[n//2] = (-n)^-1 mod 16, for odd n
+def divsteps_n_matrix_var(eta, f, g):
+ """Compute eta and transition matrix t after N divsteps (multiplied by 2^N)."""
+ u, v, q, r = 1, 0, 0, 1
+ i = N
+ while True:
+ zeros = min(i, count_trailing_zeros(g))
+ eta, i = eta - zeros, i - zeros
+ g, u, v = g >> zeros, u << zeros, v << zeros
+ if i == 0:
+ break
+ if eta < 0:
+ eta, f, u, v, g, q, r = -eta, g, q, r, -f, -u, -v
+ limit = min(min(eta + 1, i), 4)
+ w = (g * NEGINV16[(f & 15) // 2]) % (2**limit)
+ g, q, r = g + w*f, q + w*u, r + w*v
+ return eta, (u, v, q, r)
+
+def modinv_var(M, Mi, x):
+ """Compute the modular inverse of x mod M, given Mi = 1/M mod 2^N."""
+ eta, f, g, d, e = -1, M, x, 0, 1
+ while g != 0:
+ eta, t = divsteps_n_matrix_var(eta, f % 2**N, g % 2**N)
+ f, g = update_fg(f, g, t)
+ d, e = update_de(d, e, t, M, Mi)
+ return normalize(f, d, Mi)
+```
diff --git a/src/basic-config.h b/src/basic-config.h
index bb6b58259b..e4b1b8b056 100644
--- a/src/basic-config.h
+++ b/src/basic-config.h
@@ -13,19 +13,10 @@
#undef USE_ECMULT_STATIC_PRECOMPUTATION
#undef USE_EXTERNAL_ASM
#undef USE_EXTERNAL_DEFAULT_CALLBACKS
-#undef USE_FIELD_INV_BUILTIN
-#undef USE_FIELD_INV_NUM
-#undef USE_NUM_GMP
-#undef USE_NUM_NONE
-#undef USE_SCALAR_INV_BUILTIN
-#undef USE_SCALAR_INV_NUM
#undef USE_FORCE_WIDEMUL_INT64
#undef USE_FORCE_WIDEMUL_INT128
#undef ECMULT_WINDOW_SIZE
-#define USE_NUM_NONE 1
-#define USE_FIELD_INV_BUILTIN 1
-#define USE_SCALAR_INV_BUILTIN 1
#define USE_WIDEMUL_64 1
#define ECMULT_WINDOW_SIZE 15
diff --git a/src/bench_ecmult.c b/src/bench_ecmult.c
index 85b9e439ee..204e85a5dd 100644
--- a/src/bench_ecmult.c
+++ b/src/bench_ecmult.c
@@ -9,7 +9,6 @@
#include "util.h"
#include "hash_impl.h"
-#include "num_impl.h"
#include "field_impl.h"
#include "group_impl.h"
#include "scalar_impl.h"
diff --git a/src/bench_internal.c b/src/bench_internal.c
index 7fa6882c16..73b8a24ccb 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -10,7 +10,6 @@
#include "assumptions.h"
#include "util.h"
#include "hash_impl.h"
-#include "num_impl.h"
#include "field_impl.h"
#include "group_impl.h"
#include "scalar_impl.h"
@@ -99,15 +98,6 @@ void bench_scalar_negate(void* arg, int iters) {
}
}
-void bench_scalar_sqr(void* arg, int iters) {
- int i;
- bench_inv *data = (bench_inv*)arg;
-
- for (i = 0; i < iters; i++) {
- secp256k1_scalar_sqr(&data->scalar[0], &data->scalar[0]);
- }
-}
-
void bench_scalar_mul(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;
@@ -255,26 +245,6 @@ void bench_group_add_affine_var(void* arg, int iters) {
}
}
-void bench_group_jacobi_var(void* arg, int iters) {
- int i, j = 0;
- bench_inv *data = (bench_inv*)arg;
-
- for (i = 0; i < iters; i++) {
- j += secp256k1_gej_has_quad_y_var(&data->gej[0]);
- /* Vary the Y and Z coordinates of the input (the X coordinate doesn't matter to
- secp256k1_gej_has_quad_y_var). Note that the resulting coordinates will
- generally not correspond to a point on the curve, but this is not a problem
- for the code being benchmarked here. Adding and normalizing have less
- overhead than EC operations (which could guarantee the point remains on the
- curve). */
- secp256k1_fe_add(&data->gej[0].y, &data->fe[1]);
- secp256k1_fe_add(&data->gej[0].z, &data->fe[2]);
- secp256k1_fe_normalize_var(&data->gej[0].y);
- secp256k1_fe_normalize_var(&data->gej[0].z);
- }
- CHECK(j <= iters);
-}
-
void bench_group_to_affine_var(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;
@@ -282,8 +252,10 @@ void bench_group_to_affine_var(void* arg, int iters) {
for (i = 0; i < iters; ++i) {
secp256k1_ge_set_gej_var(&data->ge[1], &data->gej[0]);
/* Use the output affine X/Y coordinates to vary the input X/Y/Z coordinates.
- Similar to bench_group_jacobi_var, this approach does not result in
- coordinates of points on the curve. */
+ Note that the resulting coordinates will generally not correspond to a point
+ on the curve, but this is not a problem for the code being benchmarked here.
+ Adding and normalizing have less overhead than EC operations (which could
+ guarantee the point remains on the curve). */
secp256k1_fe_add(&data->gej[0].x, &data->ge[1].y);
secp256k1_fe_add(&data->gej[0].y, &data->fe[2]);
secp256k1_fe_add(&data->gej[0].z, &data->ge[1].x);
@@ -369,35 +341,16 @@ void bench_context_sign(void* arg, int iters) {
}
}
-#ifndef USE_NUM_NONE
-void bench_num_jacobi(void* arg, int iters) {
- int i, j = 0;
- bench_inv *data = (bench_inv*)arg;
- secp256k1_num nx, na, norder;
-
- secp256k1_scalar_get_num(&nx, &data->scalar[0]);
- secp256k1_scalar_order_get_num(&norder);
- secp256k1_scalar_get_num(&na, &data->scalar[1]);
-
- for (i = 0; i < iters; i++) {
- j += secp256k1_num_jacobi(&nx, &norder);
- secp256k1_num_add(&nx, &nx, &na);
- }
- CHECK(j <= iters);
-}
-#endif
-
int main(int argc, char **argv) {
bench_inv data;
int iters = get_iters(20000);
if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100);
if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
- if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "sqr")) run_benchmark("scalar_sqr", bench_scalar_sqr, bench_setup, NULL, &data, 10, iters*10);
if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);
if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "split")) run_benchmark("scalar_split", bench_scalar_split, bench_setup, NULL, &data, 10, iters);
- if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, 2000);
- if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, 2000);
+ if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, iters);
+ if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, iters);
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize", bench_field_normalize, bench_setup, NULL, &data, 10, iters*100);
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize_weak", bench_field_normalize_weak, bench_setup, NULL, &data, 10, iters*100);
@@ -411,7 +364,6 @@ int main(int argc, char **argv) {
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, iters*10);
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10);
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10);
- if (have_flag(argc, argv, "group") || have_flag(argc, argv, "jacobi")) run_benchmark("group_jacobi_var", bench_group_jacobi_var, bench_setup, NULL, &data, 10, iters);
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);
if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters);
@@ -424,8 +376,5 @@ int main(int argc, char **argv) {
if (have_flag(argc, argv, "context") || have_flag(argc, argv, "verify")) run_benchmark("context_verify", bench_context_verify, bench_setup, NULL, &data, 10, 1 + iters/1000);
if (have_flag(argc, argv, "context") || have_flag(argc, argv, "sign")) run_benchmark("context_sign", bench_context_sign, bench_setup, NULL, &data, 10, 1 + iters/100);
-#ifndef USE_NUM_NONE
- if (have_flag(argc, argv, "num") || have_flag(argc, argv, "jacobi")) run_benchmark("num_jacobi", bench_num_jacobi, bench_setup, NULL, &data, 10, iters*10);
-#endif
return 0;
}
diff --git a/src/ecmult.h b/src/ecmult.h
index 7aa394a113..7ab617e20e 100644
--- a/src/ecmult.h
+++ b/src/ecmult.h
@@ -7,7 +7,6 @@
#ifndef SECP256K1_ECMULT_H
#define SECP256K1_ECMULT_H
-#include "num.h"
#include "group.h"
#include "scalar.h"
#include "scratch.h"
diff --git a/src/field.h b/src/field.h
index ee222ee5d7..c58554b53e 100644
--- a/src/field.h
+++ b/src/field.h
@@ -104,9 +104,6 @@ static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a);
* itself. */
static int secp256k1_fe_sqrt(secp256k1_fe *r, const secp256k1_fe *a);
-/** Checks whether a field element is a quadratic residue. */
-static int secp256k1_fe_is_quad_var(const secp256k1_fe *a);
-
/** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be
* at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a);
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index 62bffdc21b..c2802514dc 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -9,6 +9,7 @@
#include "util.h"
#include "field.h"
+#include "modinv32_impl.h"
#ifdef VERIFY
static void secp256k1_fe_verify(const secp256k1_fe *a) {
@@ -1164,4 +1165,92 @@ static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const se
#endif
}
+static void secp256k1_fe_from_signed30(secp256k1_fe *r, const secp256k1_modinv32_signed30 *a) {
+ const uint32_t M26 = UINT32_MAX >> 6;
+ const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4],
+ a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8];
+
+ /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and
+ * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8).
+ */
+ VERIFY_CHECK(a0 >> 30 == 0);
+ VERIFY_CHECK(a1 >> 30 == 0);
+ VERIFY_CHECK(a2 >> 30 == 0);
+ VERIFY_CHECK(a3 >> 30 == 0);
+ VERIFY_CHECK(a4 >> 30 == 0);
+ VERIFY_CHECK(a5 >> 30 == 0);
+ VERIFY_CHECK(a6 >> 30 == 0);
+ VERIFY_CHECK(a7 >> 30 == 0);
+ VERIFY_CHECK(a8 >> 16 == 0);
+
+ r->n[0] = a0 & M26;
+ r->n[1] = (a0 >> 26 | a1 << 4) & M26;
+ r->n[2] = (a1 >> 22 | a2 << 8) & M26;
+ r->n[3] = (a2 >> 18 | a3 << 12) & M26;
+ r->n[4] = (a3 >> 14 | a4 << 16) & M26;
+ r->n[5] = (a4 >> 10 | a5 << 20) & M26;
+ r->n[6] = (a5 >> 6 | a6 << 24) & M26;
+ r->n[7] = (a6 >> 2 ) & M26;
+ r->n[8] = (a6 >> 28 | a7 << 2) & M26;
+ r->n[9] = (a7 >> 24 | a8 << 6);
+
+#ifdef VERIFY
+ r->magnitude = 1;
+ r->normalized = 1;
+ secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_fe *a) {
+ const uint32_t M30 = UINT32_MAX >> 2;
+ const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4],
+ a5 = a->n[5], a6 = a->n[6], a7 = a->n[7], a8 = a->n[8], a9 = a->n[9];
+
+#ifdef VERIFY
+ VERIFY_CHECK(a->normalized);
+#endif
+
+ r->v[0] = (a0 | a1 << 26) & M30;
+ r->v[1] = (a1 >> 4 | a2 << 22) & M30;
+ r->v[2] = (a2 >> 8 | a3 << 18) & M30;
+ r->v[3] = (a3 >> 12 | a4 << 14) & M30;
+ r->v[4] = (a4 >> 16 | a5 << 10) & M30;
+ r->v[5] = (a5 >> 20 | a6 << 6) & M30;
+ r->v[6] = (a6 >> 24 | a7 << 2
+ | a8 << 28) & M30;
+ r->v[7] = (a8 >> 2 | a9 << 24) & M30;
+ r->v[8] = a9 >> 6;
+}
+
+static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_fe = {
+ {{-0x3D1, -4, 0, 0, 0, 0, 0, 0, 65536}},
+ 0x2DDACACFL
+};
+
+static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
+ secp256k1_fe tmp;
+ secp256k1_modinv32_signed30 s;
+
+ tmp = *x;
+ secp256k1_fe_normalize(&tmp);
+ secp256k1_fe_to_signed30(&s, &tmp);
+ secp256k1_modinv32(&s, &secp256k1_const_modinfo_fe);
+ secp256k1_fe_from_signed30(r, &s);
+
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
+}
+
+static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
+ secp256k1_fe tmp;
+ secp256k1_modinv32_signed30 s;
+
+ tmp = *x;
+ secp256k1_fe_normalize_var(&tmp);
+ secp256k1_fe_to_signed30(&s, &tmp);
+ secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_fe);
+ secp256k1_fe_from_signed30(r, &s);
+
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
+}
+
#endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 3465ea3247..b73cfea206 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -13,6 +13,7 @@
#include "util.h"
#include "field.h"
+#include "modinv64_impl.h"
#if defined(USE_ASM_X86_64)
#include "field_5x52_asm_impl.h"
@@ -498,4 +499,80 @@ static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const se
#endif
}
+static void secp256k1_fe_from_signed62(secp256k1_fe *r, const secp256k1_modinv64_signed62 *a) {
+ const uint64_t M52 = UINT64_MAX >> 12;
+ const uint64_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4];
+
+ /* The output from secp256k1_modinv64{_var} should be normalized to range [0,modulus), and
+ * have limbs in [0,2^62). The modulus is < 2^256, so the top limb must be below 2^(256-62*4).
+ */
+ VERIFY_CHECK(a0 >> 62 == 0);
+ VERIFY_CHECK(a1 >> 62 == 0);
+ VERIFY_CHECK(a2 >> 62 == 0);
+ VERIFY_CHECK(a3 >> 62 == 0);
+ VERIFY_CHECK(a4 >> 8 == 0);
+
+ r->n[0] = a0 & M52;
+ r->n[1] = (a0 >> 52 | a1 << 10) & M52;
+ r->n[2] = (a1 >> 42 | a2 << 20) & M52;
+ r->n[3] = (a2 >> 32 | a3 << 30) & M52;
+ r->n[4] = (a3 >> 22 | a4 << 40);
+
+#ifdef VERIFY
+ r->magnitude = 1;
+ r->normalized = 1;
+ secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_to_signed62(secp256k1_modinv64_signed62 *r, const secp256k1_fe *a) {
+ const uint64_t M62 = UINT64_MAX >> 2;
+ const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4];
+
+#ifdef VERIFY
+ VERIFY_CHECK(a->normalized);
+#endif
+
+ r->v[0] = (a0 | a1 << 52) & M62;
+ r->v[1] = (a1 >> 10 | a2 << 42) & M62;
+ r->v[2] = (a2 >> 20 | a3 << 32) & M62;
+ r->v[3] = (a3 >> 30 | a4 << 22) & M62;
+ r->v[4] = a4 >> 40;
+}
+
+static const secp256k1_modinv64_modinfo secp256k1_const_modinfo_fe = {
+ {{-0x1000003D1LL, 0, 0, 0, 256}},
+ 0x27C7F6E22DDACACFLL
+};
+
+static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
+ secp256k1_fe tmp;
+ secp256k1_modinv64_signed62 s;
+
+ tmp = *x;
+ secp256k1_fe_normalize(&tmp);
+ secp256k1_fe_to_signed62(&s, &tmp);
+ secp256k1_modinv64(&s, &secp256k1_const_modinfo_fe);
+ secp256k1_fe_from_signed62(r, &s);
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
+#endif
+}
+
+static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
+ secp256k1_fe tmp;
+ secp256k1_modinv64_signed62 s;
+
+ tmp = *x;
+ secp256k1_fe_normalize_var(&tmp);
+ secp256k1_fe_to_signed62(&s, &tmp);
+ secp256k1_modinv64_var(&s, &secp256k1_const_modinfo_fe);
+ secp256k1_fe_from_signed62(r, &s);
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
+#endif
+}
+
#endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/field_impl.h b/src/field_impl.h
index f0096f6312..374284a1f4 100644
--- a/src/field_impl.h
+++ b/src/field_impl.h
@@ -12,7 +12,6 @@
#endif
#include "util.h"
-#include "num.h"
#if defined(SECP256K1_WIDEMUL_INT128)
#include "field_5x52_impl.h"
@@ -136,158 +135,6 @@ static int secp256k1_fe_sqrt(secp256k1_fe *r, const secp256k1_fe *a) {
return secp256k1_fe_equal(&t1, a);
}
-static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) {
- secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
- int j;
-
- /** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
- * { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
- * [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
- */
-
- secp256k1_fe_sqr(&x2, a);
- secp256k1_fe_mul(&x2, &x2, a);
-
- secp256k1_fe_sqr(&x3, &x2);
- secp256k1_fe_mul(&x3, &x3, a);
-
- x6 = x3;
- for (j=0; j<3; j++) {
- secp256k1_fe_sqr(&x6, &x6);
- }
- secp256k1_fe_mul(&x6, &x6, &x3);
-
- x9 = x6;
- for (j=0; j<3; j++) {
- secp256k1_fe_sqr(&x9, &x9);
- }
- secp256k1_fe_mul(&x9, &x9, &x3);
-
- x11 = x9;
- for (j=0; j<2; j++) {
- secp256k1_fe_sqr(&x11, &x11);
- }
- secp256k1_fe_mul(&x11, &x11, &x2);
-
- x22 = x11;
- for (j=0; j<11; j++) {
- secp256k1_fe_sqr(&x22, &x22);
- }
- secp256k1_fe_mul(&x22, &x22, &x11);
-
- x44 = x22;
- for (j=0; j<22; j++) {
- secp256k1_fe_sqr(&x44, &x44);
- }
- secp256k1_fe_mul(&x44, &x44, &x22);
-
- x88 = x44;
- for (j=0; j<44; j++) {
- secp256k1_fe_sqr(&x88, &x88);
- }
- secp256k1_fe_mul(&x88, &x88, &x44);
-
- x176 = x88;
- for (j=0; j<88; j++) {
- secp256k1_fe_sqr(&x176, &x176);
- }
- secp256k1_fe_mul(&x176, &x176, &x88);
-
- x220 = x176;
- for (j=0; j<44; j++) {
- secp256k1_fe_sqr(&x220, &x220);
- }
- secp256k1_fe_mul(&x220, &x220, &x44);
-
- x223 = x220;
- for (j=0; j<3; j++) {
- secp256k1_fe_sqr(&x223, &x223);
- }
- secp256k1_fe_mul(&x223, &x223, &x3);
-
- /* The final result is then assembled using a sliding window over the blocks. */
-
- t1 = x223;
- for (j=0; j<23; j++) {
- secp256k1_fe_sqr(&t1, &t1);
- }
- secp256k1_fe_mul(&t1, &t1, &x22);
- for (j=0; j<5; j++) {
- secp256k1_fe_sqr(&t1, &t1);
- }
- secp256k1_fe_mul(&t1, &t1, a);
- for (j=0; j<3; j++) {
- secp256k1_fe_sqr(&t1, &t1);
- }
- secp256k1_fe_mul(&t1, &t1, &x2);
- for (j=0; j<2; j++) {
- secp256k1_fe_sqr(&t1, &t1);
- }
- secp256k1_fe_mul(r, a, &t1);
-}
-
-static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *a) {
-#if defined(USE_FIELD_INV_BUILTIN)
- secp256k1_fe_inv(r, a);
-#elif defined(USE_FIELD_INV_NUM)
- secp256k1_num n, m;
- static const secp256k1_fe negone = SECP256K1_FE_CONST(
- 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
- 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL, 0xFFFFFC2EUL
- );
- /* secp256k1 field prime, value p defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. */
- static const unsigned char prime[32] = {
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
- };
- unsigned char b[32];
- int res;
- secp256k1_fe c = *a;
- secp256k1_fe_normalize_var(&c);
- secp256k1_fe_get_b32(b, &c);
- secp256k1_num_set_bin(&n, b, 32);
- secp256k1_num_set_bin(&m, prime, 32);
- secp256k1_num_mod_inverse(&n, &n, &m);
- secp256k1_num_get_bin(b, 32, &n);
- res = secp256k1_fe_set_b32(r, b);
- (void)res;
- VERIFY_CHECK(res);
- /* Verify the result is the (unique) valid inverse using non-GMP code. */
- secp256k1_fe_mul(&c, &c, r);
- secp256k1_fe_add(&c, &negone);
- CHECK(secp256k1_fe_normalizes_to_zero_var(&c));
-#else
-#error "Please select field inverse implementation"
-#endif
-}
-
-static int secp256k1_fe_is_quad_var(const secp256k1_fe *a) {
-#ifndef USE_NUM_NONE
- unsigned char b[32];
- secp256k1_num n;
- secp256k1_num m;
- /* secp256k1 field prime, value p defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. */
- static const unsigned char prime[32] = {
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
- };
-
- secp256k1_fe c = *a;
- secp256k1_fe_normalize_var(&c);
- secp256k1_fe_get_b32(b, &c);
- secp256k1_num_set_bin(&n, b, 32);
- secp256k1_num_set_bin(&m, prime, 32);
- return secp256k1_num_jacobi(&n, &m) >= 0;
-#else
- secp256k1_fe r;
- return secp256k1_fe_sqrt(&r, a);
-#endif
-}
-
static const secp256k1_fe secp256k1_fe_one = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
#endif /* SECP256K1_FIELD_IMPL_H */
diff --git a/src/group.h b/src/group.h
index 40bf96122a..b9cd334dae 100644
--- a/src/group.h
+++ b/src/group.h
@@ -7,7 +7,6 @@
#ifndef SECP256K1_GROUP_H
#define SECP256K1_GROUP_H
-#include "num.h"
#include "field.h"
/** A group element of the secp256k1 curve, in affine coordinates. */
@@ -43,12 +42,6 @@ typedef struct {
/** Set a group element equal to the point with given X and Y coordinates */
static void secp256k1_ge_set_xy(secp256k1_ge *r, const secp256k1_fe *x, const secp256k1_fe *y);
-/** Set a group element (affine) equal to the point with the given X coordinate
- * and a Y coordinate that is a quadratic residue modulo p. The return value
- * is true iff a coordinate with the given X coordinate exists.
- */
-static int secp256k1_ge_set_xquad(secp256k1_ge *r, const secp256k1_fe *x);
-
/** Set a group element (affine) equal to the point with the given X coordinate, and given oddness
* for Y. Return value indicates whether the result is valid. */
static int secp256k1_ge_set_xo_var(secp256k1_ge *r, const secp256k1_fe *x, int odd);
@@ -96,9 +89,6 @@ static void secp256k1_gej_neg(secp256k1_gej *r, const secp256k1_gej *a);
/** Check whether a group element is the point at infinity. */
static int secp256k1_gej_is_infinity(const secp256k1_gej *a);
-/** Check whether a group element's y coordinate is a quadratic residue. */
-static int secp256k1_gej_has_quad_y_var(const secp256k1_gej *a);
-
/** Set r equal to the double of a. Constant time. */
static void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a);
diff --git a/src/group_impl.h b/src/group_impl.h
index b7094c5377..19ebd8f44e 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -7,7 +7,6 @@
#ifndef SECP256K1_GROUP_IMPL_H
#define SECP256K1_GROUP_IMPL_H
-#include "num.h"
#include "field.h"
#include "group.h"
@@ -207,18 +206,14 @@ static void secp256k1_ge_clear(secp256k1_ge *r) {
secp256k1_fe_clear(&r->y);
}
-static int secp256k1_ge_set_xquad(secp256k1_ge *r, const secp256k1_fe *x) {
+static int secp256k1_ge_set_xo_var(secp256k1_ge *r, const secp256k1_fe *x, int odd) {
secp256k1_fe x2, x3;
r->x = *x;
secp256k1_fe_sqr(&x2, x);
secp256k1_fe_mul(&x3, x, &x2);
r->infinity = 0;
secp256k1_fe_add(&x3, &secp256k1_fe_const_b);
- return secp256k1_fe_sqrt(&r->y, &x3);
-}
-
-static int secp256k1_ge_set_xo_var(secp256k1_ge *r, const secp256k1_fe *x, int odd) {
- if (!secp256k1_ge_set_xquad(r, x)) {
+ if (!secp256k1_fe_sqrt(&r->y, &x3)) {
return 0;
}
secp256k1_fe_normalize_var(&r->y);
@@ -655,20 +650,6 @@ static void secp256k1_ge_mul_lambda(secp256k1_ge *r, const secp256k1_ge *a) {
secp256k1_fe_mul(&r->x, &r->x, &beta);
}
-static int secp256k1_gej_has_quad_y_var(const secp256k1_gej *a) {
- secp256k1_fe yz;
-
- if (a->infinity) {
- return 0;
- }
-
- /* We rely on the fact that the Jacobi symbol of 1 / a->z^3 is the same as
- * that of a->z. Thus a->y / a->z^3 is a quadratic residue iff a->y * a->z
- is */
- secp256k1_fe_mul(&yz, &a->y, &a->z);
- return secp256k1_fe_is_quad_var(&yz);
-}
-
static int secp256k1_ge_is_in_correct_subgroup(const secp256k1_ge* ge) {
#ifdef EXHAUSTIVE_TEST_ORDER
secp256k1_gej out;
diff --git a/src/modinv32.h b/src/modinv32.h
new file mode 100644
index 0000000000..0efdda9ab5
--- /dev/null
+++ b/src/modinv32.h
@@ -0,0 +1,42 @@
+/***********************************************************************
+ * Copyright (c) 2020 Peter Dettman *
+ * Distributed under the MIT software license, see the accompanying *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_MODINV32_H
+#define SECP256K1_MODINV32_H
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include "util.h"
+
+/* A signed 30-bit limb representation of integers.
+ *
+ * Its value is sum(v[i] * 2^(30*i), i=0..8). */
+typedef struct {
+ int32_t v[9];
+} secp256k1_modinv32_signed30;
+
+typedef struct {
+ /* The modulus in signed30 notation, must be odd and in [3, 2^256]. */
+ secp256k1_modinv32_signed30 modulus;
+
+ /* modulus^{-1} mod 2^30 */
+ uint32_t modulus_inv30;
+} secp256k1_modinv32_modinfo;
+
+/* Replace x with its modular inverse mod modinfo->modulus. x must be in range [0, modulus).
+ * If x is zero, the result will be zero as well. If not, the inverse must exist (i.e., the gcd of
+ * x and modulus must be 1). These rules are automatically satisfied if the modulus is prime.
+ *
+ * On output, all of x's limbs will be in [0, 2^30).
+ */
+static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
+
+/* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */
+static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
+
+#endif /* SECP256K1_MODINV32_H */
diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h
new file mode 100644
index 0000000000..aa7988c4bb
--- /dev/null
+++ b/src/modinv32_impl.h
@@ -0,0 +1,587 @@
+/***********************************************************************
+ * Copyright (c) 2020 Peter Dettman *
+ * Distributed under the MIT software license, see the accompanying *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_MODINV32_IMPL_H
+#define SECP256K1_MODINV32_IMPL_H
+
+#include "modinv32.h"
+
+#include "util.h"
+
+#include
+
+/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
+ * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
+ *
+ * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
+ * implementation for N=30, using 30-bit signed limbs represented as int32_t.
+ */
+
+#ifdef VERIFY
+static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
+
+/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
+static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
+ const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
+ int64_t c = 0;
+ int i;
+ for (i = 0; i < 8; ++i) {
+ if (i < alen) c += (int64_t)a->v[i] * factor;
+ r->v[i] = (int32_t)c & M30; c >>= 30;
+ }
+ if (8 < alen) c += (int64_t)a->v[8] * factor;
+ VERIFY_CHECK(c == (int32_t)c);
+ r->v[8] = (int32_t)c;
+}
+
+/* Return -1 for ab*factor. A consists of alen limbs; b has 9. */
+static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
+ int i;
+ secp256k1_modinv32_signed30 am, bm;
+ secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
+ secp256k1_modinv32_mul_30(&bm, b, 9, factor);
+ for (i = 0; i < 8; ++i) {
+ /* Verify that all but the top limb of a and b are normalized. */
+ VERIFY_CHECK(am.v[i] >> 30 == 0);
+ VERIFY_CHECK(bm.v[i] >> 30 == 0);
+ }
+ for (i = 8; i >= 0; --i) {
+ if (am.v[i] < bm.v[i]) return -1;
+ if (am.v[i] > bm.v[i]) return 1;
+ }
+ return 0;
+}
+#endif
+
+/* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
+ * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
+ * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
+ * [0,2^30). */
+static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo) {
+ const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
+ int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
+ r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
+ int32_t cond_add, cond_negate;
+
+#ifdef VERIFY
+ /* Verify that all limbs are in range (-2^30,2^30). */
+ int i;
+ for (i = 0; i < 9; ++i) {
+ VERIFY_CHECK(r->v[i] >= -M30);
+ VERIFY_CHECK(r->v[i] <= M30);
+ }
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
+#endif
+
+ /* In a first step, add the modulus if the input is negative, and then negate if requested.
+ * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
+ * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
+ * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
+ * indeed the behavior of the right shift operator). */
+ cond_add = r8 >> 31;
+ r0 += modinfo->modulus.v[0] & cond_add;
+ r1 += modinfo->modulus.v[1] & cond_add;
+ r2 += modinfo->modulus.v[2] & cond_add;
+ r3 += modinfo->modulus.v[3] & cond_add;
+ r4 += modinfo->modulus.v[4] & cond_add;
+ r5 += modinfo->modulus.v[5] & cond_add;
+ r6 += modinfo->modulus.v[6] & cond_add;
+ r7 += modinfo->modulus.v[7] & cond_add;
+ r8 += modinfo->modulus.v[8] & cond_add;
+ cond_negate = sign >> 31;
+ r0 = (r0 ^ cond_negate) - cond_negate;
+ r1 = (r1 ^ cond_negate) - cond_negate;
+ r2 = (r2 ^ cond_negate) - cond_negate;
+ r3 = (r3 ^ cond_negate) - cond_negate;
+ r4 = (r4 ^ cond_negate) - cond_negate;
+ r5 = (r5 ^ cond_negate) - cond_negate;
+ r6 = (r6 ^ cond_negate) - cond_negate;
+ r7 = (r7 ^ cond_negate) - cond_negate;
+ r8 = (r8 ^ cond_negate) - cond_negate;
+ /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
+ r1 += r0 >> 30; r0 &= M30;
+ r2 += r1 >> 30; r1 &= M30;
+ r3 += r2 >> 30; r2 &= M30;
+ r4 += r3 >> 30; r3 &= M30;
+ r5 += r4 >> 30; r4 &= M30;
+ r6 += r5 >> 30; r5 &= M30;
+ r7 += r6 >> 30; r6 &= M30;
+ r8 += r7 >> 30; r7 &= M30;
+
+ /* In a second step add the modulus again if the result is still negative, bringing r to range
+ * [0,modulus). */
+ cond_add = r8 >> 31;
+ r0 += modinfo->modulus.v[0] & cond_add;
+ r1 += modinfo->modulus.v[1] & cond_add;
+ r2 += modinfo->modulus.v[2] & cond_add;
+ r3 += modinfo->modulus.v[3] & cond_add;
+ r4 += modinfo->modulus.v[4] & cond_add;
+ r5 += modinfo->modulus.v[5] & cond_add;
+ r6 += modinfo->modulus.v[6] & cond_add;
+ r7 += modinfo->modulus.v[7] & cond_add;
+ r8 += modinfo->modulus.v[8] & cond_add;
+ /* And propagate again. */
+ r1 += r0 >> 30; r0 &= M30;
+ r2 += r1 >> 30; r1 &= M30;
+ r3 += r2 >> 30; r2 &= M30;
+ r4 += r3 >> 30; r3 &= M30;
+ r5 += r4 >> 30; r4 &= M30;
+ r6 += r5 >> 30; r5 &= M30;
+ r7 += r6 >> 30; r6 &= M30;
+ r8 += r7 >> 30; r7 &= M30;
+
+ r->v[0] = r0;
+ r->v[1] = r1;
+ r->v[2] = r2;
+ r->v[3] = r3;
+ r->v[4] = r4;
+ r->v[5] = r5;
+ r->v[6] = r6;
+ r->v[7] = r7;
+ r->v[8] = r8;
+
+#ifdef VERIFY
+ VERIFY_CHECK(r0 >> 30 == 0);
+ VERIFY_CHECK(r1 >> 30 == 0);
+ VERIFY_CHECK(r2 >> 30 == 0);
+ VERIFY_CHECK(r3 >> 30 == 0);
+ VERIFY_CHECK(r4 >> 30 == 0);
+ VERIFY_CHECK(r5 >> 30 == 0);
+ VERIFY_CHECK(r6 >> 30 == 0);
+ VERIFY_CHECK(r7 >> 30 == 0);
+ VERIFY_CHECK(r8 >> 30 == 0);
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
+#endif
+}
+
+/* Data type for transition matrices (see section 3 of explanation).
+ *
+ * t = [ u v ]
+ * [ q r ]
+ */
+typedef struct {
+ int32_t u, v, q, r;
+} secp256k1_modinv32_trans2x2;
+
+/* Compute the transition matrix and eta for 30 divsteps.
+ *
+ * Input: eta: initial eta
+ * f0: bottom limb of initial f
+ * g0: bottom limb of initial g
+ * Output: t: transition matrix
+ * Return: final eta
+ *
+ * Implements the divsteps_n_matrix function from the explanation.
+ */
+static int32_t secp256k1_modinv32_divsteps_30(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
+ /* u,v,q,r are the elements of the transformation matrix being built up,
+ * starting with the identity matrix. Semantically they are signed integers
+ * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
+ * permits left shifting (which is UB for negative numbers). The range
+ * being inside [-2^31,2^31) means that casting to signed works correctly.
+ */
+ uint32_t u = 1, v = 0, q = 0, r = 1;
+ uint32_t c1, c2, f = f0, g = g0, x, y, z;
+ int i;
+
+ for (i = 0; i < 30; ++i) {
+ VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
+ VERIFY_CHECK((u * f0 + v * g0) == f << i);
+ VERIFY_CHECK((q * f0 + r * g0) == g << i);
+ /* Compute conditional masks for (eta < 0) and for (g & 1). */
+ c1 = eta >> 31;
+ c2 = -(g & 1);
+ /* Compute x,y,z, conditionally negated versions of f,u,v. */
+ x = (f ^ c1) - c1;
+ y = (u ^ c1) - c1;
+ z = (v ^ c1) - c1;
+ /* Conditionally add x,y,z to g,q,r. */
+ g += x & c2;
+ q += y & c2;
+ r += z & c2;
+ /* In what follows, c1 is a condition mask for (eta < 0) and (g & 1). */
+ c1 &= c2;
+ /* Conditionally negate eta, and unconditionally subtract 1. */
+ eta = (eta ^ c1) - (c1 + 1);
+ /* Conditionally add g,q,r to f,u,v. */
+ f += g & c1;
+ u += q & c1;
+ v += r & c1;
+ /* Shifts */
+ g >>= 1;
+ u <<= 1;
+ v <<= 1;
+ /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
+ VERIFY_CHECK(eta >= -751 && eta <= 751);
+ }
+ /* Return data in t and return value. */
+ t->u = (int32_t)u;
+ t->v = (int32_t)v;
+ t->q = (int32_t)q;
+ t->r = (int32_t)r;
+ /* The determinant of t must be a power of two. This guarantees that multiplication with t
+ * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+ * will be divided out again). As each divstep's individual matrix has determinant 2, the
+ * aggregate of 30 of them will have determinant 2^30. */
+ VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
+ return eta;
+}
+
+/* Compute the transition matrix and eta for 30 divsteps (variable time).
+ *
+ * Input: eta: initial eta
+ * f0: bottom limb of initial f
+ * g0: bottom limb of initial g
+ * Output: t: transition matrix
+ * Return: final eta
+ *
+ * Implements the divsteps_n_matrix_var function from the explanation.
+ */
+static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
+ /* inv256[i] = -(2*i+1)^-1 (mod 256) */
+ static const uint8_t inv256[128] = {
+ 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
+ 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
+ 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
+ 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
+ 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
+ 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
+ 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
+ 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
+ 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
+ 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
+ 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
+ };
+
+ /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
+ uint32_t u = 1, v = 0, q = 0, r = 1;
+ uint32_t f = f0, g = g0, m;
+ uint16_t w;
+ int i = 30, limit, zeros;
+
+ for (;;) {
+ /* Use a sentinel bit to count zeros only up to i. */
+ zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
+ /* Perform zeros divsteps at once; they all just divide g by two. */
+ g >>= zeros;
+ u <<= zeros;
+ v <<= zeros;
+ eta -= zeros;
+ i -= zeros;
+ /* We're done once we've done 30 divsteps. */
+ if (i == 0) break;
+ VERIFY_CHECK((f & 1) == 1);
+ VERIFY_CHECK((g & 1) == 1);
+ VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
+ VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
+ /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
+ VERIFY_CHECK(eta >= -751 && eta <= 751);
+ /* If eta is negative, negate it and replace f,g with g,-f. */
+ if (eta < 0) {
+ uint32_t tmp;
+ eta = -eta;
+ tmp = f; f = g; g = -tmp;
+ tmp = u; u = q; q = -tmp;
+ tmp = v; v = r; r = -tmp;
+ }
+ /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
+ * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
+ * can be done as its sign will flip once that happens. */
+ limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+ /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
+ VERIFY_CHECK(limit > 0 && limit <= 30);
+ m = (UINT32_MAX >> (32 - limit)) & 255U;
+ /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
+ w = (g * inv256[(f >> 1) & 127]) & m;
+ /* Do so. */
+ g += f * w;
+ q += u * w;
+ r += v * w;
+ VERIFY_CHECK((g & m) == 0);
+ }
+ /* Return data in t and return value. */
+ t->u = (int32_t)u;
+ t->v = (int32_t)v;
+ t->q = (int32_t)q;
+ t->r = (int32_t)r;
+ /* The determinant of t must be a power of two. This guarantees that multiplication with t
+ * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+ * will be divided out again). As each divstep's individual matrix has determinant 2, the
+ * aggregate of 30 of them will have determinant 2^30. */
+ VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
+ return eta;
+}
+
+/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
+ *
+ * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
+ * (-2^30,2^30).
+ *
+ * This implements the update_de function from the explanation.
+ */
+static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo* modinfo) {
+ const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
+ const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
+ int32_t di, ei, md, me, sd, se;
+ int64_t cd, ce;
+ int i;
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
+ VERIFY_CHECK((labs(u) + labs(v)) >= 0); /* |u|+|v| doesn't overflow */
+ VERIFY_CHECK((labs(q) + labs(r)) >= 0); /* |q|+|r| doesn't overflow */
+ VERIFY_CHECK((labs(u) + labs(v)) <= M30 + 1); /* |u|+|v| <= 2^30 */
+ VERIFY_CHECK((labs(q) + labs(r)) <= M30 + 1); /* |q|+|r| <= 2^30 */
+#endif
+ /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
+ sd = d->v[8] >> 31;
+ se = e->v[8] >> 31;
+ md = (u & sd) + (v & se);
+ me = (q & sd) + (r & se);
+ /* Begin computing t*[d,e]. */
+ di = d->v[0];
+ ei = e->v[0];
+ cd = (int64_t)u * di + (int64_t)v * ei;
+ ce = (int64_t)q * di + (int64_t)r * ei;
+ /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
+ md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
+ me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
+ /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
+ cd += (int64_t)modinfo->modulus.v[0] * md;
+ ce += (int64_t)modinfo->modulus.v[0] * me;
+ /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
+ VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
+ VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
+ /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
+ * limb i-1 (shifting down by 30 bits). */
+ for (i = 1; i < 9; ++i) {
+ di = d->v[i];
+ ei = e->v[i];
+ cd += (int64_t)u * di + (int64_t)v * ei;
+ ce += (int64_t)q * di + (int64_t)r * ei;
+ cd += (int64_t)modinfo->modulus.v[i] * md;
+ ce += (int64_t)modinfo->modulus.v[i] * me;
+ d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
+ e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
+ }
+ /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
+ d->v[8] = (int32_t)cd;
+ e->v[8] = (int32_t)ce;
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
+#endif
+}
+
+/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
+ *
+ * This implements the update_fg function from the explanation.
+ */
+static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
+ const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
+ const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
+ int32_t fi, gi;
+ int64_t cf, cg;
+ int i;
+ /* Start computing t*[f,g]. */
+ fi = f->v[0];
+ gi = g->v[0];
+ cf = (int64_t)u * fi + (int64_t)v * gi;
+ cg = (int64_t)q * fi + (int64_t)r * gi;
+ /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
+ VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
+ VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
+ /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
+ * down by 30 bits). */
+ for (i = 1; i < 9; ++i) {
+ fi = f->v[i];
+ gi = g->v[i];
+ cf += (int64_t)u * fi + (int64_t)v * gi;
+ cg += (int64_t)q * fi + (int64_t)r * gi;
+ f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
+ g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
+ }
+ /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
+ f->v[8] = (int32_t)cf;
+ g->v[8] = (int32_t)cg;
+}
+
+/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
+ *
+ * Version that operates on a variable number of limbs in f and g.
+ *
+ * This implements the update_fg function from the explanation in modinv64_impl.h.
+ */
+static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
+ const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
+ const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
+ int32_t fi, gi;
+ int64_t cf, cg;
+ int i;
+ VERIFY_CHECK(len > 0);
+ /* Start computing t*[f,g]. */
+ fi = f->v[0];
+ gi = g->v[0];
+ cf = (int64_t)u * fi + (int64_t)v * gi;
+ cg = (int64_t)q * fi + (int64_t)r * gi;
+ /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
+ VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
+ VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
+ /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
+ * down by 30 bits). */
+ for (i = 1; i < len; ++i) {
+ fi = f->v[i];
+ gi = g->v[i];
+ cf += (int64_t)u * fi + (int64_t)v * gi;
+ cg += (int64_t)q * fi + (int64_t)r * gi;
+ f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
+ g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
+ }
+ /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
+ f->v[len - 1] = (int32_t)cf;
+ g->v[len - 1] = (int32_t)cg;
+}
+
+/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
+static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
+ /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
+ secp256k1_modinv32_signed30 d = {{0}};
+ secp256k1_modinv32_signed30 e = {{1}};
+ secp256k1_modinv32_signed30 f = modinfo->modulus;
+ secp256k1_modinv32_signed30 g = *x;
+ int i;
+ int32_t eta = -1;
+
+ /* Do 25 iterations of 30 divsteps each = 750 divsteps. 724 suffices for 256-bit inputs. */
+ for (i = 0; i < 25; ++i) {
+ /* Compute transition matrix and new eta after 30 divsteps. */
+ secp256k1_modinv32_trans2x2 t;
+ eta = secp256k1_modinv32_divsteps_30(eta, f.v[0], g.v[0], &t);
+ /* Update d,e using that transition matrix. */
+ secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
+ /* Update f,g using that transition matrix. */
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
+#endif
+ secp256k1_modinv32_update_fg_30(&f, &g, &t);
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
+#endif
+ }
+
+ /* At this point sufficient iterations have been performed that g must have reached 0
+ * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
+ * values i.e. +/- 1, and d now contains +/- the modular inverse. */
+#ifdef VERIFY
+ /* g == 0 */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
+ /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
+ secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
+ (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
+ secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
+ (secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0 ||
+ secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) == 0)));
+#endif
+
+ /* Optionally negate d, normalize to [0,modulus), and return it. */
+ secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
+ *x = d;
+}
+
+/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
+static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
+ /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
+ secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
+ secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
+ secp256k1_modinv32_signed30 f = modinfo->modulus;
+ secp256k1_modinv32_signed30 g = *x;
+#ifdef VERIFY
+ int i = 0;
+#endif
+ int j, len = 9;
+ int32_t eta = -1;
+ int32_t cond, fn, gn;
+
+ /* Do iterations of 30 divsteps each until g=0. */
+ while (1) {
+ /* Compute transition matrix and new eta after 30 divsteps. */
+ secp256k1_modinv32_trans2x2 t;
+ eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
+ /* Update d,e using that transition matrix. */
+ secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
+ /* Update f,g using that transition matrix. */
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
+#endif
+ secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
+ /* If the bottom limb of g is 0, there is a chance g=0. */
+ if (g.v[0] == 0) {
+ cond = 0;
+ /* Check if all other limbs are also 0. */
+ for (j = 1; j < len; ++j) {
+ cond |= g.v[j];
+ }
+ /* If so, we're done. */
+ if (cond == 0) break;
+ }
+
+ /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
+ fn = f.v[len - 1];
+ gn = g.v[len - 1];
+ cond = ((int32_t)len - 2) >> 31;
+ cond |= fn ^ (fn >> 31);
+ cond |= gn ^ (gn >> 31);
+ /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
+ if (cond == 0) {
+ f.v[len - 2] |= (uint32_t)fn << 30;
+ g.v[len - 2] |= (uint32_t)gn << 30;
+ --len;
+ }
+#ifdef VERIFY
+ VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
+#endif
+ }
+
+ /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
+ * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
+#ifdef VERIFY
+ /* g == 0 */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
+ /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
+ VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
+ secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
+ (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
+ secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
+ (secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0 ||
+ secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) == 0)));
+#endif
+
+ /* Optionally negate d, normalize to [0,modulus), and return it. */
+ secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
+ *x = d;
+}
+
+#endif /* SECP256K1_MODINV32_IMPL_H */
diff --git a/src/modinv64.h b/src/modinv64.h
new file mode 100644
index 0000000000..da506dfa9f
--- /dev/null
+++ b/src/modinv64.h
@@ -0,0 +1,46 @@
+/***********************************************************************
+ * Copyright (c) 2020 Peter Dettman *
+ * Distributed under the MIT software license, see the accompanying *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_MODINV64_H
+#define SECP256K1_MODINV64_H
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include "util.h"
+
+#ifndef SECP256K1_WIDEMUL_INT128
+#error "modinv64 requires 128-bit wide multiplication support"
+#endif
+
+/* A signed 62-bit limb representation of integers.
+ *
+ * Its value is sum(v[i] * 2^(62*i), i=0..4). */
+typedef struct {
+ int64_t v[5];
+} secp256k1_modinv64_signed62;
+
+typedef struct {
+ /* The modulus in signed62 notation, must be odd and in [3, 2^256]. */
+ secp256k1_modinv64_signed62 modulus;
+
+ /* modulus^{-1} mod 2^62 */
+ uint64_t modulus_inv62;
+} secp256k1_modinv64_modinfo;
+
+/* Replace x with its modular inverse mod modinfo->modulus. x must be in range [0, modulus).
+ * If x is zero, the result will be zero as well. If not, the inverse must exist (i.e., the gcd of
+ * x and modulus must be 1). These rules are automatically satisfied if the modulus is prime.
+ *
+ * On output, all of x's limbs will be in [0, 2^62).
+ */
+static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
+
+/* Same as secp256k1_modinv64_var, but constant time in x (not in the modulus). */
+static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
+
+#endif /* SECP256K1_MODINV64_H */
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
new file mode 100644
index 0000000000..78505fa183
--- /dev/null
+++ b/src/modinv64_impl.h
@@ -0,0 +1,589 @@
+/***********************************************************************
+ * Copyright (c) 2020 Peter Dettman *
+ * Distributed under the MIT software license, see the accompanying *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_MODINV64_IMPL_H
+#define SECP256K1_MODINV64_IMPL_H
+
+#include "modinv64.h"
+
+#include "util.h"
+
+/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
+ * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
+ *
+ * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
+ * implementation for N=62, using 62-bit signed limbs represented as int64_t.
+ */
+
+#ifdef VERIFY
+/* Helper function to compute the absolute value of an int64_t.
+ * (we don't use abs/labs/llabs as it depends on the int sizes). */
+static int64_t secp256k1_modinv64_abs(int64_t v) {
+ VERIFY_CHECK(v > INT64_MIN);
+ if (v < 0) return -v;
+ return v;
+}
+
+static const secp256k1_modinv64_signed62 SECP256K1_SIGNED62_ONE = {{1}};
+
+/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^62). */
+static void secp256k1_modinv64_mul_62(secp256k1_modinv64_signed62 *r, const secp256k1_modinv64_signed62 *a, int alen, int64_t factor) {
+ const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
+ int128_t c = 0;
+ int i;
+ for (i = 0; i < 4; ++i) {
+ if (i < alen) c += (int128_t)a->v[i] * factor;
+ r->v[i] = (int64_t)c & M62; c >>= 62;
+ }
+ if (4 < alen) c += (int128_t)a->v[4] * factor;
+ VERIFY_CHECK(c == (int64_t)c);
+ r->v[4] = (int64_t)c;
+}
+
+/* Return -1 for ab*factor. A has alen limbs; b has 5. */
+static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, int alen, const secp256k1_modinv64_signed62 *b, int64_t factor) {
+ int i;
+ secp256k1_modinv64_signed62 am, bm;
+ secp256k1_modinv64_mul_62(&am, a, alen, 1); /* Normalize all but the top limb of a. */
+ secp256k1_modinv64_mul_62(&bm, b, 5, factor);
+ for (i = 0; i < 4; ++i) {
+ /* Verify that all but the top limb of a and b are normalized. */
+ VERIFY_CHECK(am.v[i] >> 62 == 0);
+ VERIFY_CHECK(bm.v[i] >> 62 == 0);
+ }
+ for (i = 4; i >= 0; --i) {
+ if (am.v[i] < bm.v[i]) return -1;
+ if (am.v[i] > bm.v[i]) return 1;
+ }
+ return 0;
+}
+#endif
+
+/* Take as input a signed62 number in range (-2*modulus,modulus), and add a multiple of the modulus
+ * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
+ * process. The input must have limbs in range (-2^62,2^62). The output will have limbs in range
+ * [0,2^62). */
+static void secp256k1_modinv64_normalize_62(secp256k1_modinv64_signed62 *r, int64_t sign, const secp256k1_modinv64_modinfo *modinfo) {
+ const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
+ int64_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4];
+ int64_t cond_add, cond_negate;
+
+#ifdef VERIFY
+ /* Verify that all limbs are in range (-2^62,2^62). */
+ int i;
+ for (i = 0; i < 5; ++i) {
+ VERIFY_CHECK(r->v[i] >= -M62);
+ VERIFY_CHECK(r->v[i] <= M62);
+ }
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 1) < 0); /* r < modulus */
+#endif
+
+ /* In a first step, add the modulus if the input is negative, and then negate if requested.
+ * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
+ * limbs are in range (-2^62,2^62), this cannot overflow an int64_t. Note that the right
+ * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
+ * indeed the behavior of the right shift operator). */
+ cond_add = r4 >> 63;
+ r0 += modinfo->modulus.v[0] & cond_add;
+ r1 += modinfo->modulus.v[1] & cond_add;
+ r2 += modinfo->modulus.v[2] & cond_add;
+ r3 += modinfo->modulus.v[3] & cond_add;
+ r4 += modinfo->modulus.v[4] & cond_add;
+ cond_negate = sign >> 63;
+ r0 = (r0 ^ cond_negate) - cond_negate;
+ r1 = (r1 ^ cond_negate) - cond_negate;
+ r2 = (r2 ^ cond_negate) - cond_negate;
+ r3 = (r3 ^ cond_negate) - cond_negate;
+ r4 = (r4 ^ cond_negate) - cond_negate;
+ /* Propagate the top bits, to bring limbs back to range (-2^62,2^62). */
+ r1 += r0 >> 62; r0 &= M62;
+ r2 += r1 >> 62; r1 &= M62;
+ r3 += r2 >> 62; r2 &= M62;
+ r4 += r3 >> 62; r3 &= M62;
+
+ /* In a second step add the modulus again if the result is still negative, bringing
+ * r to range [0,modulus). */
+ cond_add = r4 >> 63;
+ r0 += modinfo->modulus.v[0] & cond_add;
+ r1 += modinfo->modulus.v[1] & cond_add;
+ r2 += modinfo->modulus.v[2] & cond_add;
+ r3 += modinfo->modulus.v[3] & cond_add;
+ r4 += modinfo->modulus.v[4] & cond_add;
+ /* And propagate again. */
+ r1 += r0 >> 62; r0 &= M62;
+ r2 += r1 >> 62; r1 &= M62;
+ r3 += r2 >> 62; r2 &= M62;
+ r4 += r3 >> 62; r3 &= M62;
+
+ r->v[0] = r0;
+ r->v[1] = r1;
+ r->v[2] = r2;
+ r->v[3] = r3;
+ r->v[4] = r4;
+
+#ifdef VERIFY
+ VERIFY_CHECK(r0 >> 62 == 0);
+ VERIFY_CHECK(r1 >> 62 == 0);
+ VERIFY_CHECK(r2 >> 62 == 0);
+ VERIFY_CHECK(r3 >> 62 == 0);
+ VERIFY_CHECK(r4 >> 62 == 0);
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 0) >= 0); /* r >= 0 */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 1) < 0); /* r < modulus */
+#endif
+}
+
+/* Data type for transition matrices (see section 3 of explanation).
+ *
+ * t = [ u v ]
+ * [ q r ]
+ */
+typedef struct {
+ int64_t u, v, q, r;
+} secp256k1_modinv64_trans2x2;
+
+/* Compute the transition matrix and eta for 62 divsteps.
+ *
+ * Input: eta: initial eta
+ * f0: bottom limb of initial f
+ * g0: bottom limb of initial g
+ * Output: t: transition matrix
+ * Return: final eta
+ *
+ * Implements the divsteps_n_matrix function from the explanation.
+ */
+static int64_t secp256k1_modinv64_divsteps_62(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
+ /* u,v,q,r are the elements of the transformation matrix being built up,
+ * starting with the identity matrix. Semantically they are signed integers
+ * in range [-2^62,2^62], but here represented as unsigned mod 2^64. This
+ * permits left shifting (which is UB for negative numbers). The range
+ * being inside [-2^63,2^63) means that casting to signed works correctly.
+ */
+ uint64_t u = 1, v = 0, q = 0, r = 1;
+ uint64_t c1, c2, f = f0, g = g0, x, y, z;
+ int i;
+
+ for (i = 0; i < 62; ++i) {
+ VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
+ VERIFY_CHECK((u * f0 + v * g0) == f << i);
+ VERIFY_CHECK((q * f0 + r * g0) == g << i);
+ /* Compute conditional masks for (eta < 0) and for (g & 1). */
+ c1 = eta >> 63;
+ c2 = -(g & 1);
+ /* Compute x,y,z, conditionally negated versions of f,u,v. */
+ x = (f ^ c1) - c1;
+ y = (u ^ c1) - c1;
+ z = (v ^ c1) - c1;
+ /* Conditionally add x,y,z to g,q,r. */
+ g += x & c2;
+ q += y & c2;
+ r += z & c2;
+ /* In what follows, c1 is a condition mask for (eta < 0) and (g & 1). */
+ c1 &= c2;
+ /* Conditionally negate eta, and unconditionally subtract 1. */
+ eta = (eta ^ c1) - (c1 + 1);
+ /* Conditionally add g,q,r to f,u,v. */
+ f += g & c1;
+ u += q & c1;
+ v += r & c1;
+ /* Shifts */
+ g >>= 1;
+ u <<= 1;
+ v <<= 1;
+ /* Bounds on eta that follow from the bounds on iteration count (max 12*62 divsteps). */
+ VERIFY_CHECK(eta >= -745 && eta <= 745);
+ }
+ /* Return data in t and return value. */
+ t->u = (int64_t)u;
+ t->v = (int64_t)v;
+ t->q = (int64_t)q;
+ t->r = (int64_t)r;
+ /* The determinant of t must be a power of two. This guarantees that multiplication with t
+ * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+ * will be divided out again). As each divstep's individual matrix has determinant 2, the
+ * aggregate of 62 of them will have determinant 2^62. */
+ VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62);
+ return eta;
+}
+
+/* Compute the transition matrix and eta for 62 divsteps (variable time).
+ *
+ * Input: eta: initial eta
+ * f0: bottom limb of initial f
+ * g0: bottom limb of initial g
+ * Output: t: transition matrix
+ * Return: final eta
+ *
+ * Implements the divsteps_n_matrix_var function from the explanation.
+ */
+static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
+ /* Transformation matrix; see comments in secp256k1_modinv64_divsteps_62. */
+ uint64_t u = 1, v = 0, q = 0, r = 1;
+ uint64_t f = f0, g = g0, m;
+ uint32_t w;
+ int i = 62, limit, zeros;
+
+ for (;;) {
+ /* Use a sentinel bit to count zeros only up to i. */
+ zeros = secp256k1_ctz64_var(g | (UINT64_MAX << i));
+ /* Perform zeros divsteps at once; they all just divide g by two. */
+ g >>= zeros;
+ u <<= zeros;
+ v <<= zeros;
+ eta -= zeros;
+ i -= zeros;
+ /* We're done once we've done 62 divsteps. */
+ if (i == 0) break;
+ VERIFY_CHECK((f & 1) == 1);
+ VERIFY_CHECK((g & 1) == 1);
+ VERIFY_CHECK((u * f0 + v * g0) == f << (62 - i));
+ VERIFY_CHECK((q * f0 + r * g0) == g << (62 - i));
+ /* Bounds on eta that follow from the bounds on iteration count (max 12*62 divsteps). */
+ VERIFY_CHECK(eta >= -745 && eta <= 745);
+ /* If eta is negative, negate it and replace f,g with g,-f. */
+ if (eta < 0) {
+ uint64_t tmp;
+ eta = -eta;
+ tmp = f; f = g; g = -tmp;
+ tmp = u; u = q; q = -tmp;
+ tmp = v; v = r; r = -tmp;
+ /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled
+ * out (as we'd be done before that point), and no more than eta+1 can be done as its
+ * will flip again once that happens. */
+ limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+ VERIFY_CHECK(limit > 0 && limit <= 62);
+ /* m is a mask for the bottom min(limit, 6) bits. */
+ m = (UINT64_MAX >> (64 - limit)) & 63U;
+ /* Find what multiple of f must be added to g to cancel its bottom min(limit, 6)
+ * bits. */
+ w = (f * g * (f * f - 2)) & m;
+ } else {
+ /* In this branch, use a simpler formula that only lets us cancel up to 4 bits of g, as
+ * eta tends to be smaller here. */
+ limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+ VERIFY_CHECK(limit > 0 && limit <= 62);
+ /* m is a mask for the bottom min(limit, 4) bits. */
+ m = (UINT64_MAX >> (64 - limit)) & 15U;
+ /* Find what multiple of f must be added to g to cancel its bottom min(limit, 4)
+ * bits. */
+ w = f + (((f + 1) & 4) << 1);
+ w = (-w * g) & m;
+ }
+ g += f * w;
+ q += u * w;
+ r += v * w;
+ VERIFY_CHECK((g & m) == 0);
+ }
+ /* Return data in t and return value. */
+ t->u = (int64_t)u;
+ t->v = (int64_t)v;
+ t->q = (int64_t)q;
+ t->r = (int64_t)r;
+ /* The determinant of t must be a power of two. This guarantees that multiplication with t
+ * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+ * will be divided out again). As each divstep's individual matrix has determinant 2, the
+ * aggregate of 62 of them will have determinant 2^62. */
+ VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62);
+ return eta;
+}
+
+/* Compute (t/2^62) * [d, e] mod modulus, where t is a transition matrix for 62 divsteps.
+ *
+ * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
+ * (-2^62,2^62).
+ *
+ * This implements the update_de function from the explanation.
+ */
+static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp256k1_modinv64_signed62 *e, const secp256k1_modinv64_trans2x2 *t, const secp256k1_modinv64_modinfo* modinfo) {
+ const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
+ const int64_t d0 = d->v[0], d1 = d->v[1], d2 = d->v[2], d3 = d->v[3], d4 = d->v[4];
+ const int64_t e0 = e->v[0], e1 = e->v[1], e2 = e->v[2], e3 = e->v[3], e4 = e->v[4];
+ const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
+ int64_t md, me, sd, se;
+ int128_t cd, ce;
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0); /* d < modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(e, 5, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(e, 5, &modinfo->modulus, 1) < 0); /* e < modulus */
+ VERIFY_CHECK((secp256k1_modinv64_abs(u) + secp256k1_modinv64_abs(v)) >= 0); /* |u|+|v| doesn't overflow */
+ VERIFY_CHECK((secp256k1_modinv64_abs(q) + secp256k1_modinv64_abs(r)) >= 0); /* |q|+|r| doesn't overflow */
+ VERIFY_CHECK((secp256k1_modinv64_abs(u) + secp256k1_modinv64_abs(v)) <= M62 + 1); /* |u|+|v| <= 2^62 */
+ VERIFY_CHECK((secp256k1_modinv64_abs(q) + secp256k1_modinv64_abs(r)) <= M62 + 1); /* |q|+|r| <= 2^62 */
+#endif
+ /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
+ sd = d4 >> 63;
+ se = e4 >> 63;
+ md = (u & sd) + (v & se);
+ me = (q & sd) + (r & se);
+ /* Begin computing t*[d,e]. */
+ cd = (int128_t)u * d0 + (int128_t)v * e0;
+ ce = (int128_t)q * d0 + (int128_t)r * e0;
+ /* Correct md,me so that t*[d,e]+modulus*[md,me] has 62 zero bottom bits. */
+ md -= (modinfo->modulus_inv62 * (uint64_t)cd + md) & M62;
+ me -= (modinfo->modulus_inv62 * (uint64_t)ce + me) & M62;
+ /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
+ cd += (int128_t)modinfo->modulus.v[0] * md;
+ ce += (int128_t)modinfo->modulus.v[0] * me;
+ /* Verify that the low 62 bits of the computation are indeed zero, and then throw them away. */
+ VERIFY_CHECK(((int64_t)cd & M62) == 0); cd >>= 62;
+ VERIFY_CHECK(((int64_t)ce & M62) == 0); ce >>= 62;
+ /* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */
+ cd += (int128_t)u * d1 + (int128_t)v * e1;
+ ce += (int128_t)q * d1 + (int128_t)r * e1;
+ if (modinfo->modulus.v[1]) { /* Optimize for the case where limb of modulus is zero. */
+ cd += (int128_t)modinfo->modulus.v[1] * md;
+ ce += (int128_t)modinfo->modulus.v[1] * me;
+ }
+ d->v[0] = (int64_t)cd & M62; cd >>= 62;
+ e->v[0] = (int64_t)ce & M62; ce >>= 62;
+ /* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */
+ cd += (int128_t)u * d2 + (int128_t)v * e2;
+ ce += (int128_t)q * d2 + (int128_t)r * e2;
+ if (modinfo->modulus.v[2]) { /* Optimize for the case where limb of modulus is zero. */
+ cd += (int128_t)modinfo->modulus.v[2] * md;
+ ce += (int128_t)modinfo->modulus.v[2] * me;
+ }
+ d->v[1] = (int64_t)cd & M62; cd >>= 62;
+ e->v[1] = (int64_t)ce & M62; ce >>= 62;
+ /* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */
+ cd += (int128_t)u * d3 + (int128_t)v * e3;
+ ce += (int128_t)q * d3 + (int128_t)r * e3;
+ if (modinfo->modulus.v[3]) { /* Optimize for the case where limb of modulus is zero. */
+ cd += (int128_t)modinfo->modulus.v[3] * md;
+ ce += (int128_t)modinfo->modulus.v[3] * me;
+ }
+ d->v[2] = (int64_t)cd & M62; cd >>= 62;
+ e->v[2] = (int64_t)ce & M62; ce >>= 62;
+ /* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */
+ cd += (int128_t)u * d4 + (int128_t)v * e4;
+ ce += (int128_t)q * d4 + (int128_t)r * e4;
+ cd += (int128_t)modinfo->modulus.v[4] * md;
+ ce += (int128_t)modinfo->modulus.v[4] * me;
+ d->v[3] = (int64_t)cd & M62; cd >>= 62;
+ e->v[3] = (int64_t)ce & M62; ce >>= 62;
+ /* What remains is limb 5 of t*[d,e]+modulus*[md,me]; store it as output limb 4. */
+ d->v[4] = (int64_t)cd;
+ e->v[4] = (int64_t)ce;
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0); /* d < modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(e, 5, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(e, 5, &modinfo->modulus, 1) < 0); /* e < modulus */
+#endif
+}
+
+/* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps.
+ *
+ * This implements the update_fg function from the explanation.
+ */
+static void secp256k1_modinv64_update_fg_62(secp256k1_modinv64_signed62 *f, secp256k1_modinv64_signed62 *g, const secp256k1_modinv64_trans2x2 *t) {
+ const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
+ const int64_t f0 = f->v[0], f1 = f->v[1], f2 = f->v[2], f3 = f->v[3], f4 = f->v[4];
+ const int64_t g0 = g->v[0], g1 = g->v[1], g2 = g->v[2], g3 = g->v[3], g4 = g->v[4];
+ const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
+ int128_t cf, cg;
+ /* Start computing t*[f,g]. */
+ cf = (int128_t)u * f0 + (int128_t)v * g0;
+ cg = (int128_t)q * f0 + (int128_t)r * g0;
+ /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
+ VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62;
+ VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62;
+ /* Compute limb 1 of t*[f,g], and store it as output limb 0 (= down shift). */
+ cf += (int128_t)u * f1 + (int128_t)v * g1;
+ cg += (int128_t)q * f1 + (int128_t)r * g1;
+ f->v[0] = (int64_t)cf & M62; cf >>= 62;
+ g->v[0] = (int64_t)cg & M62; cg >>= 62;
+ /* Compute limb 2 of t*[f,g], and store it as output limb 1. */
+ cf += (int128_t)u * f2 + (int128_t)v * g2;
+ cg += (int128_t)q * f2 + (int128_t)r * g2;
+ f->v[1] = (int64_t)cf & M62; cf >>= 62;
+ g->v[1] = (int64_t)cg & M62; cg >>= 62;
+ /* Compute limb 3 of t*[f,g], and store it as output limb 2. */
+ cf += (int128_t)u * f3 + (int128_t)v * g3;
+ cg += (int128_t)q * f3 + (int128_t)r * g3;
+ f->v[2] = (int64_t)cf & M62; cf >>= 62;
+ g->v[2] = (int64_t)cg & M62; cg >>= 62;
+ /* Compute limb 4 of t*[f,g], and store it as output limb 3. */
+ cf += (int128_t)u * f4 + (int128_t)v * g4;
+ cg += (int128_t)q * f4 + (int128_t)r * g4;
+ f->v[3] = (int64_t)cf & M62; cf >>= 62;
+ g->v[3] = (int64_t)cg & M62; cg >>= 62;
+ /* What remains is limb 5 of t*[f,g]; store it as output limb 4. */
+ f->v[4] = (int64_t)cf;
+ g->v[4] = (int64_t)cg;
+}
+
+/* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps.
+ *
+ * Version that operates on a variable number of limbs in f and g.
+ *
+ * This implements the update_fg function from the explanation.
+ */
+static void secp256k1_modinv64_update_fg_62_var(int len, secp256k1_modinv64_signed62 *f, secp256k1_modinv64_signed62 *g, const secp256k1_modinv64_trans2x2 *t) {
+ const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
+ const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
+ int64_t fi, gi;
+ int128_t cf, cg;
+ int i;
+ VERIFY_CHECK(len > 0);
+ /* Start computing t*[f,g]. */
+ fi = f->v[0];
+ gi = g->v[0];
+ cf = (int128_t)u * fi + (int128_t)v * gi;
+ cg = (int128_t)q * fi + (int128_t)r * gi;
+ /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
+ VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62;
+ VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62;
+ /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
+ * down by 62 bits). */
+ for (i = 1; i < len; ++i) {
+ fi = f->v[i];
+ gi = g->v[i];
+ cf += (int128_t)u * fi + (int128_t)v * gi;
+ cg += (int128_t)q * fi + (int128_t)r * gi;
+ f->v[i - 1] = (int64_t)cf & M62; cf >>= 62;
+ g->v[i - 1] = (int64_t)cg & M62; cg >>= 62;
+ }
+ /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
+ f->v[len - 1] = (int64_t)cf;
+ g->v[len - 1] = (int64_t)cg;
+}
+
+/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
+static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) {
+ /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
+ secp256k1_modinv64_signed62 d = {{0, 0, 0, 0, 0}};
+ secp256k1_modinv64_signed62 e = {{1, 0, 0, 0, 0}};
+ secp256k1_modinv64_signed62 f = modinfo->modulus;
+ secp256k1_modinv64_signed62 g = *x;
+ int i;
+ int64_t eta = -1;
+
+ /* Do 12 iterations of 62 divsteps each = 744 divsteps. 724 suffices for 256-bit inputs. */
+ for (i = 0; i < 12; ++i) {
+ /* Compute transition matrix and new eta after 62 divsteps. */
+ secp256k1_modinv64_trans2x2 t;
+ eta = secp256k1_modinv64_divsteps_62(eta, f.v[0], g.v[0], &t);
+ /* Update d,e using that transition matrix. */
+ secp256k1_modinv64_update_de_62(&d, &e, &t, modinfo);
+ /* Update f,g using that transition matrix. */
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, -1) > 0); /* f > -modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &modinfo->modulus, -1) > 0); /* g > -modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &modinfo->modulus, 1) < 0); /* g < modulus */
+#endif
+ secp256k1_modinv64_update_fg_62(&f, &g, &t);
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, -1) > 0); /* f > -modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &modinfo->modulus, -1) > 0); /* g > -modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &modinfo->modulus, 1) < 0); /* g < modulus */
+#endif
+ }
+
+ /* At this point sufficient iterations have been performed that g must have reached 0
+ * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
+ * values i.e. +/- 1, and d now contains +/- the modular inverse. */
+#ifdef VERIFY
+ /* g == 0 */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &SECP256K1_SIGNED62_ONE, 0) == 0);
+ /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &SECP256K1_SIGNED62_ONE, -1) == 0 ||
+ secp256k1_modinv64_mul_cmp_62(&f, 5, &SECP256K1_SIGNED62_ONE, 1) == 0 ||
+ (secp256k1_modinv64_mul_cmp_62(x, 5, &SECP256K1_SIGNED62_ONE, 0) == 0 &&
+ secp256k1_modinv64_mul_cmp_62(&d, 5, &SECP256K1_SIGNED62_ONE, 0) == 0 &&
+ (secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, 1) == 0 ||
+ secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, -1) == 0)));
+#endif
+
+ /* Optionally negate d, normalize to [0,modulus), and return it. */
+ secp256k1_modinv64_normalize_62(&d, f.v[4], modinfo);
+ *x = d;
+}
+
+/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
+static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) {
+ /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
+ secp256k1_modinv64_signed62 d = {{0, 0, 0, 0, 0}};
+ secp256k1_modinv64_signed62 e = {{1, 0, 0, 0, 0}};
+ secp256k1_modinv64_signed62 f = modinfo->modulus;
+ secp256k1_modinv64_signed62 g = *x;
+#ifdef VERIFY
+ int i = 0;
+#endif
+ int j, len = 5;
+ int64_t eta = -1;
+ int64_t cond, fn, gn;
+
+ /* Do iterations of 62 divsteps each until g=0. */
+ while (1) {
+ /* Compute transition matrix and new eta after 62 divsteps. */
+ secp256k1_modinv64_trans2x2 t;
+ eta = secp256k1_modinv64_divsteps_62_var(eta, f.v[0], g.v[0], &t);
+ /* Update d,e using that transition matrix. */
+ secp256k1_modinv64_update_de_62(&d, &e, &t, modinfo);
+ /* Update f,g using that transition matrix. */
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
+#endif
+ secp256k1_modinv64_update_fg_62_var(len, &f, &g, &t);
+ /* If the bottom limb of g is zero, there is a chance that g=0. */
+ if (g.v[0] == 0) {
+ cond = 0;
+ /* Check if the other limbs are also 0. */
+ for (j = 1; j < len; ++j) {
+ cond |= g.v[j];
+ }
+ /* If so, we're done. */
+ if (cond == 0) break;
+ }
+
+ /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
+ fn = f.v[len - 1];
+ gn = g.v[len - 1];
+ cond = ((int64_t)len - 2) >> 63;
+ cond |= fn ^ (fn >> 63);
+ cond |= gn ^ (gn >> 63);
+ /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
+ if (cond == 0) {
+ f.v[len - 2] |= (uint64_t)fn << 62;
+ g.v[len - 2] |= (uint64_t)gn << 62;
+ --len;
+ }
+#ifdef VERIFY
+ VERIFY_CHECK(++i < 12); /* We should never need more than 12*62 = 744 divsteps */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
+#endif
+ }
+
+ /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
+ * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
+#ifdef VERIFY
+ /* g == 0 */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &SECP256K1_SIGNED62_ONE, 0) == 0);
+ /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
+ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &SECP256K1_SIGNED62_ONE, -1) == 0 ||
+ secp256k1_modinv64_mul_cmp_62(&f, len, &SECP256K1_SIGNED62_ONE, 1) == 0 ||
+ (secp256k1_modinv64_mul_cmp_62(x, 5, &SECP256K1_SIGNED62_ONE, 0) == 0 &&
+ secp256k1_modinv64_mul_cmp_62(&d, 5, &SECP256K1_SIGNED62_ONE, 0) == 0 &&
+ (secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) == 0 ||
+ secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, -1) == 0)));
+#endif
+
+ /* Optionally negate d, normalize to [0,modulus), and return it. */
+ secp256k1_modinv64_normalize_62(&d, f.v[len - 1], modinfo);
+ *x = d;
+}
+
+#endif /* SECP256K1_MODINV64_IMPL_H */
diff --git a/src/num.h b/src/num.h
deleted file mode 100644
index 59a5cf2d71..0000000000
--- a/src/num.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/***********************************************************************
- * Copyright (c) 2013, 2014 Pieter Wuille *
- * Distributed under the MIT software license, see the accompanying *
- * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
- ***********************************************************************/
-
-#ifndef SECP256K1_NUM_H
-#define SECP256K1_NUM_H
-
-#ifndef USE_NUM_NONE
-
-#if defined HAVE_CONFIG_H
-#include "libsecp256k1-config.h"
-#endif
-
-#if defined(USE_NUM_GMP)
-#include "num_gmp.h"
-#else
-#error "Please select num implementation"
-#endif
-
-/** Copy a number. */
-static void secp256k1_num_copy(secp256k1_num *r, const secp256k1_num *a);
-
-/** Convert a number's absolute value to a binary big-endian string.
- * There must be enough place. */
-static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num *a);
-
-/** Set a number to the value of a binary big-endian string. */
-static void secp256k1_num_set_bin(secp256k1_num *r, const unsigned char *a, unsigned int alen);
-
-/** Compute a modular inverse. The input must be less than the modulus. */
-static void secp256k1_num_mod_inverse(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *m);
-
-/** Compute the jacobi symbol (a|b). b must be positive and odd. */
-static int secp256k1_num_jacobi(const secp256k1_num *a, const secp256k1_num *b);
-
-/** Compare the absolute value of two numbers. */
-static int secp256k1_num_cmp(const secp256k1_num *a, const secp256k1_num *b);
-
-/** Test whether two number are equal (including sign). */
-static int secp256k1_num_eq(const secp256k1_num *a, const secp256k1_num *b);
-
-/** Add two (signed) numbers. */
-static void secp256k1_num_add(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b);
-
-/** Subtract two (signed) numbers. */
-static void secp256k1_num_sub(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b);
-
-/** Multiply two (signed) numbers. */
-static void secp256k1_num_mul(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b);
-
-/** Replace a number by its remainder modulo m. M's sign is ignored. The result is a number between 0 and m-1,
- even if r was negative. */
-static void secp256k1_num_mod(secp256k1_num *r, const secp256k1_num *m);
-
-/** Right-shift the passed number by bits bits. */
-static void secp256k1_num_shift(secp256k1_num *r, int bits);
-
-/** Check whether a number is zero. */
-static int secp256k1_num_is_zero(const secp256k1_num *a);
-
-/** Check whether a number is one. */
-static int secp256k1_num_is_one(const secp256k1_num *a);
-
-/** Check whether a number is strictly negative. */
-static int secp256k1_num_is_neg(const secp256k1_num *a);
-
-/** Change a number's sign. */
-static void secp256k1_num_negate(secp256k1_num *r);
-
-#endif
-
-#endif /* SECP256K1_NUM_H */
diff --git a/src/num_gmp.h b/src/num_gmp.h
deleted file mode 100644
index cc6c51a5fa..0000000000
--- a/src/num_gmp.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/***********************************************************************
- * Copyright (c) 2013, 2014 Pieter Wuille *
- * Distributed under the MIT software license, see the accompanying *
- * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
- ***********************************************************************/
-
-#ifndef SECP256K1_NUM_REPR_H
-#define SECP256K1_NUM_REPR_H
-
-#include
-
-#define NUM_LIMBS ((256+GMP_NUMB_BITS-1)/GMP_NUMB_BITS)
-
-typedef struct {
- mp_limb_t data[2*NUM_LIMBS];
- int neg;
- int limbs;
-} secp256k1_num;
-
-#endif /* SECP256K1_NUM_REPR_H */
diff --git a/src/num_gmp_impl.h b/src/num_gmp_impl.h
deleted file mode 100644
index c07947dd9d..0000000000
--- a/src/num_gmp_impl.h
+++ /dev/null
@@ -1,288 +0,0 @@
-/***********************************************************************
- * Copyright (c) 2013, 2014 Pieter Wuille *
- * Distributed under the MIT software license, see the accompanying *
- * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
- ***********************************************************************/
-
-#ifndef SECP256K1_NUM_REPR_IMPL_H
-#define SECP256K1_NUM_REPR_IMPL_H
-
-#include
-#include
-#include
-
-#include "util.h"
-#include "num.h"
-
-#ifdef VERIFY
-static void secp256k1_num_sanity(const secp256k1_num *a) {
- VERIFY_CHECK(a->limbs == 1 || (a->limbs > 1 && a->data[a->limbs-1] != 0));
-}
-#else
-#define secp256k1_num_sanity(a) do { } while(0)
-#endif
-
-static void secp256k1_num_copy(secp256k1_num *r, const secp256k1_num *a) {
- *r = *a;
-}
-
-static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num *a) {
- unsigned char tmp[65];
- int len = 0;
- int shift = 0;
- if (a->limbs>1 || a->data[0] != 0) {
- len = mpn_get_str(tmp, 256, (mp_limb_t*)a->data, a->limbs);
- }
- while (shift < len && tmp[shift] == 0) shift++;
- VERIFY_CHECK(len-shift <= (int)rlen);
- memset(r, 0, rlen - len + shift);
- if (len > shift) {
- memcpy(r + rlen - len + shift, tmp + shift, len - shift);
- }
- memset(tmp, 0, sizeof(tmp));
-}
-
-static void secp256k1_num_set_bin(secp256k1_num *r, const unsigned char *a, unsigned int alen) {
- int len;
- VERIFY_CHECK(alen > 0);
- VERIFY_CHECK(alen <= 64);
- len = mpn_set_str(r->data, a, alen, 256);
- if (len == 0) {
- r->data[0] = 0;
- len = 1;
- }
- VERIFY_CHECK(len <= NUM_LIMBS*2);
- r->limbs = len;
- r->neg = 0;
- while (r->limbs > 1 && r->data[r->limbs-1]==0) {
- r->limbs--;
- }
-}
-
-static void secp256k1_num_add_abs(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b) {
- mp_limb_t c = mpn_add(r->data, a->data, a->limbs, b->data, b->limbs);
- r->limbs = a->limbs;
- if (c != 0) {
- VERIFY_CHECK(r->limbs < 2*NUM_LIMBS);
- r->data[r->limbs++] = c;
- }
-}
-
-static void secp256k1_num_sub_abs(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b) {
- mp_limb_t c = mpn_sub(r->data, a->data, a->limbs, b->data, b->limbs);
- (void)c;
- VERIFY_CHECK(c == 0);
- r->limbs = a->limbs;
- while (r->limbs > 1 && r->data[r->limbs-1]==0) {
- r->limbs--;
- }
-}
-
-static void secp256k1_num_mod(secp256k1_num *r, const secp256k1_num *m) {
- secp256k1_num_sanity(r);
- secp256k1_num_sanity(m);
-
- if (r->limbs >= m->limbs) {
- mp_limb_t t[2*NUM_LIMBS];
- mpn_tdiv_qr(t, r->data, 0, r->data, r->limbs, m->data, m->limbs);
- memset(t, 0, sizeof(t));
- r->limbs = m->limbs;
- while (r->limbs > 1 && r->data[r->limbs-1]==0) {
- r->limbs--;
- }
- }
-
- if (r->neg && (r->limbs > 1 || r->data[0] != 0)) {
- secp256k1_num_sub_abs(r, m, r);
- r->neg = 0;
- }
-}
-
-static void secp256k1_num_mod_inverse(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *m) {
- int i;
- mp_limb_t g[NUM_LIMBS+1];
- mp_limb_t u[NUM_LIMBS+1];
- mp_limb_t v[NUM_LIMBS+1];
- mp_size_t sn;
- mp_size_t gn;
- secp256k1_num_sanity(a);
- secp256k1_num_sanity(m);
-
- /** mpn_gcdext computes: (G,S) = gcdext(U,V), where
- * * G = gcd(U,V)
- * * G = U*S + V*T
- * * U has equal or more limbs than V, and V has no padding
- * If we set U to be (a padded version of) a, and V = m:
- * G = a*S + m*T
- * G = a*S mod m
- * Assuming G=1:
- * S = 1/a mod m
- */
- VERIFY_CHECK(m->limbs <= NUM_LIMBS);
- VERIFY_CHECK(m->data[m->limbs-1] != 0);
- for (i = 0; i < m->limbs; i++) {
- u[i] = (i < a->limbs) ? a->data[i] : 0;
- v[i] = m->data[i];
- }
- sn = NUM_LIMBS+1;
- gn = mpn_gcdext(g, r->data, &sn, u, m->limbs, v, m->limbs);
- (void)gn;
- VERIFY_CHECK(gn == 1);
- VERIFY_CHECK(g[0] == 1);
- r->neg = a->neg ^ m->neg;
- if (sn < 0) {
- mpn_sub(r->data, m->data, m->limbs, r->data, -sn);
- r->limbs = m->limbs;
- while (r->limbs > 1 && r->data[r->limbs-1]==0) {
- r->limbs--;
- }
- } else {
- r->limbs = sn;
- }
- memset(g, 0, sizeof(g));
- memset(u, 0, sizeof(u));
- memset(v, 0, sizeof(v));
-}
-
-static int secp256k1_num_jacobi(const secp256k1_num *a, const secp256k1_num *b) {
- int ret;
- mpz_t ga, gb;
- secp256k1_num_sanity(a);
- secp256k1_num_sanity(b);
- VERIFY_CHECK(!b->neg && (b->limbs > 0) && (b->data[0] & 1));
-
- mpz_inits(ga, gb, NULL);
-
- mpz_import(gb, b->limbs, -1, sizeof(mp_limb_t), 0, 0, b->data);
- mpz_import(ga, a->limbs, -1, sizeof(mp_limb_t), 0, 0, a->data);
- if (a->neg) {
- mpz_neg(ga, ga);
- }
-
- ret = mpz_jacobi(ga, gb);
-
- mpz_clears(ga, gb, NULL);
-
- return ret;
-}
-
-static int secp256k1_num_is_one(const secp256k1_num *a) {
- return (a->limbs == 1 && a->data[0] == 1);
-}
-
-static int secp256k1_num_is_zero(const secp256k1_num *a) {
- return (a->limbs == 1 && a->data[0] == 0);
-}
-
-static int secp256k1_num_is_neg(const secp256k1_num *a) {
- return (a->limbs > 1 || a->data[0] != 0) && a->neg;
-}
-
-static int secp256k1_num_cmp(const secp256k1_num *a, const secp256k1_num *b) {
- if (a->limbs > b->limbs) {
- return 1;
- }
- if (a->limbs < b->limbs) {
- return -1;
- }
- return mpn_cmp(a->data, b->data, a->limbs);
-}
-
-static int secp256k1_num_eq(const secp256k1_num *a, const secp256k1_num *b) {
- if (a->limbs > b->limbs) {
- return 0;
- }
- if (a->limbs < b->limbs) {
- return 0;
- }
- if ((a->neg && !secp256k1_num_is_zero(a)) != (b->neg && !secp256k1_num_is_zero(b))) {
- return 0;
- }
- return mpn_cmp(a->data, b->data, a->limbs) == 0;
-}
-
-static void secp256k1_num_subadd(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b, int bneg) {
- if (!(b->neg ^ bneg ^ a->neg)) { /* a and b have the same sign */
- r->neg = a->neg;
- if (a->limbs >= b->limbs) {
- secp256k1_num_add_abs(r, a, b);
- } else {
- secp256k1_num_add_abs(r, b, a);
- }
- } else {
- if (secp256k1_num_cmp(a, b) > 0) {
- r->neg = a->neg;
- secp256k1_num_sub_abs(r, a, b);
- } else {
- r->neg = b->neg ^ bneg;
- secp256k1_num_sub_abs(r, b, a);
- }
- }
-}
-
-static void secp256k1_num_add(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b) {
- secp256k1_num_sanity(a);
- secp256k1_num_sanity(b);
- secp256k1_num_subadd(r, a, b, 0);
-}
-
-static void secp256k1_num_sub(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b) {
- secp256k1_num_sanity(a);
- secp256k1_num_sanity(b);
- secp256k1_num_subadd(r, a, b, 1);
-}
-
-static void secp256k1_num_mul(secp256k1_num *r, const secp256k1_num *a, const secp256k1_num *b) {
- mp_limb_t tmp[2*NUM_LIMBS+1];
- secp256k1_num_sanity(a);
- secp256k1_num_sanity(b);
-
- VERIFY_CHECK(a->limbs + b->limbs <= 2*NUM_LIMBS+1);
- if ((a->limbs==1 && a->data[0]==0) || (b->limbs==1 && b->data[0]==0)) {
- r->limbs = 1;
- r->neg = 0;
- r->data[0] = 0;
- return;
- }
- if (a->limbs >= b->limbs) {
- mpn_mul(tmp, a->data, a->limbs, b->data, b->limbs);
- } else {
- mpn_mul(tmp, b->data, b->limbs, a->data, a->limbs);
- }
- r->limbs = a->limbs + b->limbs;
- if (r->limbs > 1 && tmp[r->limbs - 1]==0) {
- r->limbs--;
- }
- VERIFY_CHECK(r->limbs <= 2*NUM_LIMBS);
- mpn_copyi(r->data, tmp, r->limbs);
- r->neg = a->neg ^ b->neg;
- memset(tmp, 0, sizeof(tmp));
-}
-
-static void secp256k1_num_shift(secp256k1_num *r, int bits) {
- if (bits % GMP_NUMB_BITS) {
- /* Shift within limbs. */
- mpn_rshift(r->data, r->data, r->limbs, bits % GMP_NUMB_BITS);
- }
- if (bits >= GMP_NUMB_BITS) {
- int i;
- /* Shift full limbs. */
- for (i = 0; i < r->limbs; i++) {
- int index = i + (bits / GMP_NUMB_BITS);
- if (index < r->limbs && index < 2*NUM_LIMBS) {
- r->data[i] = r->data[index];
- } else {
- r->data[i] = 0;
- }
- }
- }
- while (r->limbs>1 && r->data[r->limbs-1]==0) {
- r->limbs--;
- }
-}
-
-static void secp256k1_num_negate(secp256k1_num *r) {
- r->neg ^= 1;
-}
-
-#endif /* SECP256K1_NUM_REPR_IMPL_H */
diff --git a/src/num_impl.h b/src/num_impl.h
deleted file mode 100644
index 880598efe7..0000000000
--- a/src/num_impl.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/***********************************************************************
- * Copyright (c) 2013, 2014 Pieter Wuille *
- * Distributed under the MIT software license, see the accompanying *
- * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
- ***********************************************************************/
-
-#ifndef SECP256K1_NUM_IMPL_H
-#define SECP256K1_NUM_IMPL_H
-
-#if defined HAVE_CONFIG_H
-#include "libsecp256k1-config.h"
-#endif
-
-#include "num.h"
-
-#if defined(USE_NUM_GMP)
-#include "num_gmp_impl.h"
-#elif defined(USE_NUM_NONE)
-/* Nothing. */
-#else
-#error "Please select num implementation"
-#endif
-
-#endif /* SECP256K1_NUM_IMPL_H */
diff --git a/src/scalar.h b/src/scalar.h
index 0b737f940c..aaaa3d8827 100644
--- a/src/scalar.h
+++ b/src/scalar.h
@@ -7,7 +7,6 @@
#ifndef SECP256K1_SCALAR_H
#define SECP256K1_SCALAR_H
-#include "num.h"
#include "util.h"
#if defined HAVE_CONFIG_H
@@ -63,9 +62,6 @@ static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a,
* the low bits that were shifted off */
static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n);
-/** Compute the square of a scalar (modulo the group order). */
-static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a);
-
/** Compute the inverse of a scalar (modulo the group order). */
static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *a);
@@ -91,14 +87,6 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar *a);
* Returns -1 if the number was negated, 1 otherwise */
static int secp256k1_scalar_cond_negate(secp256k1_scalar *a, int flag);
-#ifndef USE_NUM_NONE
-/** Convert a scalar to a number. */
-static void secp256k1_scalar_get_num(secp256k1_num *r, const secp256k1_scalar *a);
-
-/** Get the order of the group as a number. */
-static void secp256k1_scalar_order_get_num(secp256k1_num *r);
-#endif
-
/** Compare two scalars. */
static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b);
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index 3eaa0418c6..a1def26fca 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -7,6 +7,8 @@
#ifndef SECP256K1_SCALAR_REPR_IMPL_H
#define SECP256K1_SCALAR_REPR_IMPL_H
+#include "modinv64_impl.h"
+
/* Limbs of the secp256k1 order. */
#define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL)
#define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL)
@@ -212,28 +214,6 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
VERIFY_CHECK(c1 >= th); \
}
-/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
-#define muladd2(a,b) { \
- uint64_t tl, th, th2, tl2; \
- { \
- uint128_t t = (uint128_t)a * b; \
- th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
- tl = t; \
- } \
- th2 = th + th; /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
- c2 += (th2 < th); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
- tl2 = tl + tl; /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
- th2 += (tl2 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
- c0 += tl2; /* overflow is handled on the next line */ \
- th2 += (c0 < tl2); /* second overflow is handled on the next line */ \
- c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
- c1 += th2; /* overflow is handled on the next line */ \
- c2 += (c1 < th2); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
-}
-
/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
#define sumadd(a) { \
unsigned int over; \
@@ -743,148 +723,10 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
#endif
}
-static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar *a) {
-#ifdef USE_ASM_X86_64
- __asm__ __volatile__(
- /* Preload */
- "movq 0(%%rdi), %%r11\n"
- "movq 8(%%rdi), %%r12\n"
- "movq 16(%%rdi), %%r13\n"
- "movq 24(%%rdi), %%r14\n"
- /* (rax,rdx) = a0 * a0 */
- "movq %%r11, %%rax\n"
- "mulq %%r11\n"
- /* Extract l0 */
- "movq %%rax, 0(%%rsi)\n"
- /* (r8,r9,r10) = (rdx,0) */
- "movq %%rdx, %%r8\n"
- "xorq %%r9, %%r9\n"
- "xorq %%r10, %%r10\n"
- /* (r8,r9,r10) += 2 * a0 * a1 */
- "movq %%r11, %%rax\n"
- "mulq %%r12\n"
- "addq %%rax, %%r8\n"
- "adcq %%rdx, %%r9\n"
- "adcq $0, %%r10\n"
- "addq %%rax, %%r8\n"
- "adcq %%rdx, %%r9\n"
- "adcq $0, %%r10\n"
- /* Extract l1 */
- "movq %%r8, 8(%%rsi)\n"
- "xorq %%r8, %%r8\n"
- /* (r9,r10,r8) += 2 * a0 * a2 */
- "movq %%r11, %%rax\n"
- "mulq %%r13\n"
- "addq %%rax, %%r9\n"
- "adcq %%rdx, %%r10\n"
- "adcq $0, %%r8\n"
- "addq %%rax, %%r9\n"
- "adcq %%rdx, %%r10\n"
- "adcq $0, %%r8\n"
- /* (r9,r10,r8) += a1 * a1 */
- "movq %%r12, %%rax\n"
- "mulq %%r12\n"
- "addq %%rax, %%r9\n"
- "adcq %%rdx, %%r10\n"
- "adcq $0, %%r8\n"
- /* Extract l2 */
- "movq %%r9, 16(%%rsi)\n"
- "xorq %%r9, %%r9\n"
- /* (r10,r8,r9) += 2 * a0 * a3 */
- "movq %%r11, %%rax\n"
- "mulq %%r14\n"
- "addq %%rax, %%r10\n"
- "adcq %%rdx, %%r8\n"
- "adcq $0, %%r9\n"
- "addq %%rax, %%r10\n"
- "adcq %%rdx, %%r8\n"
- "adcq $0, %%r9\n"
- /* (r10,r8,r9) += 2 * a1 * a2 */
- "movq %%r12, %%rax\n"
- "mulq %%r13\n"
- "addq %%rax, %%r10\n"
- "adcq %%rdx, %%r8\n"
- "adcq $0, %%r9\n"
- "addq %%rax, %%r10\n"
- "adcq %%rdx, %%r8\n"
- "adcq $0, %%r9\n"
- /* Extract l3 */
- "movq %%r10, 24(%%rsi)\n"
- "xorq %%r10, %%r10\n"
- /* (r8,r9,r10) += 2 * a1 * a3 */
- "movq %%r12, %%rax\n"
- "mulq %%r14\n"
- "addq %%rax, %%r8\n"
- "adcq %%rdx, %%r9\n"
- "adcq $0, %%r10\n"
- "addq %%rax, %%r8\n"
- "adcq %%rdx, %%r9\n"
- "adcq $0, %%r10\n"
- /* (r8,r9,r10) += a2 * a2 */
- "movq %%r13, %%rax\n"
- "mulq %%r13\n"
- "addq %%rax, %%r8\n"
- "adcq %%rdx, %%r9\n"
- "adcq $0, %%r10\n"
- /* Extract l4 */
- "movq %%r8, 32(%%rsi)\n"
- "xorq %%r8, %%r8\n"
- /* (r9,r10,r8) += 2 * a2 * a3 */
- "movq %%r13, %%rax\n"
- "mulq %%r14\n"
- "addq %%rax, %%r9\n"
- "adcq %%rdx, %%r10\n"
- "adcq $0, %%r8\n"
- "addq %%rax, %%r9\n"
- "adcq %%rdx, %%r10\n"
- "adcq $0, %%r8\n"
- /* Extract l5 */
- "movq %%r9, 40(%%rsi)\n"
- /* (r10,r8) += a3 * a3 */
- "movq %%r14, %%rax\n"
- "mulq %%r14\n"
- "addq %%rax, %%r10\n"
- "adcq %%rdx, %%r8\n"
- /* Extract l6 */
- "movq %%r10, 48(%%rsi)\n"
- /* Extract l7 */
- "movq %%r8, 56(%%rsi)\n"
- :
- : "S"(l), "D"(a->d)
- : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory");
-#else
- /* 160 bit accumulator. */
- uint64_t c0 = 0, c1 = 0;
- uint32_t c2 = 0;
-
- /* l[0..7] = a[0..3] * b[0..3]. */
- muladd_fast(a->d[0], a->d[0]);
- extract_fast(l[0]);
- muladd2(a->d[0], a->d[1]);
- extract(l[1]);
- muladd2(a->d[0], a->d[2]);
- muladd(a->d[1], a->d[1]);
- extract(l[2]);
- muladd2(a->d[0], a->d[3]);
- muladd2(a->d[1], a->d[2]);
- extract(l[3]);
- muladd2(a->d[1], a->d[3]);
- muladd(a->d[2], a->d[2]);
- extract(l[4]);
- muladd2(a->d[2], a->d[3]);
- extract(l[5]);
- muladd_fast(a->d[3], a->d[3]);
- extract_fast(l[6]);
- VERIFY_CHECK(c1 == 0);
- l[7] = c0;
-#endif
-}
-
#undef sumadd
#undef sumadd_fast
#undef muladd
#undef muladd_fast
-#undef muladd2
#undef extract
#undef extract_fast
@@ -906,12 +748,6 @@ static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
return ret;
}
-static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
- uint64_t l[8];
- secp256k1_scalar_sqr_512(l, a);
- secp256k1_scalar_reduce_512(r, l);
-}
-
static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
r1->d[0] = k->d[0];
r1->d[1] = k->d[1];
@@ -955,4 +791,78 @@ static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const se
r->d[3] = (r->d[3] & mask0) | (a->d[3] & mask1);
}
+static void secp256k1_scalar_from_signed62(secp256k1_scalar *r, const secp256k1_modinv64_signed62 *a) {
+ const uint64_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4];
+
+ /* The output from secp256k1_modinv64{_var} should be normalized to range [0,modulus), and
+ * have limbs in [0,2^62). The modulus is < 2^256, so the top limb must be below 2^(256-62*4).
+ */
+ VERIFY_CHECK(a0 >> 62 == 0);
+ VERIFY_CHECK(a1 >> 62 == 0);
+ VERIFY_CHECK(a2 >> 62 == 0);
+ VERIFY_CHECK(a3 >> 62 == 0);
+ VERIFY_CHECK(a4 >> 8 == 0);
+
+ r->d[0] = a0 | a1 << 62;
+ r->d[1] = a1 >> 2 | a2 << 60;
+ r->d[2] = a2 >> 4 | a3 << 58;
+ r->d[3] = a3 >> 6 | a4 << 56;
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
+#endif
+}
+
+static void secp256k1_scalar_to_signed62(secp256k1_modinv64_signed62 *r, const secp256k1_scalar *a) {
+ const uint64_t M62 = UINT64_MAX >> 2;
+ const uint64_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3];
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
+#endif
+
+ r->v[0] = a0 & M62;
+ r->v[1] = (a0 >> 62 | a1 << 2) & M62;
+ r->v[2] = (a1 >> 60 | a2 << 4) & M62;
+ r->v[3] = (a2 >> 58 | a3 << 6) & M62;
+ r->v[4] = a3 >> 56;
+}
+
+static const secp256k1_modinv64_modinfo secp256k1_const_modinfo_scalar = {
+ {{0x3FD25E8CD0364141LL, 0x2ABB739ABD2280EELL, -0x15LL, 0, 256}},
+ 0x34F20099AA774EC1LL
+};
+
+static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
+ secp256k1_modinv64_signed62 s;
+#ifdef VERIFY
+ int zero_in = secp256k1_scalar_is_zero(x);
+#endif
+ secp256k1_scalar_to_signed62(&s, x);
+ secp256k1_modinv64(&s, &secp256k1_const_modinfo_scalar);
+ secp256k1_scalar_from_signed62(r, &s);
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in);
+#endif
+}
+
+static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
+ secp256k1_modinv64_signed62 s;
+#ifdef VERIFY
+ int zero_in = secp256k1_scalar_is_zero(x);
+#endif
+ secp256k1_scalar_to_signed62(&s, x);
+ secp256k1_modinv64_var(&s, &secp256k1_const_modinfo_scalar);
+ secp256k1_scalar_from_signed62(r, &s);
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in);
+#endif
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
+ return !(a->d[0] & 1);
+}
+
#endif /* SECP256K1_SCALAR_REPR_IMPL_H */
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index bf98e01d76..62c7ae7156 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -7,6 +7,8 @@
#ifndef SECP256K1_SCALAR_REPR_IMPL_H
#define SECP256K1_SCALAR_REPR_IMPL_H
+#include "modinv32_impl.h"
+
/* Limbs of the secp256k1 order. */
#define SECP256K1_N_0 ((uint32_t)0xD0364141UL)
#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL)
@@ -291,28 +293,6 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
VERIFY_CHECK(c1 >= th); \
}
-/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
-#define muladd2(a,b) { \
- uint32_t tl, th, th2, tl2; \
- { \
- uint64_t t = (uint64_t)a * b; \
- th = t >> 32; /* at most 0xFFFFFFFE */ \
- tl = t; \
- } \
- th2 = th + th; /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
- c2 += (th2 < th); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
- tl2 = tl + tl; /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \
- th2 += (tl2 < tl); /* at most 0xFFFFFFFF */ \
- c0 += tl2; /* overflow is handled on the next line */ \
- th2 += (c0 < tl2); /* second overflow is handled on the next line */ \
- c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
- c1 += th2; /* overflow is handled on the next line */ \
- c2 += (c1 < th2); /* never overflows by contract (verified the next line) */ \
- VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
-}
-
/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
#define sumadd(a) { \
unsigned int over; \
@@ -576,71 +556,10 @@ static void secp256k1_scalar_mul_512(uint32_t *l, const secp256k1_scalar *a, con
l[15] = c0;
}
-static void secp256k1_scalar_sqr_512(uint32_t *l, const secp256k1_scalar *a) {
- /* 96 bit accumulator. */
- uint32_t c0 = 0, c1 = 0, c2 = 0;
-
- /* l[0..15] = a[0..7]^2. */
- muladd_fast(a->d[0], a->d[0]);
- extract_fast(l[0]);
- muladd2(a->d[0], a->d[1]);
- extract(l[1]);
- muladd2(a->d[0], a->d[2]);
- muladd(a->d[1], a->d[1]);
- extract(l[2]);
- muladd2(a->d[0], a->d[3]);
- muladd2(a->d[1], a->d[2]);
- extract(l[3]);
- muladd2(a->d[0], a->d[4]);
- muladd2(a->d[1], a->d[3]);
- muladd(a->d[2], a->d[2]);
- extract(l[4]);
- muladd2(a->d[0], a->d[5]);
- muladd2(a->d[1], a->d[4]);
- muladd2(a->d[2], a->d[3]);
- extract(l[5]);
- muladd2(a->d[0], a->d[6]);
- muladd2(a->d[1], a->d[5]);
- muladd2(a->d[2], a->d[4]);
- muladd(a->d[3], a->d[3]);
- extract(l[6]);
- muladd2(a->d[0], a->d[7]);
- muladd2(a->d[1], a->d[6]);
- muladd2(a->d[2], a->d[5]);
- muladd2(a->d[3], a->d[4]);
- extract(l[7]);
- muladd2(a->d[1], a->d[7]);
- muladd2(a->d[2], a->d[6]);
- muladd2(a->d[3], a->d[5]);
- muladd(a->d[4], a->d[4]);
- extract(l[8]);
- muladd2(a->d[2], a->d[7]);
- muladd2(a->d[3], a->d[6]);
- muladd2(a->d[4], a->d[5]);
- extract(l[9]);
- muladd2(a->d[3], a->d[7]);
- muladd2(a->d[4], a->d[6]);
- muladd(a->d[5], a->d[5]);
- extract(l[10]);
- muladd2(a->d[4], a->d[7]);
- muladd2(a->d[5], a->d[6]);
- extract(l[11]);
- muladd2(a->d[5], a->d[7]);
- muladd(a->d[6], a->d[6]);
- extract(l[12]);
- muladd2(a->d[6], a->d[7]);
- extract(l[13]);
- muladd_fast(a->d[7], a->d[7]);
- extract_fast(l[14]);
- VERIFY_CHECK(c1 == 0);
- l[15] = c0;
-}
-
#undef sumadd
#undef sumadd_fast
#undef muladd
#undef muladd_fast
-#undef muladd2
#undef extract
#undef extract_fast
@@ -666,12 +585,6 @@ static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
return ret;
}
-static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
- uint32_t l[16];
- secp256k1_scalar_sqr_512(l, a);
- secp256k1_scalar_reduce_512(r, l);
-}
-
static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
r1->d[0] = k->d[0];
r1->d[1] = k->d[1];
@@ -731,4 +644,92 @@ static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const se
r->d[7] = (r->d[7] & mask0) | (a->d[7] & mask1);
}
+static void secp256k1_scalar_from_signed30(secp256k1_scalar *r, const secp256k1_modinv32_signed30 *a) {
+ const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4],
+ a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8];
+
+ /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and
+ * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8).
+ */
+ VERIFY_CHECK(a0 >> 30 == 0);
+ VERIFY_CHECK(a1 >> 30 == 0);
+ VERIFY_CHECK(a2 >> 30 == 0);
+ VERIFY_CHECK(a3 >> 30 == 0);
+ VERIFY_CHECK(a4 >> 30 == 0);
+ VERIFY_CHECK(a5 >> 30 == 0);
+ VERIFY_CHECK(a6 >> 30 == 0);
+ VERIFY_CHECK(a7 >> 30 == 0);
+ VERIFY_CHECK(a8 >> 16 == 0);
+
+ r->d[0] = a0 | a1 << 30;
+ r->d[1] = a1 >> 2 | a2 << 28;
+ r->d[2] = a2 >> 4 | a3 << 26;
+ r->d[3] = a3 >> 6 | a4 << 24;
+ r->d[4] = a4 >> 8 | a5 << 22;
+ r->d[5] = a5 >> 10 | a6 << 20;
+ r->d[6] = a6 >> 12 | a7 << 18;
+ r->d[7] = a7 >> 14 | a8 << 16;
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
+#endif
+}
+
+static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_scalar *a) {
+ const uint32_t M30 = UINT32_MAX >> 2;
+ const uint32_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3],
+ a4 = a->d[4], a5 = a->d[5], a6 = a->d[6], a7 = a->d[7];
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_scalar_check_overflow(a) == 0);
+#endif
+
+ r->v[0] = a0 & M30;
+ r->v[1] = (a0 >> 30 | a1 << 2) & M30;
+ r->v[2] = (a1 >> 28 | a2 << 4) & M30;
+ r->v[3] = (a2 >> 26 | a3 << 6) & M30;
+ r->v[4] = (a3 >> 24 | a4 << 8) & M30;
+ r->v[5] = (a4 >> 22 | a5 << 10) & M30;
+ r->v[6] = (a5 >> 20 | a6 << 12) & M30;
+ r->v[7] = (a6 >> 18 | a7 << 14) & M30;
+ r->v[8] = a7 >> 16;
+}
+
+static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_scalar = {
+ {{0x10364141L, 0x3F497A33L, 0x348A03BBL, 0x2BB739ABL, -0x146L, 0, 0, 0, 65536}},
+ 0x2A774EC1L
+};
+
+static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
+ secp256k1_modinv32_signed30 s;
+#ifdef VERIFY
+ int zero_in = secp256k1_scalar_is_zero(x);
+#endif
+ secp256k1_scalar_to_signed30(&s, x);
+ secp256k1_modinv32(&s, &secp256k1_const_modinfo_scalar);
+ secp256k1_scalar_from_signed30(r, &s);
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in);
+#endif
+}
+
+static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
+ secp256k1_modinv32_signed30 s;
+#ifdef VERIFY
+ int zero_in = secp256k1_scalar_is_zero(x);
+#endif
+ secp256k1_scalar_to_signed30(&s, x);
+ secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_scalar);
+ secp256k1_scalar_from_signed30(r, &s);
+
+#ifdef VERIFY
+ VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in);
+#endif
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
+ return !(a->d[0] & 1);
+}
+
#endif /* SECP256K1_SCALAR_REPR_IMPL_H */
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
index 61c1fbd58e..e124474773 100644
--- a/src/scalar_impl.h
+++ b/src/scalar_impl.h
@@ -31,231 +31,12 @@
static const secp256k1_scalar secp256k1_scalar_one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1);
static const secp256k1_scalar secp256k1_scalar_zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
-#ifndef USE_NUM_NONE
-static void secp256k1_scalar_get_num(secp256k1_num *r, const secp256k1_scalar *a) {
- unsigned char c[32];
- secp256k1_scalar_get_b32(c, a);
- secp256k1_num_set_bin(r, c, 32);
-}
-
-/** secp256k1 curve order, see secp256k1_ecdsa_const_order_as_fe in ecdsa_impl.h */
-static void secp256k1_scalar_order_get_num(secp256k1_num *r) {
-#if defined(EXHAUSTIVE_TEST_ORDER)
- static const unsigned char order[32] = {
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,EXHAUSTIVE_TEST_ORDER
- };
-#else
- static const unsigned char order[32] = {
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
- 0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
- 0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41
- };
-#endif
- secp256k1_num_set_bin(r, order, 32);
-}
-#endif
-
static int secp256k1_scalar_set_b32_seckey(secp256k1_scalar *r, const unsigned char *bin) {
int overflow;
secp256k1_scalar_set_b32(r, bin, &overflow);
return (!overflow) & (!secp256k1_scalar_is_zero(r));
}
-static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
-#if defined(EXHAUSTIVE_TEST_ORDER)
- int i;
- *r = 0;
- for (i = 0; i < EXHAUSTIVE_TEST_ORDER; i++)
- if ((i * *x) % EXHAUSTIVE_TEST_ORDER == 1)
- *r = i;
- /* If this VERIFY_CHECK triggers we were given a noninvertible scalar (and thus
- * have a composite group order; fix it in exhaustive_tests.c). */
- VERIFY_CHECK(*r != 0);
-}
-#else
- secp256k1_scalar *t;
- int i;
- /* First compute xN as x ^ (2^N - 1) for some values of N,
- * and uM as x ^ M for some values of M. */
- secp256k1_scalar x2, x3, x6, x8, x14, x28, x56, x112, x126;
- secp256k1_scalar u2, u5, u9, u11, u13;
-
- secp256k1_scalar_sqr(&u2, x);
- secp256k1_scalar_mul(&x2, &u2, x);
- secp256k1_scalar_mul(&u5, &u2, &x2);
- secp256k1_scalar_mul(&x3, &u5, &u2);
- secp256k1_scalar_mul(&u9, &x3, &u2);
- secp256k1_scalar_mul(&u11, &u9, &u2);
- secp256k1_scalar_mul(&u13, &u11, &u2);
-
- secp256k1_scalar_sqr(&x6, &u13);
- secp256k1_scalar_sqr(&x6, &x6);
- secp256k1_scalar_mul(&x6, &x6, &u11);
-
- secp256k1_scalar_sqr(&x8, &x6);
- secp256k1_scalar_sqr(&x8, &x8);
- secp256k1_scalar_mul(&x8, &x8, &x2);
-
- secp256k1_scalar_sqr(&x14, &x8);
- for (i = 0; i < 5; i++) {
- secp256k1_scalar_sqr(&x14, &x14);
- }
- secp256k1_scalar_mul(&x14, &x14, &x6);
-
- secp256k1_scalar_sqr(&x28, &x14);
- for (i = 0; i < 13; i++) {
- secp256k1_scalar_sqr(&x28, &x28);
- }
- secp256k1_scalar_mul(&x28, &x28, &x14);
-
- secp256k1_scalar_sqr(&x56, &x28);
- for (i = 0; i < 27; i++) {
- secp256k1_scalar_sqr(&x56, &x56);
- }
- secp256k1_scalar_mul(&x56, &x56, &x28);
-
- secp256k1_scalar_sqr(&x112, &x56);
- for (i = 0; i < 55; i++) {
- secp256k1_scalar_sqr(&x112, &x112);
- }
- secp256k1_scalar_mul(&x112, &x112, &x56);
-
- secp256k1_scalar_sqr(&x126, &x112);
- for (i = 0; i < 13; i++) {
- secp256k1_scalar_sqr(&x126, &x126);
- }
- secp256k1_scalar_mul(&x126, &x126, &x14);
-
- /* Then accumulate the final result (t starts at x126). */
- t = &x126;
- for (i = 0; i < 3; i++) {
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u5); /* 101 */
- for (i = 0; i < 4; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &x3); /* 111 */
- for (i = 0; i < 4; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u5); /* 101 */
- for (i = 0; i < 5; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u11); /* 1011 */
- for (i = 0; i < 4; i++) {
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u11); /* 1011 */
- for (i = 0; i < 4; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &x3); /* 111 */
- for (i = 0; i < 5; i++) { /* 00 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &x3); /* 111 */
- for (i = 0; i < 6; i++) { /* 00 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u13); /* 1101 */
- for (i = 0; i < 4; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u5); /* 101 */
- for (i = 0; i < 3; i++) {
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &x3); /* 111 */
- for (i = 0; i < 5; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u9); /* 1001 */
- for (i = 0; i < 6; i++) { /* 000 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u5); /* 101 */
- for (i = 0; i < 10; i++) { /* 0000000 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &x3); /* 111 */
- for (i = 0; i < 4; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &x3); /* 111 */
- for (i = 0; i < 9; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &x8); /* 11111111 */
- for (i = 0; i < 5; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u9); /* 1001 */
- for (i = 0; i < 6; i++) { /* 00 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u11); /* 1011 */
- for (i = 0; i < 4; i++) {
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u13); /* 1101 */
- for (i = 0; i < 5; i++) {
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &x2); /* 11 */
- for (i = 0; i < 6; i++) { /* 00 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u13); /* 1101 */
- for (i = 0; i < 10; i++) { /* 000000 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u13); /* 1101 */
- for (i = 0; i < 4; i++) {
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, &u9); /* 1001 */
- for (i = 0; i < 6; i++) { /* 00000 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, x); /* 1 */
- for (i = 0; i < 8; i++) { /* 00 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(r, t, &x6); /* 111111 */
-}
-
-SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
- return !(a->d[0] & 1);
-}
-#endif
-
-static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
-#if defined(USE_SCALAR_INV_BUILTIN)
- secp256k1_scalar_inverse(r, x);
-#elif defined(USE_SCALAR_INV_NUM)
- unsigned char b[32];
- secp256k1_num n, m;
- secp256k1_scalar t = *x;
- secp256k1_scalar_get_b32(b, &t);
- secp256k1_num_set_bin(&n, b, 32);
- secp256k1_scalar_order_get_num(&m);
- secp256k1_num_mod_inverse(&n, &n, &m);
- secp256k1_num_get_bin(b, 32, &n);
- secp256k1_scalar_set_b32(r, b, NULL);
- /* Verify that the inverse was computed correctly, without GMP code. */
- secp256k1_scalar_mul(&t, &t, r);
- CHECK(secp256k1_scalar_is_one(&t));
-#else
-#error "Please select scalar inverse implementation"
-#endif
-}
-
/* These parameters are generated using sage/gen_exhaustive_groups.sage. */
#if defined(EXHAUSTIVE_TEST_ORDER)
# if EXHAUSTIVE_TEST_ORDER == 13
diff --git a/src/scalar_low_impl.h b/src/scalar_low_impl.h
index 98ffd1536e..7176f0b2ca 100644
--- a/src/scalar_low_impl.h
+++ b/src/scalar_low_impl.h
@@ -104,10 +104,6 @@ static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
return ret;
}
-static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
- *r = (*a * *a) % EXHAUSTIVE_TEST_ORDER;
-}
-
static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
*r1 = *a;
*r2 = 0;
@@ -125,4 +121,19 @@ static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const se
*r = (*r & mask0) | (*a & mask1);
}
+static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
+ int i;
+ *r = 0;
+ for (i = 0; i < EXHAUSTIVE_TEST_ORDER; i++)
+ if ((i * *x) % EXHAUSTIVE_TEST_ORDER == 1)
+ *r = i;
+ /* If this VERIFY_CHECK triggers we were given a noninvertible scalar (and thus
+ * have a composite group order; fix it in exhaustive_tests.c). */
+ VERIFY_CHECK(*r != 0);
+}
+
+static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
+ secp256k1_scalar_inverse(r, x);
+}
+
#endif /* SECP256K1_SCALAR_REPR_IMPL_H */
diff --git a/src/secp256k1.c b/src/secp256k1.c
index 4f56c27c8a..aef3f99ac3 100644
--- a/src/secp256k1.c
+++ b/src/secp256k1.c
@@ -9,7 +9,6 @@
#include "assumptions.h"
#include "util.h"
-#include "num_impl.h"
#include "field_impl.h"
#include "scalar_impl.h"
#include "group_impl.h"
diff --git a/src/tests.c b/src/tests.c
index c2d5e28924..ba645bbe81 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -18,6 +18,7 @@
#include "include/secp256k1.h"
#include "include/secp256k1_preallocated.h"
#include "testrand_impl.h"
+#include "util.h"
#ifdef ENABLE_OPENSSL_TESTS
#include "openssl/bn.h"
@@ -32,6 +33,11 @@ void ECDSA_SIG_get0(const ECDSA_SIG *sig, const BIGNUM **pr, const BIGNUM **ps)
#include "contrib/lax_der_parsing.c"
#include "contrib/lax_der_privatekey_parsing.c"
+#include "modinv32_impl.h"
+#ifdef SECP256K1_WIDEMUL_INT128
+#include "modinv64_impl.h"
+#endif
+
static int count = 64;
static secp256k1_context *ctx = NULL;
@@ -416,6 +422,25 @@ void run_scratch_tests(void) {
secp256k1_context_destroy(none);
}
+void run_ctz_tests(void) {
+ static const uint32_t b32[] = {1, 0xffffffff, 0x5e56968f, 0xe0d63129};
+ static const uint64_t b64[] = {1, 0xffffffffffffffff, 0xbcd02462139b3fc3, 0x98b5f80c769693ef};
+ int shift;
+ unsigned i;
+ for (i = 0; i < sizeof(b32) / sizeof(b32[0]); ++i) {
+ for (shift = 0; shift < 32; ++shift) {
+ CHECK(secp256k1_ctz32_var_debruijn(b32[i] << shift) == shift);
+ CHECK(secp256k1_ctz32_var(b32[i] << shift) == shift);
+ }
+ }
+ for (i = 0; i < sizeof(b64) / sizeof(b64[0]); ++i) {
+ for (shift = 0; shift < 64; ++shift) {
+ CHECK(secp256k1_ctz64_var_debruijn(b64[i] << shift) == shift);
+ CHECK(secp256k1_ctz64_var(b64[i] << shift) == shift);
+ }
+ }
+}
+
/***** HASH TESTS *****/
void run_sha256_tests(void) {
@@ -611,202 +636,448 @@ void run_rand_int(void) {
}
}
-/***** NUM TESTS *****/
+/***** MODINV TESTS *****/
+
+/* Compute the modular inverse of (odd) x mod 2^64. */
+uint64_t modinv2p64(uint64_t x) {
+ /* If w = 1/x mod 2^(2^L), then w*(2 - w*x) = 1/x mod 2^(2^(L+1)). See
+ * Hacker's Delight second edition, Henry S. Warren, Jr., pages 245-247 for
+ * why. Start with L=0, for which it is true for every odd x that
+ * 1/x=1 mod 2. Iterating 6 times gives us 1/x mod 2^64. */
+ int l;
+ uint64_t w = 1;
+ CHECK(x & 1);
+ for (l = 0; l < 6; ++l) w *= (2 - w*x);
+ return w;
+}
-#ifndef USE_NUM_NONE
-void random_num_negate(secp256k1_num *num) {
- if (secp256k1_testrand_bits(1)) {
- secp256k1_num_negate(num);
+/* compute out = (a*b) mod m; if b=NULL, treat b=1.
+ *
+ * Out is a 512-bit number (represented as 32 uint16_t's in LE order). The other
+ * arguments are 256-bit numbers (represented as 16 uint16_t's in LE order). */
+void mulmod256(uint16_t* out, const uint16_t* a, const uint16_t* b, const uint16_t* m) {
+ uint16_t mul[32];
+ uint64_t c = 0;
+ int i, j;
+ int m_bitlen = 0;
+ int mul_bitlen = 0;
+
+ if (b != NULL) {
+ /* Compute the product of a and b, and put it in mul. */
+ for (i = 0; i < 32; ++i) {
+ for (j = i <= 15 ? 0 : i - 15; j <= i && j <= 15; j++) {
+ c += (uint64_t)a[j] * b[i - j];
+ }
+ mul[i] = c & 0xFFFF;
+ c >>= 16;
+ }
+ CHECK(c == 0);
+
+ /* compute the highest set bit in mul */
+ for (i = 511; i >= 0; --i) {
+ if ((mul[i >> 4] >> (i & 15)) & 1) {
+ mul_bitlen = i;
+ break;
+ }
+ }
+ } else {
+ /* if b==NULL, set mul=a. */
+ memcpy(mul, a, 32);
+ memset(mul + 16, 0, 32);
+ /* compute the highest set bit in mul */
+ for (i = 255; i >= 0; --i) {
+ if ((mul[i >> 4] >> (i & 15)) & 1) {
+ mul_bitlen = i;
+ break;
+ }
+ }
}
-}
-void random_num_order_test(secp256k1_num *num) {
- secp256k1_scalar sc;
- random_scalar_order_test(&sc);
- secp256k1_scalar_get_num(num, &sc);
+ /* Compute the highest set bit in m. */
+ for (i = 255; i >= 0; --i) {
+ if ((m[i >> 4] >> (i & 15)) & 1) {
+ m_bitlen = i;
+ break;
+ }
+ }
+
+ /* Try do mul -= m<= 0; --i) {
+ uint16_t mul2[32];
+ int64_t cs;
+
+ /* Compute mul2 = mul - m<= 0 && bitpos < 256) {
+ sub |= ((m[bitpos >> 4] >> (bitpos & 15)) & 1) << p;
+ }
+ }
+ /* Add mul[j]-sub to accumulator, and shift bottom 16 bits out to mul2[j]. */
+ cs += mul[j];
+ cs -= sub;
+ mul2[j] = (cs & 0xFFFF);
+ cs >>= 16;
+ }
+ /* If remainder of subtraction is 0, set mul = mul2. */
+ if (cs == 0) {
+ memcpy(mul, mul2, sizeof(mul));
+ }
+ }
+ /* Sanity check: test that all limbs higher than m's highest are zero */
+ for (i = (m_bitlen >> 4) + 1; i < 32; ++i) {
+ CHECK(mul[i] == 0);
+ }
+ memcpy(out, mul, 32);
}
-void random_num_order(secp256k1_num *num) {
- secp256k1_scalar sc;
- random_scalar_order(&sc);
- secp256k1_scalar_get_num(num, &sc);
+/* Convert a 256-bit number represented as 16 uint16_t's to signed30 notation. */
+void uint16_to_signed30(secp256k1_modinv32_signed30* out, const uint16_t* in) {
+ int i;
+ memset(out->v, 0, sizeof(out->v));
+ for (i = 0; i < 256; ++i) {
+ out->v[i / 30] |= (int32_t)(((in[i >> 4]) >> (i & 15)) & 1) << (i % 30);
+ }
}
-void test_num_negate(void) {
- secp256k1_num n1;
- secp256k1_num n2;
- random_num_order_test(&n1); /* n1 = R */
- random_num_negate(&n1);
- secp256k1_num_copy(&n2, &n1); /* n2 = R */
- secp256k1_num_sub(&n1, &n2, &n1); /* n1 = n2-n1 = 0 */
- CHECK(secp256k1_num_is_zero(&n1));
- secp256k1_num_copy(&n1, &n2); /* n1 = R */
- secp256k1_num_negate(&n1); /* n1 = -R */
- CHECK(!secp256k1_num_is_zero(&n1));
- secp256k1_num_add(&n1, &n2, &n1); /* n1 = n2+n1 = 0 */
- CHECK(secp256k1_num_is_zero(&n1));
- secp256k1_num_copy(&n1, &n2); /* n1 = R */
- secp256k1_num_negate(&n1); /* n1 = -R */
- CHECK(secp256k1_num_is_neg(&n1) != secp256k1_num_is_neg(&n2));
- secp256k1_num_negate(&n1); /* n1 = R */
- CHECK(secp256k1_num_eq(&n1, &n2));
+/* Convert a 256-bit number in signed30 notation to a representation as 16 uint16_t's. */
+void signed30_to_uint16(uint16_t* out, const secp256k1_modinv32_signed30* in) {
+ int i;
+ memset(out, 0, 32);
+ for (i = 0; i < 256; ++i) {
+ out[i >> 4] |= (((in->v[i / 30]) >> (i % 30)) & 1) << (i & 15);
+ }
}
-void test_num_add_sub(void) {
+/* Randomly mutate the sign of limbs in signed30 representation, without changing the value. */
+void mutate_sign_signed30(secp256k1_modinv32_signed30* x) {
int i;
- secp256k1_scalar s;
- secp256k1_num n1;
- secp256k1_num n2;
- secp256k1_num n1p2, n2p1, n1m2, n2m1;
- random_num_order_test(&n1); /* n1 = R1 */
- if (secp256k1_testrand_bits(1)) {
- random_num_negate(&n1);
+ for (i = 0; i < 16; ++i) {
+ int pos = secp256k1_testrand_int(8);
+ if (x->v[pos] > 0 && x->v[pos + 1] <= 0x3fffffff) {
+ x->v[pos] -= 0x40000000;
+ x->v[pos + 1] += 1;
+ } else if (x->v[pos] < 0 && x->v[pos + 1] >= 0x3fffffff) {
+ x->v[pos] += 0x40000000;
+ x->v[pos + 1] -= 1;
+ }
}
- random_num_order_test(&n2); /* n2 = R2 */
- if (secp256k1_testrand_bits(1)) {
- random_num_negate(&n2);
- }
- secp256k1_num_add(&n1p2, &n1, &n2); /* n1p2 = R1 + R2 */
- secp256k1_num_add(&n2p1, &n2, &n1); /* n2p1 = R2 + R1 */
- secp256k1_num_sub(&n1m2, &n1, &n2); /* n1m2 = R1 - R2 */
- secp256k1_num_sub(&n2m1, &n2, &n1); /* n2m1 = R2 - R1 */
- CHECK(secp256k1_num_eq(&n1p2, &n2p1));
- CHECK(!secp256k1_num_eq(&n1p2, &n1m2));
- secp256k1_num_negate(&n2m1); /* n2m1 = -R2 + R1 */
- CHECK(secp256k1_num_eq(&n2m1, &n1m2));
- CHECK(!secp256k1_num_eq(&n2m1, &n1));
- secp256k1_num_add(&n2m1, &n2m1, &n2); /* n2m1 = -R2 + R1 + R2 = R1 */
- CHECK(secp256k1_num_eq(&n2m1, &n1));
- CHECK(!secp256k1_num_eq(&n2p1, &n1));
- secp256k1_num_sub(&n2p1, &n2p1, &n2); /* n2p1 = R2 + R1 - R2 = R1 */
- CHECK(secp256k1_num_eq(&n2p1, &n1));
-
- /* check is_one */
- secp256k1_scalar_set_int(&s, 1);
- secp256k1_scalar_get_num(&n1, &s);
- CHECK(secp256k1_num_is_one(&n1));
- /* check that 2^n + 1 is never 1 */
- secp256k1_scalar_get_num(&n2, &s);
- for (i = 0; i < 250; ++i) {
- secp256k1_num_add(&n1, &n1, &n1); /* n1 *= 2 */
- secp256k1_num_add(&n1p2, &n1, &n2); /* n1p2 = n1 + 1 */
- CHECK(!secp256k1_num_is_one(&n1p2));
+}
+
+/* Test secp256k1_modinv32{_var}, using inputs in 16-bit limb format, and returning inverse. */
+void test_modinv32_uint16(uint16_t* out, const uint16_t* in, const uint16_t* mod) {
+ uint16_t tmp[16];
+ secp256k1_modinv32_signed30 x;
+ secp256k1_modinv32_modinfo m;
+ int i, vartime, nonzero;
+
+ uint16_to_signed30(&x, in);
+ nonzero = (x.v[0] | x.v[1] | x.v[2] | x.v[3] | x.v[4] | x.v[5] | x.v[6] | x.v[7] | x.v[8]) != 0;
+ uint16_to_signed30(&m.modulus, mod);
+ mutate_sign_signed30(&m.modulus);
+
+ /* compute 1/modulus mod 2^30 */
+ m.modulus_inv30 = modinv2p64(m.modulus.v[0]) & 0x3fffffff;
+ CHECK(((m.modulus_inv30 * m.modulus.v[0]) & 0x3fffffff) == 1);
+
+ for (vartime = 0; vartime < 2; ++vartime) {
+ /* compute inverse */
+ (vartime ? secp256k1_modinv32_var : secp256k1_modinv32)(&x, &m);
+
+ /* produce output */
+ signed30_to_uint16(out, &x);
+
+ /* check if the inverse times the input is 1 (mod m), unless x is 0. */
+ mulmod256(tmp, out, in, mod);
+ CHECK(tmp[0] == nonzero);
+ for (i = 1; i < 16; ++i) CHECK(tmp[i] == 0);
+
+ /* invert again */
+ (vartime ? secp256k1_modinv32_var : secp256k1_modinv32)(&x, &m);
+
+ /* check if the result is equal to the input */
+ signed30_to_uint16(tmp, &x);
+ for (i = 0; i < 16; ++i) CHECK(tmp[i] == in[i]);
}
}
-void test_num_mod(void) {
+#ifdef SECP256K1_WIDEMUL_INT128
+/* Convert a 256-bit number represented as 16 uint16_t's to signed62 notation. */
+void uint16_to_signed62(secp256k1_modinv64_signed62* out, const uint16_t* in) {
int i;
- secp256k1_scalar s;
- secp256k1_num order, n;
-
- /* check that 0 mod anything is 0 */
- random_scalar_order_test(&s);
- secp256k1_scalar_get_num(&order, &s);
- secp256k1_scalar_set_int(&s, 0);
- secp256k1_scalar_get_num(&n, &s);
- secp256k1_num_mod(&n, &order);
- CHECK(secp256k1_num_is_zero(&n));
-
- /* check that anything mod 1 is 0 */
- secp256k1_scalar_set_int(&s, 1);
- secp256k1_scalar_get_num(&order, &s);
- secp256k1_scalar_get_num(&n, &s);
- secp256k1_num_mod(&n, &order);
- CHECK(secp256k1_num_is_zero(&n));
-
- /* check that increasing the number past 2^256 does not break this */
- random_scalar_order_test(&s);
- secp256k1_scalar_get_num(&n, &s);
- /* multiply by 2^8, which'll test this case with high probability */
- for (i = 0; i < 8; ++i) {
- secp256k1_num_add(&n, &n, &n);
+ memset(out->v, 0, sizeof(out->v));
+ for (i = 0; i < 256; ++i) {
+ out->v[i / 62] |= (int64_t)(((in[i >> 4]) >> (i & 15)) & 1) << (i % 62);
}
- secp256k1_num_mod(&n, &order);
- CHECK(secp256k1_num_is_zero(&n));
}
-void test_num_jacobi(void) {
- secp256k1_scalar sqr;
- secp256k1_scalar small;
- secp256k1_scalar five; /* five is not a quadratic residue */
- secp256k1_num order, n;
+/* Convert a 256-bit number in signed62 notation to a representation as 16 uint16_t's. */
+void signed62_to_uint16(uint16_t* out, const secp256k1_modinv64_signed62* in) {
int i;
- /* squares mod 5 are 1, 4 */
- const int jacobi5[10] = { 0, 1, -1, -1, 1, 0, 1, -1, -1, 1 };
+ memset(out, 0, 32);
+ for (i = 0; i < 256; ++i) {
+ out[i >> 4] |= (((in->v[i / 62]) >> (i % 62)) & 1) << (i & 15);
+ }
+}
- /* check some small values with 5 as the order */
- secp256k1_scalar_set_int(&five, 5);
- secp256k1_scalar_get_num(&order, &five);
- for (i = 0; i < 10; ++i) {
- secp256k1_scalar_set_int(&small, i);
- secp256k1_scalar_get_num(&n, &small);
- CHECK(secp256k1_num_jacobi(&n, &order) == jacobi5[i]);
+/* Randomly mutate the sign of limbs in signed62 representation, without changing the value. */
+void mutate_sign_signed62(secp256k1_modinv64_signed62* x) {
+ static const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
+ int i;
+ for (i = 0; i < 8; ++i) {
+ int pos = secp256k1_testrand_int(4);
+ if (x->v[pos] > 0 && x->v[pos + 1] <= M62) {
+ x->v[pos] -= (M62 + 1);
+ x->v[pos + 1] += 1;
+ } else if (x->v[pos] < 0 && x->v[pos + 1] >= -M62) {
+ x->v[pos] += (M62 + 1);
+ x->v[pos + 1] -= 1;
+ }
}
+}
- /** test large values with 5 as group order */
- secp256k1_scalar_get_num(&order, &five);
- /* we first need a scalar which is not a multiple of 5 */
- do {
- secp256k1_num fiven;
- random_scalar_order_test(&sqr);
- secp256k1_scalar_get_num(&fiven, &five);
- secp256k1_scalar_get_num(&n, &sqr);
- secp256k1_num_mod(&n, &fiven);
- } while (secp256k1_num_is_zero(&n));
- /* next force it to be a residue. 2 is a nonresidue mod 5 so we can
- * just multiply by two, i.e. add the number to itself */
- if (secp256k1_num_jacobi(&n, &order) == -1) {
- secp256k1_num_add(&n, &n, &n);
- }
-
- /* test residue */
- CHECK(secp256k1_num_jacobi(&n, &order) == 1);
- /* test nonresidue */
- secp256k1_num_add(&n, &n, &n);
- CHECK(secp256k1_num_jacobi(&n, &order) == -1);
-
- /** test with secp group order as order */
- secp256k1_scalar_order_get_num(&order);
- random_scalar_order_test(&sqr);
- secp256k1_scalar_sqr(&sqr, &sqr);
- /* test residue */
- secp256k1_scalar_get_num(&n, &sqr);
- CHECK(secp256k1_num_jacobi(&n, &order) == 1);
- /* test nonresidue */
- secp256k1_scalar_mul(&sqr, &sqr, &five);
- secp256k1_scalar_get_num(&n, &sqr);
- CHECK(secp256k1_num_jacobi(&n, &order) == -1);
- /* test multiple of the order*/
- CHECK(secp256k1_num_jacobi(&order, &order) == 0);
-
- /* check one less than the order */
- secp256k1_scalar_set_int(&small, 1);
- secp256k1_scalar_get_num(&n, &small);
- secp256k1_num_sub(&n, &order, &n);
- CHECK(secp256k1_num_jacobi(&n, &order) == 1); /* sage confirms this is 1 */
+/* Test secp256k1_modinv64{_var}, using inputs in 16-bit limb format, and returning inverse. */
+void test_modinv64_uint16(uint16_t* out, const uint16_t* in, const uint16_t* mod) {
+ static const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
+ uint16_t tmp[16];
+ secp256k1_modinv64_signed62 x;
+ secp256k1_modinv64_modinfo m;
+ int i, vartime, nonzero;
+
+ uint16_to_signed62(&x, in);
+ nonzero = (x.v[0] | x.v[1] | x.v[2] | x.v[3] | x.v[4]) != 0;
+ uint16_to_signed62(&m.modulus, mod);
+ mutate_sign_signed62(&m.modulus);
+
+ /* compute 1/modulus mod 2^62 */
+ m.modulus_inv62 = modinv2p64(m.modulus.v[0]) & M62;
+ CHECK(((m.modulus_inv62 * m.modulus.v[0]) & M62) == 1);
+
+ for (vartime = 0; vartime < 2; ++vartime) {
+ /* compute inverse */
+ (vartime ? secp256k1_modinv64_var : secp256k1_modinv64)(&x, &m);
+
+ /* produce output */
+ signed62_to_uint16(out, &x);
+
+ /* check if the inverse times the input is 1 (mod m), unless x is 0. */
+ mulmod256(tmp, out, in, mod);
+ CHECK(tmp[0] == nonzero);
+ for (i = 1; i < 16; ++i) CHECK(tmp[i] == 0);
+
+ /* invert again */
+ (vartime ? secp256k1_modinv64_var : secp256k1_modinv64)(&x, &m);
+
+ /* check if the result is equal to the input */
+ signed62_to_uint16(tmp, &x);
+ for (i = 0; i < 16; ++i) CHECK(tmp[i] == in[i]);
+ }
}
+#endif
-void run_num_smalltests(void) {
+/* test if a and b are coprime */
+int coprime(const uint16_t* a, const uint16_t* b) {
+ uint16_t x[16], y[16], t[16];
int i;
- for (i = 0; i < 100*count; i++) {
- test_num_negate();
- test_num_add_sub();
- test_num_mod();
- test_num_jacobi();
+ int iszero;
+ memcpy(x, a, 32);
+ memcpy(y, b, 32);
+
+ /* simple gcd loop: while x!=0, (x,y)=(y%x,x) */
+ while (1) {
+ iszero = 1;
+ for (i = 0; i < 16; ++i) {
+ if (x[i] != 0) {
+ iszero = 0;
+ break;
+ }
+ }
+ if (iszero) break;
+ mulmod256(t, y, NULL, x);
+ memcpy(y, x, 32);
+ memcpy(x, t, 32);
+ }
+
+ /* return whether y=1 */
+ if (y[0] != 1) return 0;
+ for (i = 1; i < 16; ++i) {
+ if (y[i] != 0) return 0;
}
+ return 1;
}
+
+void run_modinv_tests(void) {
+ /* Fixed test cases. Each tuple is (input, modulus, output), each as 16x16 bits in LE order. */
+ static const uint16_t CASES[][3][16] = {
+ /* Test case known to need 713 divsteps */
+ {{0x1513, 0x5389, 0x54e9, 0x2798, 0x1957, 0x66a0, 0x8057, 0x3477,
+ 0x7784, 0x1052, 0x326a, 0x9331, 0x6506, 0xa95c, 0x91f3, 0xfb5e},
+ {0x2bdd, 0x8df4, 0xcc61, 0x481f, 0xdae5, 0x5ca7, 0xf43b, 0x7d54,
+ 0x13d6, 0x469b, 0x2294, 0x20f4, 0xb2a4, 0xa2d1, 0x3ff1, 0xfd4b},
+ {0xffd8, 0xd9a0, 0x456e, 0x81bb, 0xbabd, 0x6cea, 0x6dbd, 0x73ab,
+ 0xbb94, 0x3d3c, 0xdf08, 0x31c4, 0x3e32, 0xc179, 0x2486, 0xb86b}},
+ /* Test case known to need 589 divsteps, reaching delta=-140 and
+ delta=141. */
+ {{0x3fb1, 0x903b, 0x4eb7, 0x4813, 0xd863, 0x26bf, 0xd89f, 0xa8a9,
+ 0x02fe, 0x57c6, 0x554a, 0x4eab, 0x165e, 0x3d61, 0xee1e, 0x456c},
+ {0x9295, 0x823b, 0x5c1f, 0x5386, 0x48e0, 0x02ff, 0x4c2a, 0xa2da,
+ 0xe58f, 0x967c, 0xc97e, 0x3f5a, 0x69fb, 0x52d9, 0x0a86, 0xb4a3},
+ {0x3d30, 0xb893, 0xa809, 0xa7a8, 0x26f5, 0x5b42, 0x55be, 0xf4d0,
+ 0x12c2, 0x7e6a, 0xe41a, 0x90c7, 0xebfa, 0xf920, 0x304e, 0x1419}},
+ /* Test case known to need 650 divsteps, and doing 65 consecutive (f,g/2) steps. */
+ {{0x8583, 0x5058, 0xbeae, 0xeb69, 0x48bc, 0x52bb, 0x6a9d, 0xcc94,
+ 0x2a21, 0x87d5, 0x5b0d, 0x42f6, 0x5b8a, 0x2214, 0xe9d6, 0xa040},
+ {0x7531, 0x27cb, 0x7e53, 0xb739, 0x6a5f, 0x83f5, 0xa45c, 0xcb1d,
+ 0x8a87, 0x1c9c, 0x51d7, 0x851c, 0xb9d8, 0x1fbe, 0xc241, 0xd4a3},
+ {0xcdb4, 0x275c, 0x7d22, 0xa906, 0x0173, 0xc054, 0x7fdf, 0x5005,
+ 0x7fb8, 0x9059, 0xdf51, 0x99df, 0x2654, 0x8f6e, 0x070f, 0xb347}},
+ /* Test case with the group order as modulus, needing 635 divsteps. */
+ {{0x95ed, 0x6c01, 0xd113, 0x5ff1, 0xd7d0, 0x29cc, 0x5817, 0x6120,
+ 0xca8e, 0xaad1, 0x25ae, 0x8e84, 0x9af6, 0x30bf, 0xf0ed, 0x1686},
+ {0x4141, 0xd036, 0x5e8c, 0xbfd2, 0xa03b, 0xaf48, 0xdce6, 0xbaae,
+ 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x1631, 0xbf4a, 0x286a, 0x2716, 0x469f, 0x2ac8, 0x1312, 0xe9bc,
+ 0x04f4, 0x304b, 0x9931, 0x113b, 0xd932, 0xc8f4, 0x0d0d, 0x01a1}},
+ /* Test case with the field size as modulus, needing 637 divsteps. */
+ {{0x9ec3, 0x1919, 0xca84, 0x7c11, 0xf996, 0x06f3, 0x5408, 0x6688,
+ 0x1320, 0xdb8a, 0x632a, 0x0dcb, 0x8a84, 0x6bee, 0x9c95, 0xe34e},
+ {0xfc2f, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x18e5, 0x19b6, 0xdf92, 0x1aaa, 0x09fb, 0x8a3f, 0x52b0, 0x8701,
+ 0xac0c, 0x2582, 0xda44, 0x9bcc, 0x6828, 0x1c53, 0xbd8f, 0xbd2c}},
+ /* Test case with the field size as modulus, needing 935 divsteps with
+ broken eta handling. */
+ {{0x1b37, 0xbdc3, 0x8bcd, 0x25e3, 0x1eae, 0x567d, 0x30b6, 0xf0d8,
+ 0x9277, 0x0cf8, 0x9c2e, 0xecd7, 0x631d, 0xe38f, 0xd4f8, 0x5c93},
+ {0xfc2f, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x1622, 0xe05b, 0xe880, 0x7de9, 0x3e45, 0xb682, 0xee6c, 0x67ed,
+ 0xa179, 0x15db, 0x6b0d, 0xa656, 0x7ccb, 0x8ef7, 0xa2ff, 0xe279}},
+ /* Test case with the group size as modulus, needing 981 divsteps with
+ broken eta handling. */
+ {{0xfeb9, 0xb877, 0xee41, 0x7fa3, 0x87da, 0x94c4, 0x9d04, 0xc5ae,
+ 0x5708, 0x0994, 0xfc79, 0x0916, 0xbf32, 0x3ad8, 0xe11c, 0x5ca2},
+ {0x4141, 0xd036, 0x5e8c, 0xbfd2, 0xa03b, 0xaf48, 0xdce6, 0xbaae,
+ 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x0f12, 0x075e, 0xce1c, 0x6f92, 0xc80f, 0xca92, 0x9a04, 0x6126,
+ 0x4b6c, 0x57d6, 0xca31, 0x97f3, 0x1f99, 0xf4fd, 0xda4d, 0x42ce}},
+ /* Test case with the field size as modulus, input = 0. */
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0xfc2f, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}},
+ /* Test case with the field size as modulus, input = 1. */
+ {{0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0xfc2f, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}},
+ /* Test case with the field size as modulus, input = 2. */
+ {{0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0xfc2f, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xfe18, 0x7fff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x7fff}},
+ /* Test case with the field size as modulus, input = field - 1. */
+ {{0xfc2e, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xfc2f, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xfc2e, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ /* Test case with the group size as modulus, input = 0. */
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x4141, 0xd036, 0x5e8c, 0xbfd2, 0xa03b, 0xaf48, 0xdce6, 0xbaae,
+ 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}},
+ /* Test case with the group size as modulus, input = 1. */
+ {{0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x4141, 0xd036, 0x5e8c, 0xbfd2, 0xa03b, 0xaf48, 0xdce6, 0xbaae,
+ 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}},
+ /* Test case with the group size as modulus, input = 2. */
+ {{0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x4141, 0xd036, 0x5e8c, 0xbfd2, 0xa03b, 0xaf48, 0xdce6, 0xbaae,
+ 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x20a1, 0x681b, 0x2f46, 0xdfe9, 0x501d, 0x57a4, 0x6e73, 0x5d57,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x7fff}},
+ /* Test case with the group size as modulus, input = group - 1. */
+ {{0x4140, 0xd036, 0x5e8c, 0xbfd2, 0xa03b, 0xaf48, 0xdce6, 0xbaae,
+ 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x4141, 0xd036, 0x5e8c, 0xbfd2, 0xa03b, 0xaf48, 0xdce6, 0xbaae,
+ 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0x4140, 0xd036, 0x5e8c, 0xbfd2, 0xa03b, 0xaf48, 0xdce6, 0xbaae,
+ 0xfffe, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}
+ };
+
+ int i, j, ok;
+
+ /* Test known inputs/outputs */
+ for (i = 0; (size_t)i < sizeof(CASES) / sizeof(CASES[0]); ++i) {
+ uint16_t out[16];
+ test_modinv32_uint16(out, CASES[i][0], CASES[i][1]);
+ for (j = 0; j < 16; ++j) CHECK(out[j] == CASES[i][2][j]);
+#ifdef SECP256K1_WIDEMUL_INT128
+ test_modinv64_uint16(out, CASES[i][0], CASES[i][1]);
+ for (j = 0; j < 16; ++j) CHECK(out[j] == CASES[i][2][j]);
+#endif
+ }
+
+ for (i = 0; i < 100 * count; ++i) {
+ /* 256-bit numbers in 16-uint16_t's notation */
+ static const uint16_t ZERO[16] = {0};
+ uint16_t xd[16]; /* the number (in range [0,2^256)) to be inverted */
+ uint16_t md[16]; /* the modulus (odd, in range [3,2^256)) */
+ uint16_t id[16]; /* the inverse of xd mod md */
+
+ /* generate random xd and md, so that md is odd, md>1, xd 0));
- secp256k1_scalar_negate(&neg, &s);
- secp256k1_num_sub(&negnum, &order, &snum);
- secp256k1_num_mod(&negnum, &order);
- /* Check that comparison with the half order is equal to testing for high scalar after negation. */
- CHECK(secp256k1_scalar_is_high(&neg) == (secp256k1_num_cmp(&negnum, &half_order) > 0));
- /* Negating should change the high property, unless the value was already zero. */
- CHECK((secp256k1_scalar_is_high(&s) == secp256k1_scalar_is_high(&neg)) == secp256k1_scalar_is_zero(&s));
- secp256k1_scalar_get_num(&negnum2, &neg);
- /* Negating a scalar should be equal to (order - n) mod order on the number. */
- CHECK(secp256k1_num_eq(&negnum, &negnum2));
- secp256k1_scalar_add(&neg, &neg, &s);
- /* Adding a number to its negation should result in zero. */
- CHECK(secp256k1_scalar_is_zero(&neg));
- secp256k1_scalar_negate(&neg, &neg);
- /* Negating zero should still result in zero. */
- CHECK(secp256k1_scalar_is_zero(&neg));
- }
-
- {
- /* Test secp256k1_scalar_mul_shift_var. */
- secp256k1_scalar r;
- secp256k1_num one;
- secp256k1_num rnum;
- secp256k1_num rnum2;
- unsigned char cone[1] = {0x01};
- unsigned int shift = 256 + secp256k1_testrand_int(257);
- secp256k1_scalar_mul_shift_var(&r, &s1, &s2, shift);
- secp256k1_num_mul(&rnum, &s1num, &s2num);
- secp256k1_num_shift(&rnum, shift - 1);
- secp256k1_num_set_bin(&one, cone, 1);
- secp256k1_num_add(&rnum, &rnum, &one);
- secp256k1_num_shift(&rnum, 1);
- secp256k1_scalar_get_num(&rnum2, &r);
- CHECK(secp256k1_num_eq(&rnum, &rnum2));
- }
-
{
/* test secp256k1_scalar_shr_int */
secp256k1_scalar r;
@@ -955,34 +1142,6 @@ void scalar_test(void) {
CHECK(expected == low);
}
}
-#endif
-
- {
- /* Test that scalar inverses are equal to the inverse of their number modulo the order. */
- if (!secp256k1_scalar_is_zero(&s)) {
- secp256k1_scalar inv;
-#ifndef USE_NUM_NONE
- secp256k1_num invnum;
- secp256k1_num invnum2;
-#endif
- secp256k1_scalar_inverse(&inv, &s);
-#ifndef USE_NUM_NONE
- secp256k1_num_mod_inverse(&invnum, &snum, &order);
- secp256k1_scalar_get_num(&invnum2, &inv);
- CHECK(secp256k1_num_eq(&invnum, &invnum2));
-#endif
- secp256k1_scalar_mul(&inv, &inv, &s);
- /* Multiplying a scalar with its inverse must result in one. */
- CHECK(secp256k1_scalar_is_one(&inv));
- secp256k1_scalar_inverse(&inv, &inv);
- /* Inverting one must result in one. */
- CHECK(secp256k1_scalar_is_one(&inv));
-#ifndef USE_NUM_NONE
- secp256k1_scalar_get_num(&invnum, &inv);
- CHECK(secp256k1_num_is_one(&invnum));
-#endif
- }
- }
{
/* Test commutativity of add. */
@@ -1054,14 +1213,6 @@ void scalar_test(void) {
CHECK(secp256k1_scalar_eq(&r1, &r2));
}
- {
- /* Test square. */
- secp256k1_scalar r1, r2;
- secp256k1_scalar_sqr(&r1, &s1);
- secp256k1_scalar_mul(&r2, &s1, &s1);
- CHECK(secp256k1_scalar_eq(&r1, &r2));
- }
-
{
/* Test multiplicative identity. */
secp256k1_scalar r1, v1;
@@ -1126,48 +1277,6 @@ void run_scalar_tests(void) {
CHECK(secp256k1_scalar_is_zero(&o));
}
-#ifndef USE_NUM_NONE
- {
- /* Test secp256k1_scalar_set_b32 boundary conditions */
- secp256k1_num order;
- secp256k1_scalar scalar;
- unsigned char bin[32];
- unsigned char bin_tmp[32];
- int overflow = 0;
- /* 2^256-1 - order */
- static const secp256k1_scalar all_ones_minus_order = SECP256K1_SCALAR_CONST(
- 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000001UL,
- 0x45512319UL, 0x50B75FC4UL, 0x402DA173UL, 0x2FC9BEBEUL
- );
-
- /* A scalar set to 0s should be 0. */
- memset(bin, 0, 32);
- secp256k1_scalar_set_b32(&scalar, bin, &overflow);
- CHECK(overflow == 0);
- CHECK(secp256k1_scalar_is_zero(&scalar));
-
- /* A scalar with value of the curve order should be 0. */
- secp256k1_scalar_order_get_num(&order);
- secp256k1_num_get_bin(bin, 32, &order);
- secp256k1_scalar_set_b32(&scalar, bin, &overflow);
- CHECK(overflow == 1);
- CHECK(secp256k1_scalar_is_zero(&scalar));
-
- /* A scalar with value of the curve order minus one should not overflow. */
- bin[31] -= 1;
- secp256k1_scalar_set_b32(&scalar, bin, &overflow);
- CHECK(overflow == 0);
- secp256k1_scalar_get_b32(bin_tmp, &scalar);
- CHECK(secp256k1_memcmp_var(bin, bin_tmp, 32) == 0);
-
- /* A scalar set to all 1s should overflow. */
- memset(bin, 0xFF, 32);
- secp256k1_scalar_set_b32(&scalar, bin, &overflow);
- CHECK(overflow == 1);
- CHECK(secp256k1_scalar_eq(&scalar, &all_ones_minus_order));
- }
-#endif
-
{
/* Does check_overflow check catch all ones? */
static const secp256k1_scalar overflowed = SECP256K1_SCALAR_CONST(
@@ -1190,9 +1299,7 @@ void run_scalar_tests(void) {
secp256k1_scalar one;
secp256k1_scalar r1;
secp256k1_scalar r2;
-#if defined(USE_SCALAR_INV_NUM)
secp256k1_scalar zzv;
-#endif
int overflow;
unsigned char chal[33][2][32] = {
{{0xff, 0xff, 0x03, 0x07, 0x00, 0x00, 0x00, 0x00,
@@ -1742,10 +1849,8 @@ void run_scalar_tests(void) {
if (!secp256k1_scalar_is_zero(&y)) {
secp256k1_scalar_inverse(&zz, &y);
CHECK(!secp256k1_scalar_check_overflow(&zz));
-#if defined(USE_SCALAR_INV_NUM)
secp256k1_scalar_inverse_var(&zzv, &y);
CHECK(secp256k1_scalar_eq(&zzv, &zz));
-#endif
secp256k1_scalar_mul(&z, &z, &zz);
CHECK(!secp256k1_scalar_check_overflow(&z));
CHECK(secp256k1_scalar_eq(&x, &z));
@@ -1753,12 +1858,6 @@ void run_scalar_tests(void) {
CHECK(!secp256k1_scalar_check_overflow(&zz));
CHECK(secp256k1_scalar_eq(&one, &zz));
}
- secp256k1_scalar_mul(&z, &x, &x);
- CHECK(!secp256k1_scalar_check_overflow(&z));
- secp256k1_scalar_sqr(&zz, &x);
- CHECK(!secp256k1_scalar_check_overflow(&zz));
- CHECK(secp256k1_scalar_eq(&zz, &z));
- CHECK(secp256k1_scalar_eq(&r2, &zz));
}
}
}
@@ -1814,13 +1913,6 @@ int check_fe_equal(const secp256k1_fe *a, const secp256k1_fe *b) {
return secp256k1_fe_equal_var(&an, &bn);
}
-int check_fe_inverse(const secp256k1_fe *a, const secp256k1_fe *ai) {
- secp256k1_fe x;
- secp256k1_fe one = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
- secp256k1_fe_mul(&x, a, ai);
- return check_fe_equal(&x, &one);
-}
-
void run_field_convert(void) {
static const unsigned char b32[32] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@@ -1940,30 +2032,6 @@ void run_field_misc(void) {
}
}
-void run_field_inv(void) {
- secp256k1_fe x, xi, xii;
- int i;
- for (i = 0; i < 10*count; i++) {
- random_fe_non_zero(&x);
- secp256k1_fe_inv(&xi, &x);
- CHECK(check_fe_inverse(&x, &xi));
- secp256k1_fe_inv(&xii, &xi);
- CHECK(check_fe_equal(&x, &xii));
- }
-}
-
-void run_field_inv_var(void) {
- secp256k1_fe x, xi, xii;
- int i;
- for (i = 0; i < 10*count; i++) {
- random_fe_non_zero(&x);
- secp256k1_fe_inv_var(&xi, &x);
- CHECK(check_fe_inverse(&x, &xi));
- secp256k1_fe_inv_var(&xii, &xi);
- CHECK(check_fe_equal(&x, &xii));
- }
-}
-
void run_sqr(void) {
secp256k1_fe x, s;
@@ -2028,6 +2096,169 @@ void run_sqrt(void) {
}
}
+/***** FIELD/SCALAR INVERSE TESTS *****/
+
+static const secp256k1_scalar scalar_minus_one = SECP256K1_SCALAR_CONST(
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE,
+ 0xBAAEDCE6, 0xAF48A03B, 0xBFD25E8C, 0xD0364140
+);
+
+static const secp256k1_fe fe_minus_one = SECP256K1_FE_CONST(
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2E
+);
+
+/* These tests test the following identities:
+ *
+ * for x==0: 1/x == 0
+ * for x!=0: x*(1/x) == 1
+ * for x!=0 and x!=1: 1/(1/x - 1) + 1 == -1/(x-1)
+ */
+
+void test_inverse_scalar(secp256k1_scalar* out, const secp256k1_scalar* x, int var)
+{
+ secp256k1_scalar l, r, t;
+
+ (var ? secp256k1_scalar_inverse_var : secp256k1_scalar_inverse_var)(&l, x); /* l = 1/x */
+ if (out) *out = l;
+ if (secp256k1_scalar_is_zero(x)) {
+ CHECK(secp256k1_scalar_is_zero(&l));
+ return;
+ }
+ secp256k1_scalar_mul(&t, x, &l); /* t = x*(1/x) */
+ CHECK(secp256k1_scalar_is_one(&t)); /* x*(1/x) == 1 */
+ secp256k1_scalar_add(&r, x, &scalar_minus_one); /* r = x-1 */
+ if (secp256k1_scalar_is_zero(&r)) return;
+ (var ? secp256k1_scalar_inverse_var : secp256k1_scalar_inverse_var)(&r, &r); /* r = 1/(x-1) */
+ secp256k1_scalar_add(&l, &scalar_minus_one, &l); /* l = 1/x-1 */
+ (var ? secp256k1_scalar_inverse_var : secp256k1_scalar_inverse_var)(&l, &l); /* l = 1/(1/x-1) */
+ secp256k1_scalar_add(&l, &l, &secp256k1_scalar_one); /* l = 1/(1/x-1)+1 */
+ secp256k1_scalar_add(&l, &r, &l); /* l = 1/(1/x-1)+1 + 1/(x-1) */
+ CHECK(secp256k1_scalar_is_zero(&l)); /* l == 0 */
+}
+
+void test_inverse_field(secp256k1_fe* out, const secp256k1_fe* x, int var)
+{
+ secp256k1_fe l, r, t;
+
+ (var ? secp256k1_fe_inv_var : secp256k1_fe_inv)(&l, x) ; /* l = 1/x */
+ if (out) *out = l;
+ t = *x; /* t = x */
+ if (secp256k1_fe_normalizes_to_zero_var(&t)) {
+ CHECK(secp256k1_fe_normalizes_to_zero(&l));
+ return;
+ }
+ secp256k1_fe_mul(&t, x, &l); /* t = x*(1/x) */
+ secp256k1_fe_add(&t, &fe_minus_one); /* t = x*(1/x)-1 */
+ CHECK(secp256k1_fe_normalizes_to_zero(&t)); /* x*(1/x)-1 == 0 */
+ r = *x; /* r = x */
+ secp256k1_fe_add(&r, &fe_minus_one); /* r = x-1 */
+ if (secp256k1_fe_normalizes_to_zero_var(&r)) return;
+ (var ? secp256k1_fe_inv_var : secp256k1_fe_inv)(&r, &r); /* r = 1/(x-1) */
+ secp256k1_fe_add(&l, &fe_minus_one); /* l = 1/x-1 */
+ (var ? secp256k1_fe_inv_var : secp256k1_fe_inv)(&l, &l); /* l = 1/(1/x-1) */
+ secp256k1_fe_add(&l, &secp256k1_fe_one); /* l = 1/(1/x-1)+1 */
+ secp256k1_fe_add(&l, &r); /* l = 1/(1/x-1)+1 + 1/(x-1) */
+ CHECK(secp256k1_fe_normalizes_to_zero_var(&l)); /* l == 0 */
+}
+
+void run_inverse_tests(void)
+{
+ /* Fixed test cases for field inverses: pairs of (x, 1/x) mod p. */
+ static const secp256k1_fe fe_cases[][2] = {
+ /* 0 */
+ {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0),
+ SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0)},
+ /* 1 */
+ {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1),
+ SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1)},
+ /* -1 */
+ {SECP256K1_FE_CONST(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xfffffc2e),
+ SECP256K1_FE_CONST(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xfffffc2e)},
+ /* 2 */
+ {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 2),
+ SECP256K1_FE_CONST(0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x7ffffe18)},
+ /* 2**128 */
+ {SECP256K1_FE_CONST(0, 0, 0, 1, 0, 0, 0, 0),
+ SECP256K1_FE_CONST(0xbcb223fe, 0xdc24a059, 0xd838091d, 0xd2253530, 0xffffffff, 0xffffffff, 0xffffffff, 0x434dd931)},
+ /* Input known to need 637 divsteps */
+ {SECP256K1_FE_CONST(0xe34e9c95, 0x6bee8a84, 0x0dcb632a, 0xdb8a1320, 0x66885408, 0x06f3f996, 0x7c11ca84, 0x19199ec3),
+ SECP256K1_FE_CONST(0xbd2cbd8f, 0x1c536828, 0x9bccda44, 0x2582ac0c, 0x870152b0, 0x8a3f09fb, 0x1aaadf92, 0x19b618e5)}
+ };
+ /* Fixed test cases for scalar inverses: pairs of (x, 1/x) mod n. */
+ static const secp256k1_scalar scalar_cases[][2] = {
+ /* 0 */
+ {SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0),
+ SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0)},
+ /* 1 */
+ {SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1),
+ SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1)},
+ /* -1 */
+ {SECP256K1_SCALAR_CONST(0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xbaaedce6, 0xaf48a03b, 0xbfd25e8c, 0xd0364140),
+ SECP256K1_SCALAR_CONST(0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xbaaedce6, 0xaf48a03b, 0xbfd25e8c, 0xd0364140)},
+ /* 2 */
+ {SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 2),
+ SECP256K1_SCALAR_CONST(0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x5d576e73, 0x57a4501d, 0xdfe92f46, 0x681b20a1)},
+ /* 2**128 */
+ {SECP256K1_SCALAR_CONST(0, 0, 0, 1, 0, 0, 0, 0),
+ SECP256K1_SCALAR_CONST(0x50a51ac8, 0x34b9ec24, 0x4b0dff66, 0x5588b13e, 0x9984d5b3, 0xcf80ef0f, 0xd6a23766, 0xa3ee9f22)},
+ /* Input known to need 635 divsteps */
+ {SECP256K1_SCALAR_CONST(0xcb9f1d35, 0xdd4416c2, 0xcd71bf3f, 0x6365da66, 0x3c9b3376, 0x8feb7ae9, 0x32a5ef60, 0x19199ec3),
+ SECP256K1_SCALAR_CONST(0x1d7c7bba, 0xf1893d53, 0xb834bd09, 0x36b411dc, 0x42c2e42f, 0xec72c428, 0x5e189791, 0x8e9bc708)}
+ };
+ int i, var, testrand;
+ unsigned char b32[32];
+ secp256k1_fe x_fe;
+ secp256k1_scalar x_scalar;
+ memset(b32, 0, sizeof(b32));
+ /* Test fixed test cases through test_inverse_{scalar,field}, both ways. */
+ for (i = 0; (size_t)i < sizeof(fe_cases)/sizeof(fe_cases[0]); ++i) {
+ for (var = 0; var <= 1; ++var) {
+ test_inverse_field(&x_fe, &fe_cases[i][0], var);
+ check_fe_equal(&x_fe, &fe_cases[i][1]);
+ test_inverse_field(&x_fe, &fe_cases[i][1], var);
+ check_fe_equal(&x_fe, &fe_cases[i][0]);
+ }
+ }
+ for (i = 0; (size_t)i < sizeof(scalar_cases)/sizeof(scalar_cases[0]); ++i) {
+ for (var = 0; var <= 1; ++var) {
+ test_inverse_scalar(&x_scalar, &scalar_cases[i][0], var);
+ CHECK(secp256k1_scalar_eq(&x_scalar, &scalar_cases[i][1]));
+ test_inverse_scalar(&x_scalar, &scalar_cases[i][1], var);
+ CHECK(secp256k1_scalar_eq(&x_scalar, &scalar_cases[i][0]));
+ }
+ }
+ /* Test inputs 0..999 and their respective negations. */
+ for (i = 0; i < 1000; ++i) {
+ b32[31] = i & 0xff;
+ b32[30] = (i >> 8) & 0xff;
+ secp256k1_scalar_set_b32(&x_scalar, b32, NULL);
+ secp256k1_fe_set_b32(&x_fe, b32);
+ for (var = 0; var <= 1; ++var) {
+ test_inverse_scalar(NULL, &x_scalar, var);
+ test_inverse_field(NULL, &x_fe, var);
+ }
+ secp256k1_scalar_negate(&x_scalar, &x_scalar);
+ secp256k1_fe_negate(&x_fe, &x_fe, 1);
+ for (var = 0; var <= 1; ++var) {
+ test_inverse_scalar(NULL, &x_scalar, var);
+ test_inverse_field(NULL, &x_fe, var);
+ }
+ }
+ /* test 128*count random inputs; half with testrand256_test, half with testrand256 */
+ for (testrand = 0; testrand <= 1; ++testrand) {
+ for (i = 0; i < 64 * count; ++i) {
+ (testrand ? secp256k1_testrand256_test : secp256k1_testrand256)(b32);
+ secp256k1_scalar_set_b32(&x_scalar, b32, NULL);
+ secp256k1_fe_set_b32(&x_fe, b32);
+ for (var = 0; var <= 1; ++var) {
+ test_inverse_scalar(NULL, &x_scalar, var);
+ test_inverse_field(NULL, &x_fe, var);
+ }
+ }
+ }
+}
+
/***** GROUP TESTS *****/
void ge_equals_ge(const secp256k1_ge *a, const secp256k1_ge *b) {
@@ -2407,64 +2638,35 @@ void run_ec_combine(void) {
void test_group_decompress(const secp256k1_fe* x) {
/* The input itself, normalized. */
secp256k1_fe fex = *x;
- secp256k1_fe fez;
- /* Results of set_xquad_var, set_xo_var(..., 0), set_xo_var(..., 1). */
- secp256k1_ge ge_quad, ge_even, ge_odd;
- secp256k1_gej gej_quad;
+ /* Results of set_xo_var(..., 0), set_xo_var(..., 1). */
+ secp256k1_ge ge_even, ge_odd;
/* Return values of the above calls. */
- int res_quad, res_even, res_odd;
+ int res_even, res_odd;
secp256k1_fe_normalize_var(&fex);
- res_quad = secp256k1_ge_set_xquad(&ge_quad, &fex);
res_even = secp256k1_ge_set_xo_var(&ge_even, &fex, 0);
res_odd = secp256k1_ge_set_xo_var(&ge_odd, &fex, 1);
- CHECK(res_quad == res_even);
- CHECK(res_quad == res_odd);
+ CHECK(res_even == res_odd);
- if (res_quad) {
- secp256k1_fe_normalize_var(&ge_quad.x);
+ if (res_even) {
secp256k1_fe_normalize_var(&ge_odd.x);
secp256k1_fe_normalize_var(&ge_even.x);
- secp256k1_fe_normalize_var(&ge_quad.y);
secp256k1_fe_normalize_var(&ge_odd.y);
secp256k1_fe_normalize_var(&ge_even.y);
/* No infinity allowed. */
- CHECK(!ge_quad.infinity);
CHECK(!ge_even.infinity);
CHECK(!ge_odd.infinity);
/* Check that the x coordinates check out. */
- CHECK(secp256k1_fe_equal_var(&ge_quad.x, x));
CHECK(secp256k1_fe_equal_var(&ge_even.x, x));
CHECK(secp256k1_fe_equal_var(&ge_odd.x, x));
- /* Check that the Y coordinate result in ge_quad is a square. */
- CHECK(secp256k1_fe_is_quad_var(&ge_quad.y));
-
/* Check odd/even Y in ge_odd, ge_even. */
CHECK(secp256k1_fe_is_odd(&ge_odd.y));
CHECK(!secp256k1_fe_is_odd(&ge_even.y));
-
- /* Check secp256k1_gej_has_quad_y_var. */
- secp256k1_gej_set_ge(&gej_quad, &ge_quad);
- CHECK(secp256k1_gej_has_quad_y_var(&gej_quad));
- do {
- random_fe_test(&fez);
- } while (secp256k1_fe_is_zero(&fez));
- secp256k1_gej_rescale(&gej_quad, &fez);
- CHECK(secp256k1_gej_has_quad_y_var(&gej_quad));
- secp256k1_gej_neg(&gej_quad, &gej_quad);
- CHECK(!secp256k1_gej_has_quad_y_var(&gej_quad));
- do {
- random_fe_test(&fez);
- } while (secp256k1_fe_is_zero(&fez));
- secp256k1_gej_rescale(&gej_quad, &fez);
- CHECK(!secp256k1_gej_has_quad_y_var(&gej_quad));
- secp256k1_gej_neg(&gej_quad, &gej_quad);
- CHECK(secp256k1_gej_has_quad_y_var(&gej_quad));
}
}
@@ -5606,21 +5808,18 @@ int main(int argc, char **argv) {
run_rand_bits();
run_rand_int();
+ run_ctz_tests();
+ run_modinv_tests();
+ run_inverse_tests();
+
run_sha256_tests();
run_hmac_sha256_tests();
run_rfc6979_hmac_sha256_tests();
-#ifndef USE_NUM_NONE
- /* num tests */
- run_num_smalltests();
-#endif
-
/* scalar tests */
run_scalar_tests();
/* field tests */
- run_field_inv();
- run_field_inv_var();
run_field_misc();
run_field_convert();
run_sqr();
diff --git a/src/util.h b/src/util.h
index b7e457c48b..f78846836c 100644
--- a/src/util.h
+++ b/src/util.h
@@ -276,4 +276,69 @@ SECP256K1_GNUC_EXT typedef __int128 int128_t;
# endif
#endif
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+/* Determine the number of trailing zero bits in a (non-zero) 32-bit x.
+ * This function is only intended to be used as fallback for
+ * secp256k1_ctz32_var, but permits it to be tested separately. */
+static SECP256K1_INLINE int secp256k1_ctz32_var_debruijn(uint32_t x) {
+ static const uint8_t debruijn[32] = {
+ 0x00, 0x01, 0x02, 0x18, 0x03, 0x13, 0x06, 0x19, 0x16, 0x04, 0x14, 0x0A,
+ 0x10, 0x07, 0x0C, 0x1A, 0x1F, 0x17, 0x12, 0x05, 0x15, 0x09, 0x0F, 0x0B,
+ 0x1E, 0x11, 0x08, 0x0E, 0x1D, 0x0D, 0x1C, 0x1B
+ };
+ return debruijn[((x & -x) * 0x04D7651F) >> 27];
+}
+
+/* Determine the number of trailing zero bits in a (non-zero) 64-bit x.
+ * This function is only intended to be used as fallback for
+ * secp256k1_ctz64_var, but permits it to be tested separately. */
+static SECP256K1_INLINE int secp256k1_ctz64_var_debruijn(uint64_t x) {
+ static const uint8_t debruijn[64] = {
+ 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
+ 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
+ 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
+ 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
+ };
+ return debruijn[((x & -x) * 0x022FDD63CC95386D) >> 58];
+}
+
+/* Determine the number of trailing zero bits in a (non-zero) 32-bit x. */
+static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x) {
+ VERIFY_CHECK(x != 0);
+#if (__has_builtin(__builtin_ctz) || SECP256K1_GNUC_PREREQ(3,4))
+ /* If the unsigned type is sufficient to represent the largest uint32_t, consider __builtin_ctz. */
+ if (((unsigned)UINT32_MAX) == UINT32_MAX) {
+ return __builtin_ctz(x);
+ }
+#endif
+#if (__has_builtin(__builtin_ctzl) || SECP256K1_GNUC_PREREQ(3,4))
+ /* Otherwise consider __builtin_ctzl (the unsigned long type is always at least 32 bits). */
+ return __builtin_ctzl(x);
+#else
+ /* If no suitable CTZ builtin is available, use a (variable time) software emulation. */
+ return secp256k1_ctz32_var_debruijn(x);
+#endif
+}
+
+/* Determine the number of trailing zero bits in a (non-zero) 64-bit x. */
+static SECP256K1_INLINE int secp256k1_ctz64_var(uint64_t x) {
+ VERIFY_CHECK(x != 0);
+#if (__has_builtin(__builtin_ctzl) || SECP256K1_GNUC_PREREQ(3,4))
+ /* If the unsigned long type is sufficient to represent the largest uint64_t, consider __builtin_ctzl. */
+ if (((unsigned long)UINT64_MAX) == UINT64_MAX) {
+ return __builtin_ctzl(x);
+ }
+#endif
+#if (__has_builtin(__builtin_ctzll) || SECP256K1_GNUC_PREREQ(3,4))
+ /* Otherwise consider __builtin_ctzll (the unsigned long long type is always at least 64 bits). */
+ return __builtin_ctzll(x);
+#else
+ /* If no suitable CTZ builtin is available, use a (variable time) software emulation. */
+ return secp256k1_ctz64_var_debruijn(x);
+#endif
+}
+
#endif /* SECP256K1_UTIL_H */