diff --git a/src/bench_internal.c b/src/bench_internal.c index aed8216127..379f9a9f0f 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -209,6 +209,17 @@ void bench_field_sqrt(void* arg, int iters) { CHECK(j <= iters); } +void bench_field_jacobi_var(void* arg, int iters) { + int i, j = 0; + bench_inv *data = (bench_inv*)arg; + + for (i = 0; i < iters; i++) { + j += secp256k1_fe_jacobi_var(&data->fe[0]); + secp256k1_fe_add(&data->fe[0], &data->fe[1]); + } + CHECK(j <= iters); +} + void bench_group_double_var(void* arg, int iters) { int i; bench_inv *data = (bench_inv*)arg; @@ -360,6 +371,7 @@ int main(int argc, char **argv) { if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "mul")) run_benchmark("field_mul", bench_field_mul, bench_setup, NULL, &data, 10, iters*10); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, iters); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, iters); + if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "jacobi")) run_benchmark("field_jacobi_var", bench_field_jacobi_var, bench_setup, NULL, &data, 10, iters); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt", bench_field_sqrt, bench_setup, NULL, &data, 10, iters); if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, iters*10); diff --git a/src/field.h b/src/field.h index 55679a2fc1..1f2a5af344 100644 --- a/src/field.h +++ b/src/field.h @@ -124,4 +124,7 @@ static void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_f /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. Both *r and *a must be initialized.*/ static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag); +/** Compute the Jacobi symbol of a / p. 0 if a=0; 1 if a square; -1 if a non-square. */ +static int secp256k1_fe_jacobi_var(const secp256k1_fe *a); + #endif /* SECP256K1_FIELD_H */ diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index 4363e727e7..83c127c913 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -1255,4 +1255,32 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) { VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp)); } +static int secp256k1_fe_jacobi_var(const secp256k1_fe *x) { + secp256k1_fe tmp; + secp256k1_modinv32_signed30 s; + int ret; + + tmp = *x; + secp256k1_fe_normalize_var(&tmp); + secp256k1_fe_to_signed30(&s, &tmp); + ret = secp256k1_jacobi32_maybe_var(&s, &secp256k1_const_modinfo_fe); + if (ret == -2) { + /* secp256k1_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back + * to computing a square root. This should be extremely rare with random + * input. */ + secp256k1_fe dummy; + ret = 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1; +#ifdef VERIFY + } else { + secp256k1_fe dummy; + if (secp256k1_fe_is_zero(&tmp)) { + VERIFY_CHECK(ret == 0); + } else { + VERIFY_CHECK(ret == 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1); + } +#endif + } + return ret; +} + #endif /* SECP256K1_FIELD_REPR_IMPL_H */ diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index b56bdd1353..5dc2cae257 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -577,4 +577,32 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) { #endif } +static int secp256k1_fe_jacobi_var(const secp256k1_fe *x) { + secp256k1_fe tmp; + secp256k1_modinv64_signed62 s; + int ret; + + tmp = *x; + secp256k1_fe_normalize_var(&tmp); + secp256k1_fe_to_signed62(&s, &tmp); + ret = secp256k1_jacobi64_maybe_var(&s, &secp256k1_const_modinfo_fe); + if (ret == -2) { + /* secp256k1_jacobi64_maybe_var failed to compute the Jacobi symbol. Fall back + * to computing a square root. This should be extremely rare with random + * input. */ + secp256k1_fe dummy; + ret = 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1; +#ifdef VERIFY + } else { + secp256k1_fe dummy; + if (secp256k1_fe_is_zero(&tmp)) { + VERIFY_CHECK(ret == 0); + } else { + VERIFY_CHECK(ret == 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1); + } +#endif + } + return ret; +} + #endif /* SECP256K1_FIELD_REPR_IMPL_H */ diff --git a/src/modinv32.h b/src/modinv32.h index 0efdda9ab5..593b9019e5 100644 --- a/src/modinv32.h +++ b/src/modinv32.h @@ -39,4 +39,7 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256 /* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */ static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo); +/* Compute the Jacobi symbol for x (which must be normalized). Returns -2 if the result cannot be computed. */ +static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo); + #endif /* SECP256K1_MODINV32_H */ diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h index 661c5fc04c..3acd4de146 100644 --- a/src/modinv32_impl.h +++ b/src/modinv32_impl.h @@ -232,6 +232,21 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_ return zeta; } +/* inv256[i] = -(2*i+1)^-1 (mod 256) */ +static const uint8_t secp256k1_modinv32_inv256[128] = { + 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59, + 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31, + 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89, + 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61, + 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9, + 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91, + 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9, + 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1, + 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19, + 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1, + 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01 +}; + /* Compute the transition matrix and eta for 30 divsteps (variable time). * * Input: eta: initial eta @@ -243,21 +258,6 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_ * Implements the divsteps_n_matrix_var function from the explanation. */ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) { - /* inv256[i] = -(2*i+1)^-1 (mod 256) */ - static const uint8_t inv256[128] = { - 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59, - 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31, - 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89, - 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61, - 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9, - 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91, - 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9, - 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1, - 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19, - 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1, - 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01 - }; - /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */ uint32_t u = 1, v = 0, q = 0, r = 1; uint32_t f = f0, g = g0, m; @@ -297,7 +297,7 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint VERIFY_CHECK(limit > 0 && limit <= 30); m = (UINT32_MAX >> (32 - limit)) & 255U; /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */ - w = (g * inv256[(f >> 1) & 127]) & m; + w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m; /* Do so. */ g += f * w; q += u * w; @@ -317,6 +317,83 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint return eta; } +/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track + * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because + * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2). + * + * Input: eta: initial eta + * f0: bottom limb of initial f + * g0: bottom limb of initial g + * Output: t: transition matrix + * Return: final eta + */ +static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) { + /* Transformation matrix. */ + uint32_t u = 1, v = 0, q = 0, r = 1; + uint32_t f = f0, g = g0, m; + uint16_t w; + int i = 30, limit, zeros; + int jac = *jacp; + + for (;;) { + /* Use a sentinel bit to count zeros only up to i. */ + zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i)); + /* Perform zeros divsteps at once; they all just divide g by two. */ + g >>= zeros; + u <<= zeros; + v <<= zeros; + eta -= zeros; + i -= zeros; + /* Update the bottom bit of jac: when dividing g by an odd power of 2, + * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */ + jac ^= (zeros & ((f >> 1) ^ (f >> 2))); + /* We're done once we've done 30 posdivsteps. */ + if (i == 0) break; + VERIFY_CHECK((f & 1) == 1); + VERIFY_CHECK((g & 1) == 1); + VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i)); + VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i)); + /* If eta is negative, negate it and replace f,g with g,f. */ + if (eta < 0) { + uint32_t tmp; + eta = -eta; + /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign + * if both f and g are 3 mod 4. */ + jac ^= ((f & g) >> 1); + tmp = f; f = g; g = tmp; + tmp = u; u = q; q = tmp; + tmp = v; v = r; r = tmp; + } + /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more + * than i can be cancelled out (as we'd be done before that point), and no more than eta+1 + * can be done as its sign will flip once that happens. */ + limit = ((int)eta + 1) > i ? i : ((int)eta + 1); + /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */ + VERIFY_CHECK(limit > 0 && limit <= 30); + m = (UINT32_MAX >> (32 - limit)) & 255U; + /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */ + w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m; + /* Do so. */ + g += f * w; + q += u * w; + r += v * w; + VERIFY_CHECK((g & m) == 0); + } + /* Return data in t and return value. */ + t->u = (int32_t)u; + t->v = (int32_t)v; + t->q = (int32_t)q; + t->r = (int32_t)r; + /* The determinant of t must be a power of two. This guarantees that multiplication with t + * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which + * will be divided out again). As each divstep's individual matrix has determinant 2 or -2, + * the aggregate of 30 of them will have determinant 2^30 or -2^30. */ + VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 || + (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30)); + *jacp = jac; + return eta; +} + /* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps. * * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range @@ -584,4 +661,69 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256 *x = d; } +/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */ +static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) { + /* Start with f=modulus, g=x, eta=-1. */ + secp256k1_modinv32_signed30 f = modinfo->modulus; + secp256k1_modinv32_signed30 g = *x; + int j, len = 9; + int32_t eta = -1; /* eta = -delta; delta is initially 1 */ + int32_t cond, fn, gn; + int jac = 0; + int count; + + VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0); + + /* The loop below does not converge for input g=0. Deal with this case specifically. */ + if (!(g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8])) return 0; + + /* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1. + * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */ +#ifdef VERIFY + for (count = 0; count < 25; ++count) { +#else + for (count = 0; count < 50; ++count) { +#endif + /* Compute transition matrix and new eta after 30 posdivsteps. */ + secp256k1_modinv32_trans2x2 t; + eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac); + /* Update f,g using that transition matrix. */ +#ifdef VERIFY + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ +#endif + secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t); + /* If the bottom limb of f is 1, there is a chance that f=1. */ + if (f.v[0] == 1) { + cond = 0; + /* Check if the other limbs are also 0. */ + for (j = 1; j < len; ++j) { + cond |= f.v[j]; + } + /* If so, we're done. */ + if (cond == 0) return 1 - 2*(jac & 1); + } + + /* Determine if len>1 and limb (len-1) of both f and g is 0. */ + fn = f.v[len - 1]; + gn = g.v[len - 1]; + cond = ((int32_t)len - 2) >> 31; + cond |= fn; + cond |= gn; + /* If so, reduce length. */ + if (cond == 0) --len; +#ifdef VERIFY + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ +#endif + } + + /* The loop failed to converge to f=g after 1500 iterations. Return -2, indicating unknown result. */ + return -2; +} + #endif /* SECP256K1_MODINV32_IMPL_H */ diff --git a/src/modinv64.h b/src/modinv64.h index da506dfa9f..3a9868b0e5 100644 --- a/src/modinv64.h +++ b/src/modinv64.h @@ -43,4 +43,7 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256 /* Same as secp256k1_modinv64_var, but constant time in x (not in the modulus). */ static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo); +/* Compute the Jacobi symbol for x (which must be normalized). Returns -2 if the result cannot be computed. */ +static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo); + #endif /* SECP256K1_MODINV64_H */ diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h index 0743a9c821..f43b10b1e8 100644 --- a/src/modinv64_impl.h +++ b/src/modinv64_impl.h @@ -256,7 +256,7 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint tmp = v; v = r; r = -tmp; /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled * out (as we'd be done before that point), and no more than eta+1 can be done as its - * will flip again once that happens. */ + * sign will flip again once that happens. */ limit = ((int)eta + 1) > i ? i : ((int)eta + 1); VERIFY_CHECK(limit > 0 && limit <= 62); /* m is a mask for the bottom min(limit, 6) bits. */ @@ -294,6 +294,94 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint return eta; } +/* Compute the transition matrix and eta for 62 posdivsteps (variable time, eta=-delta), and keeps track + * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^64 rather than 2^62, because + * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2). + * + * Input: eta: initial eta + * f0: bottom limb of initial f + * g0: bottom limb of initial g + * Output: t: transition matrix + * Return: final eta + */ +static int64_t secp256k1_modinv64_posdivsteps_62_var(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t, int *jacp) { + /* Transformation matrix; see comments in secp256k1_modinv64_divsteps_62. */ + uint64_t u = 1, v = 0, q = 0, r = 1; + uint64_t f = f0, g = g0, m; + uint32_t w; + int i = 62, limit, zeros; + int jac = *jacp; + + for (;;) { + /* Use a sentinel bit to count zeros only up to i. */ + zeros = secp256k1_ctz64_var(g | (UINT64_MAX << i)); + /* Perform zeros divsteps at once; they all just divide g by two. */ + g >>= zeros; + u <<= zeros; + v <<= zeros; + eta -= zeros; + i -= zeros; + /* Update the bottom bit of jac: when dividing g by an odd power of 2, + * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */ + jac ^= (zeros & ((f >> 1) ^ (f >> 2))); + /* We're done once we've done 62 posdivsteps. */ + if (i == 0) break; + VERIFY_CHECK((f & 1) == 1); + VERIFY_CHECK((g & 1) == 1); + VERIFY_CHECK((u * f0 + v * g0) == f << (62 - i)); + VERIFY_CHECK((q * f0 + r * g0) == g << (62 - i)); + /* If eta is negative, negate it and replace f,g with g,f. */ + if (eta < 0) { + uint64_t tmp; + eta = -eta; + tmp = f; f = g; g = tmp; + tmp = u; u = q; q = tmp; + tmp = v; v = r; r = tmp; + /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign + * if both f and g are 3 mod 4. */ + jac ^= ((f & g) >> 1); + /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled + * out (as we'd be done before that point), and no more than eta+1 can be done as its + * sign will flip again once that happens. */ + limit = ((int)eta + 1) > i ? i : ((int)eta + 1); + VERIFY_CHECK(limit > 0 && limit <= 62); + /* m is a mask for the bottom min(limit, 6) bits. */ + m = (UINT64_MAX >> (64 - limit)) & 63U; + /* Find what multiple of f must be added to g to cancel its bottom min(limit, 6) + * bits. */ + w = (f * g * (f * f - 2)) & m; + } else { + /* In this branch, use a simpler formula that only lets us cancel up to 4 bits of g, as + * eta tends to be smaller here. */ + limit = ((int)eta + 1) > i ? i : ((int)eta + 1); + VERIFY_CHECK(limit > 0 && limit <= 62); + /* m is a mask for the bottom min(limit, 4) bits. */ + m = (UINT64_MAX >> (64 - limit)) & 15U; + /* Find what multiple of f must be added to g to cancel its bottom min(limit, 4) + * bits. */ + w = f + (((f + 1) & 4) << 1); + w = (-w * g) & m; + } + g += f * w; + q += u * w; + r += v * w; + VERIFY_CHECK((g & m) == 0); + } + /* Return data in t and return value. */ + t->u = (int64_t)u; + t->v = (int64_t)v; + t->q = (int64_t)q; + t->r = (int64_t)r; + /* The determinant of t must be a power of two. This guarantees that multiplication with t + * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which + * will be divided out again). As each divstep's individual matrix has determinant 2 or -2, + * the aggregate of 62 of them will have determinant 2^62 or -2^62. */ + VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62 || + (int128_t)t->u * t->r - (int128_t)t->v * t->q == -(((int128_t)1) << 62)); + *jacp = jac; + return eta; +} + /* Compute (t/2^62) * [d, e] mod modulus, where t is a transition matrix scaled by 2^62. * * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range @@ -590,4 +678,69 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256 *x = d; } +/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */ +static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) { + /* Start with f=modulus, g=x, eta=-1. */ + secp256k1_modinv64_signed62 f = modinfo->modulus; + secp256k1_modinv64_signed62 g = *x; + int j, len = 5; + int64_t eta = -1; /* eta = -delta; delta is initially 1 */ + int64_t cond, fn, gn; + int jac = 0; + int count; + + VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0); + + /* The loop below does not converge for input g=0. Deal with this case specifically. */ + if (!(g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4])) return 0; + + /* Do up to 25 iterations of 62 posdivsteps (up to 1550 steps; more is extremely rare) each until f=1. + * In VERIFY mode use a lower number of iterations (744, close to the median 756), so failure actually occurs. */ +#ifdef VERIFY + for (count = 0; count < 12; ++count) { +#else + for (count = 0; count < 25; ++count) { +#endif + /* Compute transition matrix and new eta after 62 posdivsteps. */ + secp256k1_modinv64_trans2x2 t; + eta = secp256k1_modinv64_posdivsteps_62_var(eta, f.v[0] | ((uint64_t)f.v[1] << 62), g.v[0] | ((uint64_t)g.v[1] << 62), &t, &jac); + /* Update f,g using that transition matrix. */ +#ifdef VERIFY + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ +#endif + secp256k1_modinv64_update_fg_62_var(len, &f, &g, &t); + /* If the bottom limb of f is 1, there is a chance that f=1. */ + if (f.v[0] == 1) { + cond = 0; + /* Check if the other limbs are also 0. */ + for (j = 1; j < len; ++j) { + cond |= f.v[j]; + } + /* If so, we're done. */ + if (cond == 0) return 1 - 2*(jac & 1); + } + + /* Determine if len>1 and limb (len-1) of both f and g is 0. */ + fn = f.v[len - 1]; + gn = g.v[len - 1]; + cond = ((int64_t)len - 2) >> 63; + cond |= fn; + cond |= gn; + /* If so, reduce length. */ + if (cond == 0) --len; +#ifdef VERIFY + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ +#endif + } + + /* The loop failed to converge to f=g after 1550 iterations. Return -2, indicating unknown result. */ + return -2; +} + #endif /* SECP256K1_MODINV64_IMPL_H */ diff --git a/src/tests.c b/src/tests.c index ebeef510f0..185c2d1d45 100644 --- a/src/tests.c +++ b/src/tests.c @@ -801,12 +801,32 @@ void test_modinv32_uint16(uint16_t* out, const uint16_t* in, const uint16_t* mod uint16_to_signed30(&x, in); nonzero = (x.v[0] | x.v[1] | x.v[2] | x.v[3] | x.v[4] | x.v[5] | x.v[6] | x.v[7] | x.v[8]) != 0; uint16_to_signed30(&m.modulus, mod); - mutate_sign_signed30(&m.modulus); /* compute 1/modulus mod 2^30 */ m.modulus_inv30 = modinv2p64(m.modulus.v[0]) & 0x3fffffff; CHECK(((m.modulus_inv30 * m.modulus.v[0]) & 0x3fffffff) == 1); + /* Test secp256k1_jacobi32_maybe_var. */ + { + int jac; + uint16_t sqr[16], negone[16]; + mulmod256(sqr, in, in, mod); + uint16_to_signed30(&x, sqr); + /* Compute jacobi symbol of in^2, which must be 0 or 1 (or uncomputable). */ + jac = secp256k1_jacobi32_maybe_var(&x, &m); + CHECK(jac == -2 || jac == nonzero); + /* Then compute the jacobi symbol of -(in^2). x and -x have opposite + * jacobi symbols if and only if (mod % 4) == 3. */ + negone[0] = mod[0] - 1; + for (i = 1; i < 16; ++i) negone[i] = mod[i]; + mulmod256(sqr, sqr, negone, mod); + uint16_to_signed30(&x, sqr); + jac = secp256k1_jacobi32_maybe_var(&x, &m); + CHECK(jac == -2 || jac == (1 - (mod[0] & 2)) * nonzero); + } + + uint16_to_signed30(&x, in); + mutate_sign_signed30(&m.modulus); for (vartime = 0; vartime < 2; ++vartime) { /* compute inverse */ (vartime ? secp256k1_modinv32_var : secp256k1_modinv32)(&x, &m); @@ -874,12 +894,32 @@ void test_modinv64_uint16(uint16_t* out, const uint16_t* in, const uint16_t* mod uint16_to_signed62(&x, in); nonzero = (x.v[0] | x.v[1] | x.v[2] | x.v[3] | x.v[4]) != 0; uint16_to_signed62(&m.modulus, mod); - mutate_sign_signed62(&m.modulus); /* compute 1/modulus mod 2^62 */ m.modulus_inv62 = modinv2p64(m.modulus.v[0]) & M62; CHECK(((m.modulus_inv62 * m.modulus.v[0]) & M62) == 1); + /* Test secp256k1_jacobi64_maybe_var. */ + { + int jac; + uint16_t sqr[16], negone[16]; + mulmod256(sqr, in, in, mod); + uint16_to_signed62(&x, sqr); + /* Compute jacobi symbol of in^2, which must be 0 or 1 (or uncomputable). */ + jac = secp256k1_jacobi64_maybe_var(&x, &m); + CHECK(jac == -2 || jac == nonzero); + /* Then compute the jacobi symbol of -(in^2). x and -x have opposite + * jacobi symbols if and only if (mod % 4) == 3. */ + negone[0] = mod[0] - 1; + for (i = 1; i < 16; ++i) negone[i] = mod[i]; + mulmod256(sqr, sqr, negone, mod); + uint16_to_signed62(&x, sqr); + jac = secp256k1_jacobi64_maybe_var(&x, &m); + CHECK(jac == -2 || jac == (1 - (mod[0] & 2)) * nonzero); + } + + uint16_to_signed62(&x, in); + mutate_sign_signed62(&m.modulus); for (vartime = 0; vartime < 2; ++vartime) { /* compute inverse */ (vartime ? secp256k1_modinv64_var : secp256k1_modinv64)(&x, &m); @@ -2649,8 +2689,10 @@ void run_sqrt(void) { for (j = 0; j < count; j++) { random_fe(&x); secp256k1_fe_sqr(&s, &x); + CHECK(secp256k1_fe_jacobi_var(&s) == 1); test_sqrt(&s, &x); secp256k1_fe_negate(&t, &s, 1); + CHECK(secp256k1_fe_jacobi_var(&t) == -1); test_sqrt(&t, NULL); secp256k1_fe_mul(&t, &s, &ns); test_sqrt(&t, NULL);