Skip to content

Commit

Permalink
Faster iterated point doubling for generic A and A == 0 cases
Browse files Browse the repository at this point in the history
Improves ECDSA verify and ECDH agreement performance; on my machine by about
11% for Brainpool curves and 8% for secp256k1.
  • Loading branch information
randombit committed Jul 25, 2024
1 parent f2abbaf commit 6e2ddc7
Showing 1 changed file with 50 additions and 24 deletions.
74 changes: 50 additions & 24 deletions src/lib/math/pcurves/pcurves_impl/pcurves_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ class ProjectiveCurvePoint {
/*
https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-1998-cmo-2
12M + 4S + 6add + 1*2
Cost: 8M + 3S + 6add + 1*2
*/

const auto Z1Z1 = a.z().square();
Expand Down Expand Up @@ -671,6 +671,8 @@ class ProjectiveCurvePoint {

/*
https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-1998-cmo-2
Cost: 12M + 4S + 6add + 1*2
*/

const auto Z1Z1 = a.z().square();
Expand Down Expand Up @@ -715,30 +717,60 @@ class ProjectiveCurvePoint {
}

constexpr Self dbl_n(size_t n) const {
if constexpr(Self::A_is_minus_3) {
/*
Repeated doubling using an adaptation of Algorithm 3.23 in
"Guide To Elliptic Curve Cryptography"
Hankerson, Menezes, Vanstone
/*
Repeated doubling using an adaptation of Algorithm 3.23 in
"Guide To Elliptic Curve Cryptography"
Hankerson, Menezes, Vanstone
For A == -3
Cost: 2S + 1*2 + n*(4S + 4M + 2*2 + 1*3 + 4A)
Naive doubling
Cost: n*(4S + 4M + 2*2 + 1*3 + 5A + 1*4 + 1*8)
For A == -3
Cost: 2S + 1*2 + 1half + n*(4M + 4S + 4A + 2*2 + 1*3)
Naive: n*(4M + 4S + 5A + 2*2 + 1*3 + 1*4 + 1*8)
TODO adapt this for A == 0 and/or generic A cases
*/
For generic A:
Cost: 2S + 1M + 1*2 + 1half + n*(4M + 4S + 4A + 2*2 + 1*3)
Naive: n*(4M + 6S + 4A + 2*2 + 1*3 + 1*4 + 1*8)
*/

if constexpr(Self::A_is_zero) {
// For A == 0 the savings are minimal, because the main point of this
// technique is keeping A*Z^4 alive across the loop, and here that value
// is always zero anyway. However, using 2*y in the loop saves several
// small constant multiplications, which provides some improvement.

auto nx = x();
auto ny = y();
auto ny = y().mul2();
auto nz = z();

while(n > 0) {
const auto ny2 = ny.square();
const auto ny4 = ny2.square();
const auto t1 = nx.square().mul3();
const auto t2 = nx * ny2;
nx = t1.square() - t2.mul2();
nz *= ny;
ny = t1 * (t2 - nx).mul2() - ny4;
n--;
}
return Self(nx, ny.div2(), nz);
} else {
auto nx = x();
auto ny = y().mul2();
auto nz = z();
ny = ny.mul2();
auto w = nz.square().square();

if constexpr(!Self::A_is_minus_3) {
w *= A;
}

while(n > 0) {
const auto ny2 = ny.square();
const auto ny4 = ny2.square();
const auto t1 = (nx.square() - w).mul3();
FieldElement t1;
if constexpr(Self::A_is_minus_3) {
t1 = (nx.square() - w).mul3();
} else {
t1 = nx.square().mul3() + w;
}
const auto t2 = nx * ny2;
nx = t1.square() - t2.mul2();
nz *= ny;
Expand All @@ -748,19 +780,13 @@ class ProjectiveCurvePoint {
w *= ny4;
}
}
ny = ny.div2();
return Self(nx, ny, nz);
} else {
Self pt = (*this);
for(size_t i = 0; i != n; ++i) {
pt = pt.dbl();
}
return pt;
return Self(nx, ny.div2(), nz);
}
}

constexpr Self dbl() const {
//https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-1998-cmo-2
// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-1998-cmo-2
// Cost: 3M + 6S + 1*a + 4add + 2*2 + 1*3 + 1*4 + 1*8

FieldElement m = FieldElement::zero();

Expand Down

0 comments on commit 6e2ddc7

Please sign in to comment.