Skip to content

Commit

Permalink
Add specialized reduction for P-384
Browse files Browse the repository at this point in the history
This is about 20-30% faster on both 32-bit and 64-bit systems.
  • Loading branch information
randombit committed Jun 24, 2024
1 parent f4b0220 commit dd0b2b1
Showing 1 changed file with 113 additions and 1 deletion.
114 changes: 113 additions & 1 deletion src/lib/math/pcurves/pcurves_secp384r1/pcurves_secp384r1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,124 @@

#include <botan/internal/pcurves_instance.h>

#include <botan/internal/pcurves_nist.h>
#include <botan/internal/pcurves_wrap.h>

namespace Botan::PCurve {

namespace {

// Field element representation for secp384r1 (P-384) with a specialized
// reduction routine.
//
// Elements are kept as plain N-word arrays: to_rep() and from_rep() return
// their argument unchanged, and double-width values are brought into the
// representation by redc().
//
// Template parameter Params supplies the modulus P, the word count N and the
// word type W. Only 32-bit and 64-bit words are supported (enforced by the
// static_assert in p384_mul_mod_384).
template <typename Params>
class Secp384r1Rep final {
public:
static constexpr auto P = Params::P;
static constexpr size_t N = Params::N;
typedef typename Params::W W;

// Reduce the double-width value z (2*N words) to an N-word result.
//
// The input is read as 24 pieces X00..X23 via get_uint32 (helper defined
// elsewhere; presumably it extracts the i-th 32-bit limb independent of the
// word size W - confirm). The pieces are widened to int64_t so that the signed
// sums below can hold intermediate carries and negative values.
constexpr static std::array<W, N> redc(const std::array<W, 2 * N>& z) {
const int64_t X00 = get_uint32(z.data(), 0);
const int64_t X01 = get_uint32(z.data(), 1);
const int64_t X02 = get_uint32(z.data(), 2);
const int64_t X03 = get_uint32(z.data(), 3);
const int64_t X04 = get_uint32(z.data(), 4);
const int64_t X05 = get_uint32(z.data(), 5);
const int64_t X06 = get_uint32(z.data(), 6);
const int64_t X07 = get_uint32(z.data(), 7);
const int64_t X08 = get_uint32(z.data(), 8);
const int64_t X09 = get_uint32(z.data(), 9);
const int64_t X10 = get_uint32(z.data(), 10);
const int64_t X11 = get_uint32(z.data(), 11);
const int64_t X12 = get_uint32(z.data(), 12);
const int64_t X13 = get_uint32(z.data(), 13);
const int64_t X14 = get_uint32(z.data(), 14);
const int64_t X15 = get_uint32(z.data(), 15);
const int64_t X16 = get_uint32(z.data(), 16);
const int64_t X17 = get_uint32(z.data(), 17);
const int64_t X18 = get_uint32(z.data(), 18);
const int64_t X19 = get_uint32(z.data(), 19);
const int64_t X20 = get_uint32(z.data(), 20);
const int64_t X21 = get_uint32(z.data(), 21);
const int64_t X22 = get_uint32(z.data(), 22);
const int64_t X23 = get_uint32(z.data(), 23);

// One copy of P-384 is added to prevent underflow
//
// The leading constant of each sum is the corresponding 32-bit limb of
// P-384 = 2^384 - 2^128 - 2^96 + 2^32 - 1, i.e. (least significant first)
// FFFFFFFF, 0, 0, FFFFFFFF, FFFFFFFE, then FFFFFFFF for the remaining limbs.
// The X terms fold the upper pieces X12..X23 into the twelve low limbs with
// the signs and multiplicities shown.
const int64_t S0 = 0xFFFFFFFF + X00 + X12 + X20 + X21 - X23;
const int64_t S1 = 0x00000000 + X01 + X13 + X22 + X23 - X12 - X20;
const int64_t S2 = 0x00000000 + X02 + X14 + X23 - X13 - X21;
const int64_t S3 = 0xFFFFFFFF + X03 + X12 + X15 + X20 + X21 - X14 - X22 - X23;
const int64_t S4 = 0xFFFFFFFE + X04 + X12 + X13 + X16 + X20 + X21 * 2 + X22 - X15 - X23 * 2;
const int64_t S5 = 0xFFFFFFFF + X05 + X13 + X14 + X17 + X21 + X22 * 2 + X23 - X16;
const int64_t S6 = 0xFFFFFFFF + X06 + X14 + X15 + X18 + X22 + X23 * 2 - X17;
const int64_t S7 = 0xFFFFFFFF + X07 + X15 + X16 + X19 + X23 - X18;
const int64_t S8 = 0xFFFFFFFF + X08 + X16 + X17 + X20 - X19;
const int64_t S9 = 0xFFFFFFFF + X09 + X17 + X18 + X21 - X20;
const int64_t SA = 0xFFFFFFFF + X10 + X18 + X19 + X22 - X21;
const int64_t SB = 0xFFFFFFFF + X11 + X19 + X20 + X23 - X22;

std::array<W, N> r = {};

// SumAccum is defined elsewhere; presumably each accum() call adds the limb
// sum to a running carry, stores the low 32 bits into the next limb of r and
// keeps the remainder as carry - confirm against its definition. The calls
// must stay in least-significant-first order.
SumAccum sum(r);

sum.accum(S0);
sum.accum(S1);
sum.accum(S2);
sum.accum(S3);
sum.accum(S4);
sum.accum(S5);
sum.accum(S6);
sum.accum(S7);
sum.accum(S8);
sum.accum(S9);
sum.accum(SA);
sum.accum(SB);
// S is whatever carry remains after the top limb.
const auto S = sum.final_carry(0);

// NOTE(review): unpoison presumably tells constant-time checking tools that S
// may be inspected by the assertion below - confirm against CT::unpoison.
CT::unpoison(S);
BOTAN_ASSERT(S <= 4, "Expected overflow");

// Subtract the multiple of P selected by the carry S (see p384_mul_mod_384,
// which yields ((S+1)*P) mod 2^384). If that subtraction borrows, P is added
// back once; bigint_cnd_add receives the borrow as its condition argument.
const auto correction = p384_mul_mod_384(S);
W borrow = bigint_sub2(r.data(), N, correction.data(), N);

bigint_cnd_add(borrow, r.data(), N, P.data(), N);

return r;
}

// The value one: least significant word is 1, all other words are zero.
constexpr static std::array<W, N> one() { return std::array<W, N>{1}; }

// Conversion into the representation is the identity.
constexpr static std::array<W, N> to_rep(const std::array<W, N>& x) { return x; }

// A double-width value enters the representation by reducing it with redc().
constexpr static std::array<W, N> wide_to_rep(const std::array<W, 2 * N>& x) { return redc(x); }

// Conversion out of the representation is the identity.
constexpr static std::array<W, N> from_rep(const std::array<W, N>& z) { return z; }

private:
// Return ((i+1)*P-384) % 2**384
//
// The result starts from P itself and then applies the word adjustments for
// i*P = i*2^384 - i*2^128 - i*2^96 + i*2^32 - i, so i == 0 yields P, i == 1
// yields 2*P mod 2**384, and so on.
//
// Assumes i is small (redc() asserts its argument is at most 4), so none of
// the per-word additions/subtractions below carries or borrows into a
// neighbouring word.
constexpr static std::array<W, N> p384_mul_mod_384(W i) {
static_assert(WordInfo<W>::bits == 32 || WordInfo<W>::bits == 64);

// For small i, multiples of P-384 have a simple structure so it's faster to
// compute the value directly vs a (constant time) table lookup

auto r = P;
if constexpr(WordInfo<W>::bits == 32) {
// 32-bit words: word k has weight 2^(32*k).
r[4] -= i;  // - i*2^128
r[3] -= i;  // - i*2^96
r[1] += i;  // + i*2^32
r[0] -= i;  // - i
} else {
// 64-bit words: word k has weight 2^(64*k); i32 is i placed in the upper
// half of a word.
const uint64_t i32 = static_cast<uint64_t>(i) << 32;
r[2] -= i;    // - i*2^128
r[1] -= i32;  // - i*2^96
r[0] += i32;  // + i*2^32
r[0] -= i;    // - i
}
return r;
}
};

// clang-format off
namespace secp384r1 {

Expand All @@ -25,7 +137,7 @@ class Params final : public EllipticCurveParameters<
-12> {
};

class Curve final : public EllipticCurve<Params> {};
// Curve type for secp384r1. Secp384r1Rep is passed as the second template
// argument of EllipticCurve.
// NOTE(review): presumably that argument selects the field element
// representation (and thus the reduction routine) - confirm against the
// EllipticCurve declaration.
class Curve final : public EllipticCurve<Params, Secp384r1Rep> {};

}

Expand Down

0 comments on commit dd0b2b1

Please sign in to comment.