Skip to content

Commit

Permalink
Merge litecoin-project#486: Add pippenger_wnaf for multi-multiplication
Browse files Browse the repository at this point in the history
d2f9c6b Use more precise pippenger bucket windows (Jonas Nick)
4c950bb Save some additions per window in _pippenger_wnaf (Peter Dettman)
a58f543 Add flags for choosing algorithm in ecmult_multi benchmark (Jonas Nick)
36b22c9 Use scratch space dependent batching in ecmult_multi (Jonas Nick)
355a38f Add pippenger_wnaf ecmult_multi (Jonas Nick)
bc65aa7 Add bench_ecmult (Pieter Wuille)
dba5471 Add ecmult_multi tests (Andrew Poelstra)
8c1c831 Generalize Strauss to support multiple points (Pieter Wuille)
548de42 add resizeable scratch space API (Andrew Poelstra)

Pull request description:

  This PR is based on litecoin-project#473 and adds a variant of "Pippengers algorithm" (see [Bernstein et al., Faster batch forgery identification](https://eprint.iacr.org/2012/549.pdf), page 15 and scipr-lab/libff#10) for point multi-multiplication that performs better with a large number of points than Strauss' algorithm.

  ![aggsig](https://user-images.githubusercontent.com/2582071/32731185-12c0f108-c881-11e7-83c7-c2432b5fadf5.png)

  Thanks to @sipa for providing `wnaf_fixed`, benchmarking, and the crucial suggestion to use affine addition.

  The PR also makes `ecmult_multi` decide which algorithm to use, based on the number of points and the available scratch space.
  For restricted scratch spaces this can be further optimized in the future (f.e. a 35kB scratch space allows batches of 11 points with strauss or 95 points with pippenger; choosing pippenger would be 5% faster).

  As soon as this PR has received some feedback I'll repeat the benchmarks to determine the optimal `pippenger_bucket_window` with the new benchmarking code in litecoin-project#473.

Tree-SHA512: 8e155107a00d35f412300275803f912b1d228b7adff578bc4754c5b29641100b51b9d37f989316b636f7144e6b199febe7de302a44f498bbfd8d463bdbe31a5c
  • Loading branch information
sipa committed Dec 8, 2017
2 parents 6ad5cdb + d2f9c6b commit c77fc08
Show file tree
Hide file tree
Showing 16 changed files with 1,601 additions and 83 deletions.
8 changes: 7 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ noinst_HEADERS += src/field_5x52_asm_impl.h
noinst_HEADERS += src/java/org_bitcoin_NativeSecp256k1.h
noinst_HEADERS += src/java/org_bitcoin_Secp256k1Context.h
noinst_HEADERS += src/util.h
noinst_HEADERS += src/scratch.h
noinst_HEADERS += src/scratch_impl.h
noinst_HEADERS += src/testrand.h
noinst_HEADERS += src/testrand_impl.h
noinst_HEADERS += src/hash.h
Expand Down Expand Up @@ -79,14 +81,17 @@ libsecp256k1_jni_la_CPPFLAGS = -DSECP256K1_BUILD $(JNI_INCLUDES)

noinst_PROGRAMS =
if USE_BENCHMARK
noinst_PROGRAMS += bench_verify bench_sign bench_internal
noinst_PROGRAMS += bench_verify bench_sign bench_internal bench_ecmult
bench_verify_SOURCES = src/bench_verify.c
bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB)
bench_sign_SOURCES = src/bench_sign.c
bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB)
bench_internal_SOURCES = src/bench_internal.c
bench_internal_LDADD = $(SECP_LIBS) $(COMMON_LIB)
bench_internal_CPPFLAGS = -DSECP256K1_BUILD $(SECP_INCLUDES)
bench_ecmult_SOURCES = src/bench_ecmult.c
bench_ecmult_LDADD = $(SECP_LIBS) $(COMMON_LIB)
bench_ecmult_CPPFLAGS = -DSECP256K1_BUILD $(SECP_INCLUDES)
endif

TESTS =
Expand Down Expand Up @@ -159,6 +164,7 @@ $(gen_context_BIN): $(gen_context_OBJECTS)
$(libsecp256k1_la_OBJECTS): src/ecmult_static_context.h
$(tests_OBJECTS): src/ecmult_static_context.h
$(bench_internal_OBJECTS): src/ecmult_static_context.h
$(bench_ecmult_OBJECTS): src/ecmult_static_context.h

src/ecmult_static_context.h: $(gen_context_BIN)
./$(gen_context_BIN)
Expand Down
35 changes: 35 additions & 0 deletions include/secp256k1.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ extern "C" {
*/
typedef struct secp256k1_context_struct secp256k1_context;

/** Opaque data structure that holds rewriteable "scratch space"
*
* The purpose of this structure is to replace dynamic memory allocations,
* because we target architectures where this may not be available. It is
* essentially a resizable (within specified parameters) block of bytes,
* which is initially created either by memory allocation or TODO as a pointer
* into some fixed rewritable space.
*
* Unlike the context object, this cannot safely be shared between threads
* without additional synchronization logic.
*/
typedef struct secp256k1_scratch_space_struct secp256k1_scratch_space;

/** Opaque data structure that holds a parsed and valid public key.
*
* The exact representation of data inside is implementation defined and not
Expand Down Expand Up @@ -243,6 +256,28 @@ SECP256K1_API void secp256k1_context_set_error_callback(
const void* data
) SECP256K1_ARG_NONNULL(1);

/** Create a secp256k1 scratch space object.
*
* Returns: a newly created scratch space.
* Args: ctx: an existing context object (cannot be NULL)
* In: init_size: initial amount of memory to allocate
* max_size: maximum amount of memory to allocate
*/
SECP256K1_API SECP256K1_WARN_UNUSED_RESULT secp256k1_scratch_space* secp256k1_scratch_space_create(
const secp256k1_context* ctx,
size_t init_size,
size_t max_size
) SECP256K1_ARG_NONNULL(1);

/** Destroy a secp256k1 scratch space.
*
* The pointer may not be used afterwards.
* Args: scratch: space to destroy
*/
SECP256K1_API void secp256k1_scratch_space_destroy(
secp256k1_scratch_space* scratch
);

/** Parse a variable-length public key into the pubkey object.
*
* Returns: 1 if the public key was fully valid.
Expand Down
16 changes: 16 additions & 0 deletions src/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define SECP256K1_BENCH_H

#include <stdio.h>
#include <string.h>
#include <math.h>
#include "sys/time.h"

Expand Down Expand Up @@ -63,4 +64,19 @@ void run_benchmark(char *name, void (*benchmark)(void*), void (*setup)(void*), v
printf("us\n");
}

int have_flag(int argc, char** argv, char *flag) {
char** argm = argv + argc;
argv++;
if (argv == argm) {
return 1;
}
while (argv != NULL && argv != argm) {
if (strcmp(*argv, flag) == 0) {
return 1;
}
argv++;
}
return 0;
}

#endif /* SECP256K1_BENCH_H */
196 changes: 196 additions & 0 deletions src/bench_ecmult.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
/**********************************************************************
* Copyright (c) 2017 Pieter Wuille *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
**********************************************************************/
#include <stdio.h>

#include "include/secp256k1.h"

#include "util.h"
#include "hash_impl.h"
#include "num_impl.h"
#include "field_impl.h"
#include "group_impl.h"
#include "scalar_impl.h"
#include "ecmult_impl.h"
#include "bench.h"
#include "secp256k1.c"

#define POINTS 32768
#define ITERS 10000

typedef struct {
/* Setup once in advance */
secp256k1_context* ctx;
secp256k1_scratch_space* scratch;
secp256k1_scalar* scalars;
secp256k1_ge* pubkeys;
secp256k1_scalar* seckeys;
secp256k1_gej* expected_output;
secp256k1_ecmult_multi_func ecmult_multi;

/* Changes per test */
size_t count;
int includes_g;

/* Changes per test iteration */
size_t offset1;
size_t offset2;

/* Test output. */
secp256k1_gej* output;
} bench_data;

static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) {
bench_data* data = (bench_data*)arg;
if (data->includes_g) ++idx;
if (idx == 0) {
*sc = data->scalars[data->offset1];
*ge = secp256k1_ge_const_g;
} else {
*sc = data->scalars[(data->offset1 + idx) % POINTS];
*ge = data->pubkeys[(data->offset2 + idx - 1) % POINTS];
}
return 1;
}

static void bench_ecmult(void* arg) {
bench_data* data = (bench_data*)arg;

size_t count = data->count;
int includes_g = data->includes_g;
size_t iters = 1 + ITERS / count;
size_t iter;

for (iter = 0; iter < iters; ++iter) {
data->ecmult_multi(&data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g);
data->offset1 = (data->offset1 + count) % POINTS;
data->offset2 = (data->offset2 + count - 1) % POINTS;
}
}

static void bench_ecmult_setup(void* arg) {
bench_data* data = (bench_data*)arg;
data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS;
data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
}

static void bench_ecmult_teardown(void* arg) {
bench_data* data = (bench_data*)arg;
size_t iters = 1 + ITERS / data->count;
size_t iter;
/* Verify the results in teardown, to avoid doing comparisons while benchmarking. */
for (iter = 0; iter < iters; ++iter) {
secp256k1_gej tmp;
secp256k1_gej_add_var(&tmp, &data->output[iter], &data->expected_output[iter], NULL);
CHECK(secp256k1_gej_is_infinity(&tmp));
}
}

static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) {
secp256k1_sha256 sha256;
unsigned char c[11] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0};
unsigned char buf[32];
int overflow = 0;
c[6] = num;
c[7] = num >> 8;
c[8] = num >> 16;
c[9] = num >> 24;
secp256k1_sha256_initialize(&sha256);
secp256k1_sha256_write(&sha256, c, sizeof(c));
secp256k1_sha256_finalize(&sha256, buf);
secp256k1_scalar_set_b32(scalar, buf, &overflow);
CHECK(!overflow);
}

static void run_test(bench_data* data, size_t count, int includes_g) {
char str[32];
static const secp256k1_scalar zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
size_t iters = 1 + ITERS / count;
size_t iter;

data->count = count;
data->includes_g = includes_g;

/* Compute (the negation of) the expected results directly. */
data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS;
data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
for (iter = 0; iter < iters; ++iter) {
secp256k1_scalar tmp;
secp256k1_scalar total = data->scalars[(data->offset1++) % POINTS];
size_t i = 0;
for (i = 0; i + 1 < count; ++i) {
secp256k1_scalar_mul(&tmp, &data->seckeys[(data->offset2++) % POINTS], &data->scalars[(data->offset1++) % POINTS]);
secp256k1_scalar_add(&total, &total, &tmp);
}
secp256k1_scalar_negate(&total, &total);
secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->expected_output[iter], NULL, &zero, &total);
}

/* Run the benchmark. */
sprintf(str, includes_g ? "ecmult_%ig" : "ecmult_%i", (int)count);
run_benchmark(str, bench_ecmult, bench_ecmult_setup, bench_ecmult_teardown, data, 10, count * (1 + ITERS / count));
}

int main(int argc, char **argv) {
bench_data data;
int i, p;
secp256k1_gej* pubkeys_gej;
size_t scratch_size;

if (argc > 1) {
if(have_flag(argc, argv, "pippenger_wnaf")) {
printf("Using pippenger_wnaf:\n");
data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single;
} else if(have_flag(argc, argv, "strauss_wnaf")) {
printf("Using strauss_wnaf:\n");
data.ecmult_multi = secp256k1_ecmult_strauss_batch_single;
}
} else {
data.ecmult_multi = secp256k1_ecmult_multi_var;
}

/* Allocate stuff */
data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16;
data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size, scratch_size);
data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS);
data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS);
data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS);
data.expected_output = malloc(sizeof(secp256k1_gej) * (ITERS + 1));
data.output = malloc(sizeof(secp256k1_gej) * (ITERS + 1));

/* Generate a set of scalars, and private/public keypairs. */
pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
secp256k1_gej_set_ge(&pubkeys_gej[0], &secp256k1_ge_const_g);
secp256k1_scalar_set_int(&data.seckeys[0], 1);
for (i = 0; i < POINTS; ++i) {
generate_scalar(i, &data.scalars[i]);
if (i) {
secp256k1_gej_double_var(&pubkeys_gej[i], &pubkeys_gej[i - 1], NULL);
secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]);
}
}
secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS, &data.ctx->error_callback);
free(pubkeys_gej);

for (i = 1; i <= 8; ++i) {
run_test(&data, i, 1);
}

for (p = 0; p <= 11; ++p) {
for (i = 9; i <= 16; ++i) {
run_test(&data, i << p, 1);
}
}
secp256k1_context_destroy(data.ctx);
secp256k1_scratch_space_destroy(data.scratch);
free(data.scalars);
free(data.pubkeys);
free(data.seckeys);
free(data.output);
free(data.expected_output);

return(0);
}
15 changes: 0 additions & 15 deletions src/bench_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,21 +324,6 @@ void bench_num_jacobi(void* arg) {
}
#endif

int have_flag(int argc, char** argv, char *flag) {
char** argm = argv + argc;
argv++;
if (argv == argm) {
return 1;
}
while (argv != NULL && argv != argm) {
if (strcmp(*argv, flag) == 0) {
return 1;
}
argv++;
}
return 0;
}

int main(int argc, char **argv) {
bench_inv data;
if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, 2000000);
Expand Down
18 changes: 17 additions & 1 deletion src/ecmult.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**********************************************************************
* Copyright (c) 2013, 2014 Pieter Wuille *
* Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
**********************************************************************/
Expand All @@ -9,6 +9,8 @@

#include "num.h"
#include "group.h"
#include "scalar.h"
#include "scratch.h"

typedef struct {
/* For accelerating the computation of a*P + b*G: */
Expand All @@ -28,4 +30,18 @@ static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context *ctx
/** Double multiply: R = na*A + ng*G */
static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng);

typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data);

/**
* Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai.
* Chooses the right algorithm for a given number of points and scratch space
* size. Resets and overwrites the given scratch space. If the points do not
* fit in the scratch space the algorithm is repeatedly run with batches of
* points.
* Returns: 1 on success (including when inp_g_sc is NULL and n is 0)
* 0 if there is not enough scratch space for a single point or
* callback returns 0
*/
static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n);

#endif /* SECP256K1_ECMULT_H */
7 changes: 0 additions & 7 deletions src/ecmult_const_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,6 @@
#include "ecmult_const.h"
#include "ecmult_impl.h"

#ifdef USE_ENDOMORPHISM
#define WNAF_BITS 128
#else
#define WNAF_BITS 256
#endif
#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w))

/* This is like `ECMULT_TABLE_GET_GE` but is constant time */
#define ECMULT_CONST_TABLE_GET_GE(r,pre,n,w) do { \
int m; \
Expand Down
Loading

0 comments on commit c77fc08

Please sign in to comment.