Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the number of allocations in ECP operations #3512

Merged
131 changes: 81 additions & 50 deletions library/bignum.c
Original file line number Diff line number Diff line change
Expand Up @@ -1339,29 +1339,32 @@ int mbedtls_mpi_add_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi
/**
* Helper for mbedtls_mpi subtraction.
*
* Calculate d - s where d and s have the same size.
* Calculate l - r where l and r have the same size.
* This function operates modulo (2^ciL)^n and returns the carry
* (1 if there was a wraparound, i.e. if `d < s`, and 0 otherwise).
* (1 if there was a wraparound, i.e. if `l < r`, and 0 otherwise).
*
* \param n Number of limbs of \p d and \p s.
* \param[in,out] d On input, the left operand.
* On output, the result of the subtraction:
* \param[in] s The right operand.
* d may be aliased to l or r.
*
* \return 1 if `d < s`.
* 0 if `d >= s`.
* \param n Number of limbs of \p d, \p l and \p r.
* \param[out] d The result of the subtraction.
* \param[in] l The left operand.
* \param[in] r The right operand.
*
* \return 1 if `l < r`.
* 0 if `l >= r`.
*/
static mbedtls_mpi_uint mpi_sub_hlp( size_t n,
mbedtls_mpi_uint *d,
const mbedtls_mpi_uint *s )
const mbedtls_mpi_uint *l,
const mbedtls_mpi_uint *r )
{
size_t i;
mbedtls_mpi_uint c, z;
mbedtls_mpi_uint c = 0, t, z;

for( i = c = 0; i < n; i++, s++, d++ )
for( i = 0; i < n; i++ )
{
z = ( *d < c ); *d -= c;
c = ( *d < *s ) + z; *d -= *s;
z = ( l[i] < c ); t = l[i] - c;
c = ( t < r[i] ) + z; d[i] = t - r[i];
}

return( c );
Expand All @@ -1372,37 +1375,28 @@ static mbedtls_mpi_uint mpi_sub_hlp( size_t n,
*/
int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
{
mbedtls_mpi TB;
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
size_t n;
mbedtls_mpi_uint carry;
MPI_VALIDATE_RET( X != NULL );
MPI_VALIDATE_RET( A != NULL );
MPI_VALIDATE_RET( B != NULL );

mbedtls_mpi_init( &TB );

if( X == B )
{
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
B = &TB;
}

if( X != A )
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) );

/*
* X should always be positive as a result of unsigned subtractions.
*/
X->s = 1;

ret = 0;

for( n = B->n; n > 0; n-- )
if( B->p[n - 1] != 0 )
break;

carry = mpi_sub_hlp( n, X->p, B->p );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, A->n ) );

/* Set the high limbs of X to match A. Don't touch the lower limbs
* because X might be aliased to B, and we must not overwrite the
* significant digits of B. */
if( A->n > n )
memcpy( X->p + n, A->p + n, ( A->n - n ) * ciL );
if( X->n > A->n )
memset( X->p + A->n, 0, ( X->n - A->n ) * ciL );

carry = mpi_sub_hlp( n, X->p, A->p, B->p );
if( carry != 0 )
{
/* Propagate the carry to the first nonzero limb of X. */
Expand All @@ -1418,10 +1412,10 @@ int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi
--X->p[n];
}

cleanup:

mbedtls_mpi_free( &TB );
/* X should always be positive as a result of unsigned subtractions. */
X->s = 1;

cleanup:
return( ret );
}

Expand Down Expand Up @@ -1531,8 +1525,21 @@ int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint
return( mbedtls_mpi_sub_mpi( X, A, &_B ) );
}

/*
* Helper for mbedtls_mpi multiplication
/** Helper for mbedtls_mpi multiplication.
*
* Add \p b * \p s to \p d.
*
* \param i The number of limbs of \p s.
* \param[in] s A bignum to multiply, of size \p i.
* It may overlap with \p d, but only if
* \p d <= \p s.
* Its leading limb must not be \c 0.
* \param[in,out] d The bignum to add to.
* It must be sufficiently large to store the
* result of the multiplication. This means
* \p i + 1 limbs if \p d[\p i - 1] started as 0 and \p b
* is not known a priori.
* \param b A scalar to multiply.
*/
static
#if defined(__APPLE__) && defined(__arm__)
Expand All @@ -1542,7 +1549,10 @@ static
*/
__attribute__ ((noinline))
#endif
void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mpi_uint b )
void mpi_mul_hlp( size_t i,
const mbedtls_mpi_uint *s,
mbedtls_mpi_uint *d,
mbedtls_mpi_uint b )
{
mbedtls_mpi_uint c = 0, t = 0;

Expand Down Expand Up @@ -1597,10 +1607,10 @@ void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mp

t++;

do {
while( c != 0 )
{
*d += c; c = ( *d < c ); d++;
}
while( c != 0 );
}

/*
Expand Down Expand Up @@ -1648,17 +1658,38 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi
*/
int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint b )
{
mbedtls_mpi _B;
mbedtls_mpi_uint p[1];
MPI_VALIDATE_RET( X != NULL );
MPI_VALIDATE_RET( A != NULL );

_B.s = 1;
_B.n = 1;
_B.p = p;
p[0] = b;
/* mpi_mul_hlp can't deal with a leading 0. */
size_t n = A->n;
while( n > 0 && A->p[n - 1] == 0 )
--n;

/* The general method below doesn't work if n==0 or b==0. By chance
* calculating the result is trivial in those cases. */
if( b == 0 || n == 0 )
{
mbedtls_mpi_lset( X, 0 );
return( 0 );
}

return( mbedtls_mpi_mul_mpi( X, A, &_B ) );
/* Calculate A*b as A + A*(b-1) to take advantage of mpi_mul_hlp */
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
/* In general, A * b requires 1 limb more than b. If
* A->p[n - 1] * b / b == A->p[n - 1], then A * b fits in the same
* number of limbs as A and the call to grow() is not required since
* copy() will take care of the growth if needed. However, experimentally,
* making the call to grow() unconditional causes slightly fewer
* calls to calloc() in ECP code, presumably because it reuses the
* same mpi for a while and this way the mpi is more likely to directly
* grow to its final size. */
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, n + 1 ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) );
mpi_mul_hlp( n, A->p, X->p, b - 1 );

cleanup:
return( ret );
}

/*
Expand Down Expand Up @@ -1799,7 +1830,7 @@ int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,

MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &Z, A->n + 2 ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Z, 0 ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T1, 2 ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T1, A->n + 2 ) );

k = mbedtls_mpi_bitlen( &Y ) % biL;
if( k < biL - 1 )
Expand Down Expand Up @@ -2065,7 +2096,7 @@ static void mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi
* do the calculation without using conditional tests. */
/* Set d to d0 + (2^biL)^n - N where d0 is the current value of d. */
d[n] += 1;
d[n] -= mpi_sub_hlp( n, d, N->p );
d[n] -= mpi_sub_hlp( n, d, d, N->p );
/* If d0 < N then d < (2^biL)^n
* so d[n] == 0 and we want to keep A as it is.
* If d0 >= N then d >= (2^biL)^n, and d <= (2^biL)^n + N < 2 * (2^biL)^n
Expand Down
54 changes: 24 additions & 30 deletions library/ecp_curves.c
Original file line number Diff line number Diff line change
Expand Up @@ -1000,25 +1000,20 @@ static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry )
#define ADD( j ) add32( &cur, A( j ), &c );
#define SUB( j ) sub32( &cur, A( j ), &c );

#define ciL (sizeof(mbedtls_mpi_uint)) /* chars in limb */
#define biL (ciL << 3) /* bits in limb */

/*
* Helpers for the main 'loop'
* (see fix_negative for the motivation of C)
*/
#define INIT( b ) \
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; \
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; \
signed char c = 0, cc; \
uint32_t cur; \
size_t i = 0, bits = (b); \
mbedtls_mpi C; \
mbedtls_mpi_uint Cp[ (b) / 8 / sizeof( mbedtls_mpi_uint) + 1 ]; \
\
C.s = 1; \
C.n = (b) / 8 / sizeof( mbedtls_mpi_uint) + 1; \
C.p = Cp; \
memset( Cp, 0, C.n * sizeof( mbedtls_mpi_uint ) ); \
\
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( N, (b) * 2 / 8 / \
sizeof( mbedtls_mpi_uint ) ) ); \
/* N is the size of the product of two b-bit numbers, plus one */ \
/* limb for fix_negative */ \
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( N, ( b ) * 2 / biL + 1 ) ); \
LOAD32;

#define NEXT \
Expand All @@ -1033,33 +1028,32 @@ static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry )
STORE32; i++; \
cur = c > 0 ? c : 0; STORE32; \
cur = 0; while( ++i < MAX32 ) { STORE32; } \
if( c < 0 ) MBEDTLS_MPI_CHK( fix_negative( N, c, &C, bits ) );
if( c < 0 ) fix_negative( N, c, bits );

/*
* If the result is negative, we get it in the form
* c * 2^(bits + 32) + N, with c negative and N positive shorter than 'bits'
*/
static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, size_t bits )
static inline void fix_negative( mbedtls_mpi *N, signed char c, size_t bits )
{
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;

/* C = - c * 2^(bits + 32) */
#if !defined(MBEDTLS_HAVE_INT64)
((void) bits);
#else
if( bits == 224 )
C->p[ C->n - 1 ] = ((mbedtls_mpi_uint) -c) << 32;
else
#endif
C->p[ C->n - 1 ] = (mbedtls_mpi_uint) -c;
size_t i;

/* N = - ( C - N ) */
MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( N, C, N ) );
/* Set N := N - 2^bits */
--N->p[0];
for( i = 0; i <= bits / 8 / sizeof( mbedtls_mpi_uint ); i++ )
{
N->p[i] = ~(mbedtls_mpi_uint)0 - N->p[i];
}
N->s = -1;

cleanup:

return( ret );
/* Add |c| * 2^(bits + 32) to the absolute value. Since c and N are
* negative, this adds c * 2^(bits + 32). */
mbedtls_mpi_uint msw = (mbedtls_mpi_uint) -c;
#if defined(MBEDTLS_HAVE_INT64)
if( bits == 224 )
msw <<= 32;
#endif
N->p[bits / 8 / sizeof( mbedtls_mpi_uint)] += msw;
}

#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED)
Expand Down
Loading