Skip to content

Commit

Permalink
bn: Move x86-64 argument-based dispatching of bn_mul_mont to C. (aws#…
Browse files Browse the repository at this point in the history
  • Loading branch information
justsmth authored Aug 27, 2024
1 parent 005b26f commit 35d5287
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 133 deletions.
59 changes: 17 additions & 42 deletions crypto/fipsmodule/bn/asm/x86_64-mont.pl
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
$addx = 1;
for (@ARGV) { $addx = 0 if (/-DMY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX/); }

# int bn_mul_mont(
# int bn_mul_mont_nohw(
$rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap,
$bp="%rdx"; # const BN_ULONG *bp,
Expand All @@ -91,35 +91,15 @@
.extern OPENSSL_ia32cap_P
.globl bn_mul_mont
.type bn_mul_mont,\@function,6
.globl bn_mul_mont_nohw
.type bn_mul_mont_nohw,\@function,6
.align 16
bn_mul_mont:
bn_mul_mont_nohw:
.cfi_startproc
_CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
test \$3,${num}d
jnz .Lmul_enter
cmp \$8,${num}d
jb .Lmul_enter
___
$code.=<<___ if ($addx);
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
leaq OPENSSL_ia32cap_P(%rip),%r11
mov 8(%r11),%r11d
#endif
___
$code.=<<___;
cmp $ap,$bp
jne .Lmul4x_enter
test \$7,${num}d
jz .Lsqr8x_enter
jmp .Lmul4x_enter
.align 16
.Lmul_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -354,29 +334,21 @@
.Lmul_epilogue:
ret
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
___
{{{
my @A=("%r10","%r11");
my @N=("%r13","%rdi");
$code.=<<___;
.globl bn_mul4x_mont
.type bn_mul4x_mont,\@function,6
.align 16
bn_mul4x_mont:
.cfi_startproc
_CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
and \$0x80100,%r11d
cmp \$0x80100,%r11d
je .Lmulx4x_enter
#endif
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -814,7 +786,7 @@
}}}
{{{
######################################################################
# void bn_sqr8x_mont(
# int bn_sqr8x_mont(
my $rptr="%rdi"; # const BN_ULONG *rptr,
my $aptr="%rsi"; # const BN_ULONG *aptr,
my $bptr="%rdx"; # not used
Expand All @@ -835,13 +807,15 @@
$code.=<<___;
.extern bn_sqr8x_internal # see x86_64-mont5 module
.globl bn_sqr8x_mont
.type bn_sqr8x_mont,\@function,6
.align 32
bn_sqr8x_mont:
.cfi_startproc
_CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -1037,13 +1011,14 @@

$code.=<<___;
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
.globl bn_mulx4x_mont
.type bn_mulx4x_mont,\@function,6
.align 32
bn_mulx4x_mont:
.cfi_startproc
_CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -1549,9 +1524,9 @@
.section .pdata
.align 4
.rva .LSEH_begin_bn_mul_mont
.rva .LSEH_end_bn_mul_mont
.rva .LSEH_info_bn_mul_mont
.rva .LSEH_begin_bn_mul_mont_nohw
.rva .LSEH_end_bn_mul_mont_nohw
.rva .LSEH_info_bn_mul_mont_nohw
.rva .LSEH_begin_bn_mul4x_mont
.rva .LSEH_end_bn_mul4x_mont
Expand All @@ -1571,7 +1546,7 @@
$code.=<<___;
.section .xdata
.align 8
.LSEH_info_bn_mul_mont:
.LSEH_info_bn_mul_mont_nohw:
.byte 9,0,0,0
.rva mul_handler
.rva .Lmul_body,.Lmul_epilogue # HandlerData[]
Expand Down
27 changes: 27 additions & 0 deletions crypto/fipsmodule/bn/bn_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2843,10 +2843,37 @@ TEST_F(BNTest, BNMulMontABI) {
a[0] = 1;
b[0] = 42;

#if defined(OPENSSL_X86_64)
#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
if (bn_mulx4x_mont_capable(words)) {
CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), b.data(), mont->N.d,
mont->n0, words);
CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
}
#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
if (bn_mul4x_mont_capable(words)) {
CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), b.data(), mont->N.d,
mont->n0, words);
CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
}
CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), b.data(), mont->N.d,
mont->n0, words);
CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
if (bn_sqr8x_mont_capable(words)) {
CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
}
#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
#else
CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0,
words);
CHECK_ABI(bn_mul_mont, r.data(), a.data(), a.data(), mont->N.d, mont->n0,
words);
#endif
}
}
#endif // OPENSSL_BN_ASM_MONT && SUPPORTS_ABI_TEST
Expand Down
26 changes: 26 additions & 0 deletions crypto/fipsmodule/bn/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ OPENSSL_MSVC_PRAGMA(warning(pop))
#endif

#include "../../internal.h"
#include "../cpucap/internal.h"

#if defined(__cplusplus)
extern "C" {
Expand Down Expand Up @@ -404,6 +405,31 @@ int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
// inputs.
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);

#if defined(OPENSSL_X86_64)
int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// bn_mul4x_mont_capable returns one if |num| is at least eight and a multiple
// of four, and zero otherwise. It is the size check applied before calling
// |bn_mul4x_mont|.
OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) {
  if (num < 8) {
    return 0;
  }
  return (num % 4) == 0;
}
int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
// bn_mulx4x_mont_capable returns one if |num| passes
// |bn_mul4x_mont_capable| and the CPU reports both BMI2 and ADX, and zero
// otherwise. The checks run in that order and stop at the first failure.
OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) {
  if (!bn_mul4x_mont_capable(num)) {
    return 0;
  }
  // MULX is in BMI2.
  if (!CRYPTO_is_BMI2_capable()) {
    return 0;
  }
  return CRYPTO_is_ADX_capable() != 0;
}
int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
// bn_sqr8x_mont_capable returns one if |num| is at least eight and a multiple
// of eight, and zero otherwise. It is the size check applied before calling
// |bn_sqr8x_mont|.
OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) {
  if (num < 8) {
    return 0;
  }
  return (num % 8) == 0;
}
int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *unused_bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
#endif // defined(OPENSSL_X86_64)

#endif

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
Expand Down
19 changes: 19 additions & 0 deletions crypto/fipsmodule/bn/montgomery.c
Original file line number Diff line number Diff line change
Expand Up @@ -632,3 +632,22 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
}
OPENSSL_cleanse(tmp, 2 * num * sizeof(BN_ULONG));
}

#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64)
// bn_mul_mont is the x86-64 entry point for Montgomery multiplication. It
// selects one of the assembly routines based on the arguments and forwards
// them unchanged, returning whatever the chosen routine returns. The order of
// preference is:
//
//   1. |bn_sqr8x_mont|, when |ap| and |bp| are the same pointer and |num|
//      passes |bn_sqr8x_mont_capable|.
//   2. |bn_mulx4x_mont|, when |bn_mulx4x_mont_capable| accepts |num|.
//   3. |bn_mul4x_mont|, when |bn_mul4x_mont_capable| accepts |num|.
//   4. |bn_mul_mont_nohw| for everything else.
//
// The first two are only considered when MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
// is not defined.
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, size_t num)
{
#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
  // A squaring is recognised purely by pointer equality of the two inputs.
  const int is_squaring = (ap == bp);
  if (is_squaring && bn_sqr8x_mont_capable(num)) {
    return bn_sqr8x_mont(rp, ap, bp, np, n0, num);
  } else if (bn_mulx4x_mont_capable(num)) {
    return bn_mulx4x_mont(rp, ap, bp, np, n0, num);
  }
#endif  // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
  return bn_mul4x_mont_capable(num)
             ? bn_mul4x_mont(rp, ap, bp, np, n0, num)
             : bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
}
#endif
44 changes: 15 additions & 29 deletions generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont.S
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,16 @@
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,@function
.globl bn_mul_mont_nohw
.hidden bn_mul_mont_nohw
.type bn_mul_mont_nohw,@function
.align 16
bn_mul_mont:
bn_mul_mont_nohw:
.cfi_startproc
_CET_ENDBR
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
jb .Lmul_enter
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 8(%r11),%r11d
#endif
cmpq %rsi,%rdx
jne .Lmul4x_enter
testl $7,%r9d
jz .Lsqr8x_enter
jmp .Lmul4x_enter

.align 16
.Lmul_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
Expand Down Expand Up @@ -266,20 +250,17 @@ _CET_ENDBR
.Lmul_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
.globl bn_mul4x_mont
.hidden bn_mul4x_mont
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
.cfi_startproc
_CET_ENDBR
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
andl $0x80100,%r11d
cmpl $0x80100,%r11d
je .Lmulx4x_enter
#endif
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
Expand Down Expand Up @@ -710,13 +691,16 @@ bn_mul4x_mont:
.extern bn_sqr8x_internal
.hidden bn_sqr8x_internal

.globl bn_sqr8x_mont
.hidden bn_sqr8x_mont
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
.cfi_startproc
_CET_ENDBR
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
Expand Down Expand Up @@ -901,13 +885,15 @@ bn_sqr8x_mont:
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
.globl bn_mulx4x_mont
.hidden bn_mulx4x_mont
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
.cfi_startproc
_CET_ENDBR
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
Expand Down
Loading

0 comments on commit 35d5287

Please sign in to comment.