Skip to content

Commit

Permalink
Generated ASM
Browse files — browse the repository at this point in the history
  • Loading branch information
justsmth committed Jun 25, 2024
1 parent 2bc30f1 commit a06623e
Show file tree
Hide file tree
Showing 14 changed files with 192 additions and 3 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
.text
.globl gcm_init_avx512
.hidden gcm_init_avx512
Expand Down Expand Up @@ -136464,3 +136465,4 @@ byte64_len_to_mask_table:
.quad 0xffffffffffffffff
.text
#endif
#endif
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
.text
.globl aes_hw_xts_encrypt_avx512
.hidden aes_hw_xts_encrypt_avx512
Expand Down Expand Up @@ -5226,3 +5227,4 @@ shufb_15_7:

.text
#endif
#endif
37 changes: 36 additions & 1 deletion generated-src/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,11 +100,13 @@ ecp_nistz256_neg:
.align 32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
cmpl $0x80100,%ecx
je .Lecp_nistz256_ord_mul_montx
#endif
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
Expand Down Expand Up @@ -434,11 +436,13 @@ ecp_nistz256_ord_mul_mont:
.align 32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
cmpl $0x80100,%ecx
je .Lecp_nistz256_ord_sqr_montx
#endif
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
Expand Down Expand Up @@ -725,6 +729,7 @@ ecp_nistz256_ord_sqr_mont:
.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX

.type ecp_nistz256_ord_mul_montx,@function
.align 32
Expand Down Expand Up @@ -1181,6 +1186,7 @@ ecp_nistz256_ord_sqr_montx:
.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx
#endif



Expand All @@ -1193,9 +1199,11 @@ ecp_nistz256_ord_sqr_montx:
.align 32
ecp_nistz256_mul_mont:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
#endif
.Lmul_mont:
pushq %rbp
.cfi_adjust_cfa_offset 8
Expand All @@ -1216,8 +1224,10 @@ ecp_nistz256_mul_mont:
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lmul_body:
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
cmpl $0x80100,%ecx
je .Lmul_montx
#endif
movq %rdx,%rbx
movq 0(%rdx),%rax
movq 0(%rsi),%r9
Expand All @@ -1226,6 +1236,7 @@ ecp_nistz256_mul_mont:
movq 24(%rsi),%r12

call __ecp_nistz256_mul_montq
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
jmp .Lmul_mont_done

.align 32
Expand All @@ -1239,6 +1250,7 @@ ecp_nistz256_mul_mont:
leaq -128(%rsi),%rsi

call __ecp_nistz256_mul_montx
#endif
.Lmul_mont_done:
movq 0(%rsp),%r15
.cfi_restore %r15
Expand Down Expand Up @@ -1490,9 +1502,11 @@ __ecp_nistz256_mul_montq:
.align 32
ecp_nistz256_sqr_mont:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
#endif
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
Expand All @@ -1512,14 +1526,17 @@ ecp_nistz256_sqr_mont:
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lsqr_body:
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
cmpl $0x80100,%ecx
je .Lsqr_montx
#endif
movq 0(%rsi),%rax
movq 8(%rsi),%r14
movq 16(%rsi),%r15
movq 24(%rsi),%r8

call __ecp_nistz256_sqr_montq
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
jmp .Lsqr_mont_done

.align 32
Expand All @@ -1531,6 +1548,7 @@ ecp_nistz256_sqr_mont:
leaq -128(%rsi),%rsi

call __ecp_nistz256_sqr_montx
#endif
.Lsqr_mont_done:
movq 0(%rsp),%r15
.cfi_restore %r15
Expand Down Expand Up @@ -1714,6 +1732,7 @@ __ecp_nistz256_sqr_montq:
.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
.type __ecp_nistz256_mul_montx,@function
.align 32
__ecp_nistz256_mul_montx:
Expand Down Expand Up @@ -2012,6 +2031,7 @@ __ecp_nistz256_sqr_montx:
.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
#endif


.globl ecp_nistz256_select_w5
Expand All @@ -2020,10 +2040,12 @@ __ecp_nistz256_sqr_montx:
.align 32
ecp_nistz256_select_w5:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
jnz .Lavx2_select_w5
#endif
movdqa .LOne(%rip),%xmm0
movd %edx,%xmm1

Expand Down Expand Up @@ -2087,10 +2109,12 @@ ecp_nistz256_select_w5:
.align 32
ecp_nistz256_select_w7:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
jnz .Lavx2_select_w7
#endif
movdqa .LOne(%rip),%xmm8
movd %edx,%xmm1

Expand Down Expand Up @@ -2134,6 +2158,7 @@ ecp_nistz256_select_w7:
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w7:
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX


.type ecp_nistz256_avx2_select_w5,@function
Expand Down Expand Up @@ -2197,7 +2222,8 @@ ecp_nistz256_avx2_select_w5:
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w5:
.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5

#endif
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX


.globl ecp_nistz256_avx2_select_w7
Expand Down Expand Up @@ -2278,6 +2304,7 @@ ecp_nistz256_avx2_select_w7:
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w7:
.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
#endif
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
Expand Down Expand Up @@ -2413,11 +2440,13 @@ __ecp_nistz256_mul_by_2q:
.align 32
ecp_nistz256_point_double:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
cmpl $0x80100,%ecx
je .Lpoint_doublex
#endif
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
Expand Down Expand Up @@ -2647,11 +2676,13 @@ ecp_nistz256_point_double:
.align 32
ecp_nistz256_point_add:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
cmpl $0x80100,%ecx
je .Lpoint_addx
#endif
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
Expand Down Expand Up @@ -3084,11 +3115,13 @@ ecp_nistz256_point_add:
.align 32
ecp_nistz256_point_add_affine:
.cfi_startproc
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rcx
movq 8(%rcx),%rcx
andl $0x80100,%ecx
cmpl $0x80100,%ecx
je .Lpoint_add_affinex
#endif
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
Expand Down Expand Up @@ -3412,6 +3445,7 @@ ecp_nistz256_point_add_affine:
.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
.type __ecp_nistz256_add_tox,@function
.align 32
__ecp_nistz256_add_tox:
Expand Down Expand Up @@ -3547,6 +3581,7 @@ __ecp_nistz256_mul_by_2x:
.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
#endif
.type ecp_nistz256_point_doublex,@function
.align 32
ecp_nistz256_point_doublex:
Expand Down
10 changes: 10 additions & 0 deletions generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont.S
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,8 +22,10 @@ bn_mul_mont:
jnz .Lmul_enter
cmpl $8,%r9d
jb .Lmul_enter
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 8(%r11),%r11d
#endif
cmpq %rsi,%rdx
jne .Lmul4x_enter
testl $7,%r9d
Expand Down Expand Up @@ -272,9 +274,11 @@ bn_mul4x_mont:
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
andl $0x80100,%r11d
cmpl $0x80100,%r11d
je .Lmulx4x_enter
#endif
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
Expand Down Expand Up @@ -698,8 +702,10 @@ bn_mul4x_mont:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
.extern bn_sqrx8x_internal
.hidden bn_sqrx8x_internal
#endif
.extern bn_sqr8x_internal
.hidden bn_sqr8x_internal

Expand Down Expand Up @@ -784,6 +790,7 @@ bn_sqr8x_mont:
pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,73,15,110,218
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%rax
movl 8(%rax),%eax
andl $0x80100,%eax
Expand All @@ -804,6 +811,7 @@ bn_sqr8x_mont:

.align 32
.Lsqr8x_nox:
#endif
call bn_sqr8x_internal


Expand Down Expand Up @@ -891,6 +899,7 @@ bn_sqr8x_mont:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
Expand Down Expand Up @@ -1247,6 +1256,7 @@ bn_mulx4x_mont:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
#endif
8 changes: 8 additions & 0 deletions generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,8 +20,10 @@ bn_mul_mont_gather5:
.cfi_def_cfa_register %rax
testl $7,%r9d
jnz .Lmul_enter
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 8(%r11),%r11d
#endif
jmp .Lmul4x_enter

.align 16
Expand Down Expand Up @@ -460,9 +462,11 @@ bn_mul4x_mont_gather5:
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lmulx4x_enter
#endif
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
Expand Down Expand Up @@ -1095,11 +1099,13 @@ bn_power5:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 8(%r11),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lpowerx5_enter
#endif
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
Expand Down Expand Up @@ -2064,6 +2070,7 @@ __bn_post4x_internal:
.byte 0xf3,0xc3
.cfi_endproc
.size __bn_post4x_internal,.-__bn_post4x_internal
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
.type bn_mulx4x_mont_gather5,@function
.align 32
bn_mulx4x_mont_gather5:
Expand Down Expand Up @@ -3410,6 +3417,7 @@ __bn_postx4x_internal:
.byte 0xf3,0xc3
.cfi_endproc
.size __bn_postx4x_internal,.-__bn_postx4x_internal
#endif
.globl bn_scatter5
.hidden bn_scatter5
.type bn_scatter5,@function
Expand Down
Loading

0 comments on commit a06623e

Please sign in to comment.