Skip to content

Commit

Permalink
Merge GH #3206 Add initial AVX512 support
Browse files Browse the repository at this point in the history
  • Loading branch information
randombit committed May 1, 2023
2 parents 2f7d529 + 8053269 commit 1881cd8
Show file tree
Hide file tree
Showing 18 changed files with 876 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/build-data/arch/x86_64.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@ sse2
sse41
sse42
ssse3
avx512
</isa_extensions>
2 changes: 2 additions & 0 deletions src/build-data/cc/clang.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ ssse3 -> "-mssse3"
sse41 -> "-msse4.1"
sse42 -> "-msse4.2"
avx2 -> "-mavx2"
avx512 -> "-mavx512f -mavx512bw -mavx512dq -mavx512vbmi -mavx512vbmi2 -mavx512bitalg -mavx512vl -mavx512ifma"

bmi2 -> "-mbmi -mbmi2"
aesni -> "-maes -mpclmul"
rdrand -> "-mrdrnd"
Expand Down
2 changes: 2 additions & 0 deletions src/build-data/cc/gcc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ ssse3 -> "-mssse3"
sse41 -> "-msse4.1"
sse42 -> "-msse4.2"
avx2 -> "-mavx2"
avx512 -> "-mavx512f -mavx512bw -mavx512dq -mavx512vbmi -mavx512vbmi2 -mavx512bitalg -mavx512vl -mavx512ifma"

bmi2 -> "-mbmi -mbmi2"
aesni -> "-maes -mpclmul"
rdrand -> "-mrdrnd"
Expand Down
1 change: 1 addition & 0 deletions src/build-data/cc/msvc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ ssse3 -> ""
sse41 -> ""
sse42 -> ""
x86_64:avx2 -> "/arch:AVX"
x86_64:avx512 -> "/arch:AVX512"
bmi2 -> ""
aesni -> ""
clmul -> ""
Expand Down
35 changes: 34 additions & 1 deletion src/lib/block/serpent/serpent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <botan/internal/rotate.h>
#include <botan/internal/serpent_sbox.h>

#if defined(BOTAN_HAS_SERPENT_SIMD) || defined(BOTAN_HAS_SERPENT_AVX2)
#if defined(BOTAN_HAS_SERPENT_SIMD) || defined(BOTAN_HAS_SERPENT_AVX2) || defined(BOTAN_HAS_SERPENT_AVX512)
#include <botan/internal/cpuid.h>
#endif

Expand All @@ -26,6 +26,19 @@ void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const

assert_key_material_set();

#if defined(BOTAN_HAS_SERPENT_AVX512)
if(CPUID::has_avx512())
{
while(blocks >= 16)
{
avx512_encrypt_16(in, out);
in += 16 * BLOCK_SIZE;
out += 16 * BLOCK_SIZE;
blocks -= 16;
}
}
#endif

#if defined(BOTAN_HAS_SERPENT_AVX2)
if(CPUID::has_avx2())
{
Expand Down Expand Up @@ -105,6 +118,19 @@ void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const

assert_key_material_set();

#if defined(BOTAN_HAS_SERPENT_AVX512)
if(CPUID::has_avx512())
{
while(blocks >= 16)
{
avx512_decrypt_16(in, out);
in += 16 * BLOCK_SIZE;
out += 16 * BLOCK_SIZE;
blocks -= 16;
}
}
#endif

#if defined(BOTAN_HAS_SERPENT_AVX2)
if(CPUID::has_avx2())
{
Expand Down Expand Up @@ -252,6 +278,13 @@ void Serpent::clear()

std::string Serpent::provider() const
{
#if defined(BOTAN_HAS_SERPENT_AVX512)
if(CPUID::has_avx512())
{
return "avx512";
}
#endif

#if defined(BOTAN_HAS_SERPENT_AVX2)
if(CPUID::has_avx2())
{
Expand Down
13 changes: 9 additions & 4 deletions src/lib/block/serpent/serpent.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,18 @@ class Serpent final : public Block_Cipher_Fixed_Params<16, 16, 32, 8>
private:

#if defined(BOTAN_HAS_SERPENT_SIMD)
void simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const;
void simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const;
void simd_encrypt_4(const uint8_t in[16*4], uint8_t out[16*4]) const;
void simd_decrypt_4(const uint8_t in[16*4], uint8_t out[16*4]) const;
#endif

#if defined(BOTAN_HAS_SERPENT_AVX2)
void avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const;
void avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const;
void avx2_encrypt_8(const uint8_t in[16*8], uint8_t out[16*8]) const;
void avx2_decrypt_8(const uint8_t in[16*8], uint8_t out[16*8]) const;
#endif

#if defined(BOTAN_HAS_SERPENT_AVX512)
void avx512_encrypt_16(const uint8_t in[16*16], uint8_t out[16*16]) const;
void avx512_decrypt_16(const uint8_t in[16*16], uint8_t out[16*16]) const;
#endif

void key_schedule(const uint8_t key[], size_t length) override;
Expand Down
16 changes: 16 additions & 0 deletions src/lib/block/serpent/serpent_avx512/info.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<defines>
SERPENT_AVX512 -> 20230101
</defines>

<module_info>
name -> "Serpent AVX512"
brief -> "Serpent using AVX512 instructions"
</module_info>

<isa>
avx512
</isa>

<requires>
simd_avx512
</requires>
132 changes: 132 additions & 0 deletions src/lib/block/serpent/serpent_avx512/serpent_avx512.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* (C) 2023 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/internal/serpent.h>
#include <botan/internal/simd_avx512.h>
#include <botan/internal/serpent_sbox.h>

namespace Botan {

/*
* Encrypt 16 Serpent blocks (16*16 = 256 bytes) from `in` into `out`,
* processing all 16 blocks together in four SIMD_16x32 values.
*
* @param in  256 bytes of input, loaded at byte offsets 0, 64, 128 and 192
* @param out 256 bytes of output, stored at the same four offsets
*
* The round keys are taken from m_round_key. Rounds 0..30 each consist of
* key_xor, one of SBoxE0..SBoxE7 (cycling every eight rounds) and
* transform(); round 31 replaces transform() with a final key_xor(32, ...).
*
* NOTE(review): SIMD_16x32, Key_Inserter, SBoxE* and transform are declared
* outside this file; what is said about them here is inferred from how they
* are called - confirm against their definitions.
*/
BOTAN_AVX512_FN
void Serpent::avx512_encrypt_16(const uint8_t in[16*16], uint8_t out[16*16]) const
{
using namespace Botan::Serpent_F;

// Four little-endian loads covering the whole 256-byte input
SIMD_16x32 B0 = SIMD_16x32::load_le(in);
SIMD_16x32 B1 = SIMD_16x32::load_le(in + 64);
SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128);
SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192);

// NOTE(review): presumably regroups the data so that each register holds
// the same 32-bit word position of every block - confirm in simd_avx512.h
SIMD_16x32::transpose(B0, B1, B2, B3);

const Key_Inserter key_xor(m_round_key.data());

// Rounds 0-7: key_xor(round), SBoxE(round mod 8), transform
key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);

// Rounds 8-15: same S-box cycle E0..E7
key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);

// Rounds 16-23
key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);

// Rounds 24-31; the last line deliberately differs: there is no transform()
// after SBoxE7, and a second key_xor with index 32 is applied instead
key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3);

// Same transpose call as before the rounds, then write the result out at
// the offsets the input was read from
SIMD_16x32::transpose(B0, B1, B2, B3);
B0.store_le(out);
B1.store_le(out + 64);
B2.store_le(out + 128);
B3.store_le(out + 192);

// NOTE(review): presumably clears the vector registers so that key- and
// data-dependent values do not linger after returning - confirm
SIMD_16x32::zero_registers();
}

/*
* Decrypt 16 Serpent blocks (16*16 = 256 bytes) from `in` into `out`,
* processing all 16 blocks together in four SIMD_16x32 values.
*
* @param in  256 bytes of input, loaded at byte offsets 0, 64, 128 and 192
* @param out 256 bytes of output, stored at the same four offsets
*
* The steps are those of avx512_encrypt_16 applied in reverse order: the
* round keys from m_round_key are used from index 32 down to 0, the S-box
* calls are SBoxD7..SBoxD0 (cycling every eight rounds) and i_transform()
* is used where encryption calls transform().
*
* NOTE(review): SIMD_16x32, Key_Inserter, SBoxD* and i_transform are declared
* outside this file; what is said about them here is inferred from how they
* are called - confirm against their definitions.
*/
BOTAN_AVX512_FN
void Serpent::avx512_decrypt_16(const uint8_t in[16*16], uint8_t out[16*16]) const
{
using namespace Botan::Serpent_F;

// Four little-endian loads covering the whole 256-byte input
SIMD_16x32 B0 = SIMD_16x32::load_le(in);
SIMD_16x32 B1 = SIMD_16x32::load_le(in + 64);
SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128);
SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192);

// NOTE(review): presumably regroups the data so that each register holds
// the same 32-bit word position of every block - confirm in simd_avx512.h
SIMD_16x32::transpose(B0, B1, B2, B3);

const Key_Inserter key_xor(m_round_key.data());

// First line deliberately differs: it starts with key_xor(32) and has no
// i_transform(), matching the final encryption round which has no transform().
// Every following line is i_transform, SBoxD(n mod 8), key_xor(n).
key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3);

// Key indices 23 down to 16
i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3);

// Key indices 15 down to 8
i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3);

// Key indices 7 down to 0
i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3);

// Same transpose call as before the rounds, then write the result out at
// the offsets the input was read from
SIMD_16x32::transpose(B0, B1, B2, B3);

B0.store_le(out);
B1.store_le(out + 64);
B2.store_le(out + 128);
B3.store_le(out + 192);

// NOTE(review): presumably clears the vector registers so that key- and
// data-dependent values do not linger after returning - confirm
SIMD_16x32::zero_registers();
}

}
1 change: 1 addition & 0 deletions src/lib/block/serpent/serpent_sbox.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#define BOTAN_SERPENT_FUNCS_H_

#include <botan/types.h>
#include <botan/internal/rotate.h>

namespace Botan::Serpent_F {

Expand Down
26 changes: 25 additions & 1 deletion src/lib/stream/chacha/chacha.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* ChaCha
* (C) 2014,2018 Jack Lloyd
* (C) 2014,2018,2023 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
Expand Down Expand Up @@ -73,6 +73,11 @@ ChaCha::ChaCha(size_t rounds) : m_rounds(rounds)

size_t ChaCha::parallelism()
{
#if defined(BOTAN_HAS_CHACHA_AVX512)
if(CPUID::has_avx512())
return 16;
#endif

#if defined(BOTAN_HAS_CHACHA_AVX2)
if(CPUID::has_avx2())
return 8;
Expand All @@ -83,6 +88,13 @@ size_t ChaCha::parallelism()

std::string ChaCha::provider() const
{
#if defined(BOTAN_HAS_CHACHA_AVX512)
if(CPUID::has_avx512())
{
return "avx512";
}
#endif

#if defined(BOTAN_HAS_CHACHA_AVX2)
if(CPUID::has_avx2())
{
Expand All @@ -106,6 +118,18 @@ void ChaCha::chacha(uint8_t output[],
{
BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");

#if defined(BOTAN_HAS_CHACHA_AVX512)
if(CPUID::has_avx512())
{
while(output_blocks >= 16)
{
ChaCha::chacha_avx512_x16(output, state, rounds);
output += 16*64;
output_blocks -= 16;
}
}
#endif

#if defined(BOTAN_HAS_CHACHA_AVX2)
if(CPUID::has_avx2())
{
Expand Down
4 changes: 4 additions & 0 deletions src/lib/stream/chacha/chacha.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ class ChaCha final : public StreamCipher
static void chacha_avx2_x8(uint8_t output[64*8], uint32_t state[16], size_t rounds);
#endif

#if defined(BOTAN_HAS_CHACHA_AVX512)
static void chacha_avx512_x16(uint8_t output[64*16], uint32_t state[16], size_t rounds);
#endif

size_t m_rounds;
secure_vector<uint32_t> m_key;
secure_vector<uint32_t> m_state;
Expand Down
Loading

0 comments on commit 1881cd8

Please sign in to comment.