From b286c092d1a4518805eac5b01cf4fede4ccac601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 31 Jul 2024 18:59:16 +0300 Subject: [PATCH 1/9] aes: rework backends --- aes/src/armv8.rs | 168 ++++++++++++------------- aes/src/armv8/encdec.rs | 238 +++++++++++++++++++---------------- aes/src/armv8/expand.rs | 15 ++- aes/src/armv8/test_expand.rs | 8 +- aes/src/lib.rs | 2 + aes/src/macros.rs | 102 +++++++++++++++ aes/src/ni.rs | 150 ++++++++++------------ aes/src/ni/aes128.rs | 145 --------------------- aes/src/ni/aes192.rs | 198 ----------------------------- aes/src/ni/aes256.rs | 196 ----------------------------- aes/src/ni/encdec.rs | 159 +++++++++++++++++++++++ aes/src/ni/expand.rs | 202 +++++++++++++++++++++++++++++ aes/src/ni/hazmat.rs | 36 ++++-- aes/src/ni/test_expand.rs | 55 +++++--- aes/src/ni/utils.rs | 92 -------------- 15 files changed, 816 insertions(+), 950 deletions(-) create mode 100644 aes/src/macros.rs delete mode 100644 aes/src/ni/aes128.rs delete mode 100644 aes/src/ni/aes192.rs delete mode 100644 aes/src/ni/aes256.rs create mode 100644 aes/src/ni/encdec.rs create mode 100644 aes/src/ni/expand.rs delete mode 100644 aes/src/ni/utils.rs diff --git a/aes/src/armv8.rs b/aes/src/armv8.rs index 4ac12959..a6bd438c 100644 --- a/aes/src/armv8.rs +++ b/aes/src/armv8.rs @@ -17,20 +17,55 @@ mod expand; #[cfg(test)] mod test_expand; -use self::{ - encdec::{decrypt1, decrypt8, encrypt1, encrypt8}, - expand::{expand_key, inv_expanded_keys}, -}; -use crate::{Block, Block8}; use cipher::{ - consts::{U16, U24, U32, U8}, - inout::InOut, - AlgorithmName, BlockBackend, BlockCipher, BlockCipherDecrypt, BlockCipherEncrypt, BlockClosure, - BlockSizeUser, Key, KeyInit, KeySizeUser, ParBlocksSizeUser, + consts::{self, U16, U24, U32}, + AlgorithmName, BlockCipher, BlockCipherDecrypt, BlockCipherEncrypt, BlockClosure, + BlockSizeUser, Key, KeyInit, KeySizeUser, }; 
-use core::arch::aarch64::*; use core::fmt; +impl_backends!( + enc_name = Aes128BackEnc, + dec_name = Aes128BackDec, + key_size = consts::U16, + keys_ty = expand::Aes128RoundKeys, + par_size = consts::U15, + expand_keys = expand::expand_key, + inv_keys = expand::inv_expanded_keys, + encrypt = encdec::encrypt, + encrypt_par = encdec::encrypt_par, + decrypt = encdec::decrypt, + decrypt_par = encdec::decrypt_par, +); + +impl_backends!( + enc_name = Aes192BackEnc, + dec_name = Aes192BackDec, + key_size = consts::U24, + keys_ty = expand::Aes192RoundKeys, + par_size = consts::U15, + expand_keys = expand::expand_key, + inv_keys = expand::inv_expanded_keys, + encrypt = encdec::encrypt, + encrypt_par = encdec::encrypt_par, + decrypt = encdec::decrypt, + decrypt_par = encdec::decrypt_par, +); + +impl_backends!( + enc_name = Aes256BackEnc, + dec_name = Aes256BackDec, + key_size = consts::U32, + keys_ty = expand::Aes256RoundKeys, + par_size = consts::U15, + expand_keys = expand::expand_key, + inv_keys = expand::inv_expanded_keys, + encrypt = encdec::encrypt, + encrypt_par = encdec::encrypt_par, + decrypt = encdec::decrypt, + decrypt_par = encdec::decrypt_par, +); + macro_rules! define_aes_impl { ( $name:ident, @@ -46,19 +81,19 @@ macro_rules! define_aes_impl { #[doc = "block cipher"] #[derive(Clone)] pub struct $name { - encrypt: $name_enc, - decrypt: $name_dec, + encrypt: $name_back_enc, + decrypt: $name_back_dec, } impl $name { #[inline(always)] - pub(crate) fn get_enc_backend(&self) -> $name_back_enc<'_> { - self.encrypt.get_enc_backend() + pub(crate) fn get_enc_backend(&self) -> &$name_back_enc { + &self.encrypt } #[inline(always)] - pub(crate) fn get_dec_backend(&self) -> $name_back_dec<'_> { - self.decrypt.get_dec_backend() + pub(crate) fn get_dec_backend(&self) -> &$name_back_dec { + &self.decrypt } } @@ -71,8 +106,8 @@ macro_rules! 
define_aes_impl { impl KeyInit for $name { #[inline] fn new(key: &Key) -> Self { - let encrypt = $name_enc::new(key); - let decrypt = $name_dec::from(&encrypt); + let encrypt = $name_back_enc::new(key); + let decrypt = $name_back_dec::from(&encrypt); Self { encrypt, decrypt } } } @@ -80,6 +115,7 @@ macro_rules! define_aes_impl { impl From<$name_enc> for $name { #[inline] fn from(encrypt: $name_enc) -> $name { + let encrypt = encrypt.backend.clone(); let decrypt = (&encrypt).into(); Self { encrypt, decrypt } } @@ -88,8 +124,8 @@ macro_rules! define_aes_impl { impl From<&$name_enc> for $name { #[inline] fn from(encrypt: &$name_enc) -> $name { - let decrypt = encrypt.into(); - let encrypt = encrypt.clone(); + let encrypt = encrypt.backend.clone(); + let decrypt = (&encrypt).into(); Self { encrypt, decrypt } } } @@ -100,13 +136,13 @@ macro_rules! define_aes_impl { impl BlockCipherEncrypt for $name { fn encrypt_with_backend(&self, f: impl BlockClosure) { - self.encrypt.encrypt_with_backend(f) + f.call(&mut &self.encrypt) } } impl BlockCipherDecrypt for $name { fn decrypt_with_backend(&self, f: impl BlockClosure) { - self.decrypt.decrypt_with_backend(f) + f.call(&mut &self.decrypt) } } @@ -129,13 +165,13 @@ macro_rules! define_aes_impl { #[doc = "block cipher (encrypt-only)"] #[derive(Clone)] pub struct $name_enc { - round_keys: [uint8x16_t; $rounds], + backend: $name_back_enc, } impl $name_enc { #[inline(always)] - pub(crate) fn get_enc_backend(&self) -> $name_back_enc<'_> { - $name_back_enc(self) + pub(crate) fn get_enc_backend(&self) -> &$name_back_enc { + &self.backend } } @@ -148,9 +184,8 @@ macro_rules! define_aes_impl { impl KeyInit for $name_enc { #[inline] fn new(key: &Key) -> Self { - Self { - round_keys: unsafe { expand_key(key.as_ref()) }, - } + let backend = $name_back_enc::new(key); + Self { backend } } } @@ -160,7 +195,7 @@ macro_rules! 
define_aes_impl { impl BlockCipherEncrypt for $name_enc { fn encrypt_with_backend(&self, f: impl BlockClosure) { - f.call(&mut self.get_enc_backend()) + f.call(&mut &self.backend) } } @@ -180,7 +215,9 @@ macro_rules! define_aes_impl { #[inline] fn drop(&mut self) { #[cfg(feature = "zeroize")] - zeroize::Zeroize::zeroize(&mut self.round_keys); + unsafe { + zeroize::zeroize_flat_type(&mut self.backend); + } } } @@ -191,13 +228,13 @@ macro_rules! define_aes_impl { #[doc = "block cipher (decrypt-only)"] #[derive(Clone)] pub struct $name_dec { - round_keys: [uint8x16_t; $rounds], + backend: $name_back_dec, } impl $name_dec { #[inline(always)] - pub(crate) fn get_dec_backend(&self) -> $name_back_dec<'_> { - $name_back_dec(self) + pub(crate) fn get_dec_backend(&self) -> &$name_back_dec { + &self.backend } } @@ -210,7 +247,9 @@ macro_rules! define_aes_impl { impl KeyInit for $name_dec { #[inline] fn new(key: &Key) -> Self { - $name_enc::new(key).into() + let encrypt = $name_back_enc::new(key); + let backend = (&encrypt).into(); + Self { backend } } } @@ -222,10 +261,9 @@ macro_rules! define_aes_impl { } impl From<&$name_enc> for $name_dec { - fn from(enc: &$name_enc) -> $name_dec { - let mut round_keys = enc.round_keys; - unsafe { inv_expanded_keys(&mut round_keys) }; - Self { round_keys } + fn from(encrypt: &$name_enc) -> $name_dec { + let backend = (&encrypt.backend).into(); + Self { backend } } } @@ -235,7 +273,7 @@ macro_rules! define_aes_impl { impl BlockCipherDecrypt for $name_dec { fn decrypt_with_backend(&self, f: impl BlockClosure) { - f.call(&mut self.get_dec_backend()); + f.call(&mut &self.backend); } } @@ -255,60 +293,14 @@ macro_rules! 
define_aes_impl { #[inline] fn drop(&mut self) { #[cfg(feature = "zeroize")] - zeroize::Zeroize::zeroize(&mut self.round_keys); - } - } - - #[cfg(feature = "zeroize")] - impl zeroize::ZeroizeOnDrop for $name_dec {} - - pub(crate) struct $name_back_enc<'a>(&'a $name_enc); - - impl<'a> BlockSizeUser for $name_back_enc<'a> { - type BlockSize = U16; - } - - impl<'a> ParBlocksSizeUser for $name_back_enc<'a> { - type ParBlocksSize = U8; - } - - impl<'a> BlockBackend for $name_back_enc<'a> { - #[inline(always)] - fn proc_block(&mut self, block: InOut<'_, '_, Block>) { unsafe { - encrypt1(&self.0.round_keys, block); + zeroize::zeroize_flat_type(&mut self.backend); } } - - #[inline(always)] - fn proc_par_blocks(&mut self, blocks: InOut<'_, '_, Block8>) { - unsafe { encrypt8(&self.0.round_keys, blocks) } - } - } - - pub(crate) struct $name_back_dec<'a>(&'a $name_dec); - - impl<'a> BlockSizeUser for $name_back_dec<'a> { - type BlockSize = U16; - } - - impl<'a> ParBlocksSizeUser for $name_back_dec<'a> { - type ParBlocksSize = U8; } - impl<'a> BlockBackend for $name_back_dec<'a> { - #[inline(always)] - fn proc_block(&mut self, block: InOut<'_, '_, Block>) { - unsafe { - decrypt1(&self.0.round_keys, block); - } - } - - #[inline(always)] - fn proc_par_blocks(&mut self, blocks: InOut<'_, '_, Block8>) { - unsafe { decrypt8(&self.0.round_keys, blocks) } - } - } + #[cfg(feature = "zeroize")] + impl zeroize::ZeroizeOnDrop for $name_dec {} }; } diff --git a/aes/src/armv8/encdec.rs b/aes/src/armv8/encdec.rs index 7f462564..0147887f 100644 --- a/aes/src/armv8/encdec.rs +++ b/aes/src/armv8/encdec.rs @@ -1,158 +1,178 @@ //! AES encryption support +//! +//! Note that `aes` target feature implicitly enables `neon`, see: +//! 
https://doc.rust-lang.org/reference/attributes/codegen.html#aarch64 -use crate::{Block, Block8}; -use cipher::inout::InOut; -use core::arch::aarch64::*; +use crate::Block; +use cipher::{ + array::{Array, ArraySize}, + inout::InOut, +}; +use core::{arch::aarch64::*, mem}; /// Perform AES encryption using the given expanded keys. #[target_feature(enable = "aes")] -#[target_feature(enable = "neon")] -pub(super) unsafe fn encrypt1( - expanded_keys: &[uint8x16_t; N], +pub(super) unsafe fn encrypt( + keys: &[uint8x16_t; KEYS], block: InOut<'_, '_, Block>, ) { - let rounds = N - 1; - assert!(rounds == 10 || rounds == 12 || rounds == 14); - + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); let (in_ptr, out_ptr) = block.into_raw(); + let mut state = vld1q_u8(in_ptr.cast()); - let mut state = vld1q_u8(in_ptr as *const u8); - - for k in expanded_keys.iter().take(rounds - 1) { + for &key in &keys[..KEYS - 2] { // AES single round encryption - state = vaeseq_u8(state, *k); - + state = vaeseq_u8(state, key); // Mix columns state = vaesmcq_u8(state); } // AES single round encryption - state = vaeseq_u8(state, expanded_keys[rounds - 1]); - + state = vaeseq_u8(state, keys[KEYS - 2]); // Final add (bitwise XOR) - state = veorq_u8(state, expanded_keys[rounds]); + state = veorq_u8(state, keys[KEYS - 1]); - vst1q_u8(out_ptr as *mut u8, state); + vst1q_u8(out_ptr.cast(), state); } -/// Perform parallel AES encryption 8-blocks-at-a-time using the given expanded keys. +/// Perform AES decryption using the given expanded keys. 
#[target_feature(enable = "aes")] -#[target_feature(enable = "neon")] -pub(super) unsafe fn encrypt8( - expanded_keys: &[uint8x16_t; N], - blocks: InOut<'_, '_, Block8>, +pub(super) unsafe fn decrypt( + keys: &[uint8x16_t; KEYS], + block: InOut<'_, '_, Block>, ) { - let rounds = N - 1; - assert!(rounds == 10 || rounds == 12 || rounds == 14); + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); - let (in_ptr, out_ptr) = blocks.into_raw(); - let in_ptr = in_ptr as *const Block; - let out_ptr = out_ptr as *const Block; - - let mut state = [ - vld1q_u8(in_ptr.add(0) as *const u8), - vld1q_u8(in_ptr.add(1) as *const u8), - vld1q_u8(in_ptr.add(2) as *const u8), - vld1q_u8(in_ptr.add(3) as *const u8), - vld1q_u8(in_ptr.add(4) as *const u8), - vld1q_u8(in_ptr.add(5) as *const u8), - vld1q_u8(in_ptr.add(6) as *const u8), - vld1q_u8(in_ptr.add(7) as *const u8), - ]; - - for k in expanded_keys.iter().take(rounds - 1) { - for i in 0..8 { - // AES single round encryption - state[i] = vaeseq_u8(state[i], *k); - - // Mix columns - state[i] = vaesmcq_u8(state[i]); - } - } + let (in_ptr, out_ptr) = block.into_raw(); + let mut state = vld1q_u8(in_ptr.cast()); - for i in 0..8 { - // AES single round encryption - state[i] = vaeseq_u8(state[i], expanded_keys[rounds - 1]); + for &key in &keys[..KEYS - 2] { + // AES single round decryption + state = vaesdq_u8(state, key); + // Inverse mix columns + state = vaesimcq_u8(state); + } - // Final add (bitwise XOR) - state[i] = veorq_u8(state[i], expanded_keys[rounds]); + // AES single round decryption + state = vaesdq_u8(state, keys[KEYS - 2]); + // Final add (bitwise XOR) + state = veorq_u8(state, keys[KEYS - 1]); - vst1q_u8(out_ptr.add(i) as *mut u8, state[i]); - } + vst1q_u8(out_ptr.cast(), state); } -/// Perform AES decryption using the given expanded keys. +/// Perform parallel AES encryption 8-blocks-at-a-time using the given expanded keys. 
#[target_feature(enable = "aes")] -#[target_feature(enable = "neon")] -pub(super) unsafe fn decrypt1( - expanded_keys: &[uint8x16_t; N], - block: InOut<'_, '_, Block>, +pub(super) unsafe fn encrypt_par( + keys: &[uint8x16_t; KEYS], + blocks: InOut<'_, '_, Array>, ) { - let rounds = N - 1; - assert!(rounds == 10 || rounds == 12 || rounds == 14); + #[inline(always)] + unsafe fn par_round( + key: uint8x16_t, + state: &mut Array, + ) { + for s in state { + // AES single round encryption and mix columns + *s = vaesmcq_u8(vaeseq_u8(*s, key)); + } + } - let (in_ptr, out_ptr) = block.into_raw(); - let mut state = vld1q_u8(in_ptr as *const u8); + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); - for k in expanded_keys.iter().take(rounds - 1) { - // AES single round decryption - state = vaesdq_u8(state, *k); + let (in_ptr, out_ptr) = blocks.into_raw(); + let in_ptr: *const Block = in_ptr.cast(); + let out_ptr: *mut Block = out_ptr.cast(); - // Inverse mix columns - state = vaesimcq_u8(state); + // Load plaintext blocks + let mut state: Array = mem::zeroed(); + for i in 0..ParBlocks::USIZE { + state[i] = vld1q_u8(in_ptr.add(i).cast()); } - // AES single round decryption - state = vaesdq_u8(state, expanded_keys[rounds - 1]); - - // Final add (bitwise XOR) - state = veorq_u8(state, expanded_keys[rounds]); + // Loop is intentionally not used here to enforce inlining + par_round(keys[0], &mut state); + par_round(keys[1], &mut state); + par_round(keys[2], &mut state); + par_round(keys[3], &mut state); + par_round(keys[4], &mut state); + par_round(keys[5], &mut state); + par_round(keys[6], &mut state); + par_round(keys[7], &mut state); + par_round(keys[8], &mut state); + if KEYS >= 13 { + par_round(keys[9], &mut state); + par_round(keys[10], &mut state); + } + if KEYS == 15 { + par_round(keys[11], &mut state); + par_round(keys[12], &mut state); + } - vst1q_u8(out_ptr as *mut u8, state); + for i in 0..ParBlocks::USIZE { + // AES single round encryption + state[i] = 
vaeseq_u8(state[i], keys[KEYS - 2]); + // Final add (bitwise XOR) + state[i] = veorq_u8(state[i], keys[KEYS - 1]); + // Save encrypted blocks + vst1q_u8(out_ptr.add(i).cast(), state[i]); + } } /// Perform parallel AES decryption 8-blocks-at-a-time using the given expanded keys. #[target_feature(enable = "aes")] -#[target_feature(enable = "neon")] -pub(super) unsafe fn decrypt8( - expanded_keys: &[uint8x16_t; N], - blocks: InOut<'_, '_, Block8>, +pub(super) unsafe fn decrypt_par( + keys: &[uint8x16_t; KEYS], + blocks: InOut<'_, '_, Array>, ) { - let rounds = N - 1; - assert!(rounds == 10 || rounds == 12 || rounds == 14); + #[inline(always)] + unsafe fn par_round( + key: uint8x16_t, + state: &mut Array, + ) { + for s in state { + // AES single round decryption and inverse mix columns + *s = vaesimcq_u8(vaesdq_u8(*s, key)); + } + } + + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); let (in_ptr, out_ptr) = blocks.into_raw(); - let in_ptr = in_ptr as *const Block; - let out_ptr = out_ptr as *const Block; - - let mut state = [ - vld1q_u8(in_ptr.add(0) as *const u8), - vld1q_u8(in_ptr.add(1) as *const u8), - vld1q_u8(in_ptr.add(2) as *const u8), - vld1q_u8(in_ptr.add(3) as *const u8), - vld1q_u8(in_ptr.add(4) as *const u8), - vld1q_u8(in_ptr.add(5) as *const u8), - vld1q_u8(in_ptr.add(6) as *const u8), - vld1q_u8(in_ptr.add(7) as *const u8), - ]; - - for k in expanded_keys.iter().take(rounds - 1) { - for i in 0..8 { - // AES single round decryption - state[i] = vaesdq_u8(state[i], *k); - - // Inverse mix columns - state[i] = vaesimcq_u8(state[i]); - } + let in_ptr: *const Block = in_ptr.cast(); + let out_ptr: *mut Block = out_ptr.cast(); + + // Load encrypted blocks + let mut state: Array = mem::zeroed(); + for i in 0..ParBlocks::USIZE { + state[i] = vld1q_u8(in_ptr.add(i).cast()); } - for i in 0..8 { - // AES single round decryption - state[i] = vaesdq_u8(state[i], expanded_keys[rounds - 1]); + // Loop is intentionally not used here to enforce inlining + 
par_round(keys[0], &mut state); + par_round(keys[1], &mut state); + par_round(keys[2], &mut state); + par_round(keys[3], &mut state); + par_round(keys[4], &mut state); + par_round(keys[5], &mut state); + par_round(keys[6], &mut state); + par_round(keys[7], &mut state); + par_round(keys[8], &mut state); + if KEYS >= 13 { + par_round(keys[9], &mut state); + par_round(keys[10], &mut state); + } + if KEYS == 15 { + par_round(keys[11], &mut state); + par_round(keys[12], &mut state); + } + for i in 0..ParBlocks::USIZE { + // AES single round decryption + state[i] = vaesdq_u8(state[i], keys[KEYS - 2]); // Final add (bitwise XOR) - state[i] = veorq_u8(state[i], expanded_keys[rounds]); - + state[i] = veorq_u8(state[i], keys[KEYS - 1]); + // Save plaintext blocks vst1q_u8(out_ptr.add(i) as *mut u8, state[i]); } } diff --git a/aes/src/armv8/expand.rs b/aes/src/armv8/expand.rs index e932c7ca..f6c2fa7b 100644 --- a/aes/src/armv8/expand.rs +++ b/aes/src/armv8/expand.rs @@ -2,6 +2,10 @@ use core::{arch::aarch64::*, mem, slice}; +pub(super) type Aes128RoundKeys = [uint8x16_t; 11]; +pub(super) type Aes192RoundKeys = [uint8x16_t; 13]; +pub(super) type Aes256RoundKeys = [uint8x16_t; 15]; + /// There are 4 AES words in a block. const BLOCK_WORDS: usize = 4; @@ -52,14 +56,17 @@ pub unsafe fn expand_key(key: &[u8; L]) -> [uint /// This is the reverse of the encryption keys, with the Inverse Mix Columns /// operation applied to all but the first and last expanded key. 
#[target_feature(enable = "aes")] -pub(super) unsafe fn inv_expanded_keys(expanded_keys: &mut [uint8x16_t; N]) { +pub(super) unsafe fn inv_expanded_keys(keys: &[uint8x16_t; N]) -> [uint8x16_t; N] { assert!(N == 11 || N == 13 || N == 15); - for ek in expanded_keys.iter_mut().take(N - 1).skip(1) { - *ek = vaesimcq_u8(*ek); + let mut inv_keys: [uint8x16_t; N] = core::mem::zeroed(); + inv_keys[0] = keys[N - 1]; + for i in 1..N - 1 { + inv_keys[i] = vaesimcq_u8(keys[N - 1 - i]); } + inv_keys[N - 1] = keys[0]; - expanded_keys.reverse(); + inv_keys } /// Sub bytes for a single AES word: used for key expansion. diff --git a/aes/src/armv8/test_expand.rs b/aes/src/armv8/test_expand.rs index 23ca90aa..7e510b59 100644 --- a/aes/src/armv8/test_expand.rs +++ b/aes/src/armv8/test_expand.rs @@ -1,4 +1,4 @@ -use super::{expand_key, inv_expanded_keys}; +use super::expand::{expand_key, inv_expanded_keys}; use core::arch::aarch64::*; use hex_literal::hex; @@ -112,9 +112,9 @@ fn aes128_key_expansion() { #[test] fn aes128_key_expansion_inv() { - let mut ek = load_expanded_keys(AES128_EXP_KEYS); - unsafe { inv_expanded_keys(&mut ek) }; - assert_eq!(store_expanded_keys(ek), AES128_EXP_INVKEYS); + let ek = load_expanded_keys(AES128_EXP_KEYS); + let inv_ek = unsafe { inv_expanded_keys(&ek) }; + assert_eq!(store_expanded_keys(inv_ek), AES128_EXP_INVKEYS); } #[test] diff --git a/aes/src/lib.rs b/aes/src/lib.rs index 4f5c08b5..8be29189 100644 --- a/aes/src/lib.rs +++ b/aes/src/lib.rs @@ -123,6 +123,8 @@ #[cfg_attr(docsrs, doc(cfg(feature = "hazmat")))] pub mod hazmat; +#[macro_use] +mod macros; mod soft; use cfg_if::cfg_if; diff --git a/aes/src/macros.rs b/aes/src/macros.rs new file mode 100644 index 00000000..b223add8 --- /dev/null +++ b/aes/src/macros.rs @@ -0,0 +1,102 @@ +macro_rules! 
impl_backends { + ( + enc_name = $enc_name:ident, + dec_name = $dec_name:ident, + key_size = $key_size:ty, + keys_ty = $keys_ty:ty, + par_size = $par_size:ty, + expand_keys = $expand_keys:expr, + inv_keys = $inv_keys:expr, + encrypt = $encrypt:expr, + encrypt_par = $encrypt_par:expr, + decrypt = $decrypt:expr, + decrypt_par = $decrypt_par:expr, +) => { + #[derive(Clone)] + pub(crate) struct $enc_name { + keys: $keys_ty, + } + + impl cipher::BlockSizeUser for &$enc_name { + type BlockSize = cipher::consts::U16; + } + + impl cipher::ParBlocksSizeUser for &$enc_name { + type ParBlocksSize = $par_size; + } + + impl cipher::KeySizeUser for $enc_name { + type KeySize = $key_size; + } + + impl cipher::KeyInit for $enc_name { + #[inline] + fn new(key: &cipher::Key) -> Self { + let keys = unsafe { $expand_keys(key.as_ref()) }; + Self { keys } + } + } + + impl cipher::BlockBackend for &$enc_name { + #[inline(always)] + fn proc_block(&mut self, block: cipher::inout::InOut<'_, '_, cipher::Block>) { + unsafe { $encrypt(&self.keys, block) } + } + + #[inline(always)] + fn proc_par_blocks( + &mut self, + blocks: cipher::inout::InOut<'_, '_, cipher::ParBlocks>, + ) { + unsafe { $encrypt_par(&self.keys, blocks) } + } + } + + #[derive(Clone)] + pub(crate) struct $dec_name { + keys: $keys_ty, + } + + impl cipher::BlockSizeUser for &$dec_name { + type BlockSize = cipher::consts::U16; + } + + impl cipher::ParBlocksSizeUser for &$dec_name { + type ParBlocksSize = $par_size; + } + + impl cipher::KeySizeUser for $dec_name { + type KeySize = $key_size; + } + + impl cipher::KeyInit for $dec_name { + #[inline] + fn new(key: &cipher::Key) -> Self { + From::from(&$enc_name::new(key)) + } + } + + impl From<&$enc_name> for $dec_name { + #[inline] + fn from(enc: &$enc_name) -> $dec_name { + let keys = unsafe { $inv_keys(&enc.keys) }; + Self { keys } + } + } + + impl cipher::BlockBackend for &$dec_name { + #[inline(always)] + fn proc_block(&mut self, block: cipher::inout::InOut<'_, '_, 
cipher::Block>) { + unsafe { $decrypt(&self.keys, block) } + } + + #[inline(always)] + fn proc_par_blocks( + &mut self, + blocks: cipher::inout::InOut<'_, '_, cipher::ParBlocks>, + ) { + unsafe { $decrypt_par(&self.keys, blocks) } + } + } + }; +} diff --git a/aes/src/ni.rs b/aes/src/ni.rs index 95117aad..d08137bc 100644 --- a/aes/src/ni.rs +++ b/aes/src/ni.rs @@ -15,13 +15,8 @@ //! - [Intel AES-NI whitepaper](https://software.intel.com/sites/default/files/article/165683/aes-wp-2012-09-22-v01.pdf) //! - [Use of the AES Instruction Set](https://www.cosic.esat.kuleuven.be/ecrypt/AESday/slides/Use_of_the_AES_Instruction_Set.pdf) -#[macro_use] -mod utils; - -mod aes128; -mod aes192; -mod aes256; - +mod encdec; +mod expand; #[cfg(test)] mod test_expand; @@ -33,15 +28,55 @@ use core::arch::x86 as arch; #[cfg(target_arch = "x86_64")] use core::arch::x86_64 as arch; -use crate::{Block, Block8}; use cipher::{ - consts::{U16, U24, U32, U8}, - inout::InOut, - AlgorithmName, BlockBackend, BlockCipher, BlockCipherDecrypt, BlockCipherEncrypt, BlockClosure, - BlockSizeUser, Key, KeyInit, KeySizeUser, ParBlocksSizeUser, + consts::{self, U16, U24, U32}, + AlgorithmName, BlockCipher, BlockCipherDecrypt, BlockCipherEncrypt, BlockClosure, + BlockSizeUser, Key, KeyInit, KeySizeUser, }; use core::fmt; +impl_backends!( + enc_name = Aes128BackEnc, + dec_name = Aes128BackDec, + key_size = consts::U16, + keys_ty = expand::Aes128RoundKeys, + par_size = consts::U15, + expand_keys = expand::aes128_expand_key, + inv_keys = expand::inv_keys, + encrypt = encdec::encrypt, + encrypt_par = encdec::encrypt_par, + decrypt = encdec::decrypt, + decrypt_par = encdec::decrypt_par, +); + +impl_backends!( + enc_name = Aes192BackEnc, + dec_name = Aes192BackDec, + key_size = consts::U24, + keys_ty = expand::Aes192RoundKeys, + par_size = consts::U15, + expand_keys = expand::aes192_expand_key, + inv_keys = expand::inv_keys, + encrypt = encdec::encrypt, + encrypt_par = encdec::encrypt_par, + decrypt = 
encdec::decrypt, + decrypt_par = encdec::decrypt_par, +); + +impl_backends!( + enc_name = Aes256BackEnc, + dec_name = Aes256BackDec, + key_size = consts::U32, + keys_ty = expand::Aes256RoundKeys, + par_size = consts::U15, + expand_keys = expand::aes256_expand_key, + inv_keys = expand::inv_keys, + encrypt = encdec::encrypt, + encrypt_par = encdec::encrypt_par, + decrypt = encdec::decrypt, + decrypt_par = encdec::decrypt_par, +); + macro_rules! define_aes_impl { ( $name:tt, @@ -49,7 +84,6 @@ macro_rules! define_aes_impl { $name_dec:ident, $name_back_enc:ident, $name_back_dec:ident, - $module:tt, $key_size:ty, $doc:expr $(,)? ) => { @@ -63,12 +97,12 @@ macro_rules! define_aes_impl { impl $name { #[inline(always)] - pub(crate) fn get_enc_backend(&self) -> $name_back_enc<'_> { + pub(crate) fn get_enc_backend(&self) -> &$name_back_enc { self.encrypt.get_enc_backend() } #[inline(always)] - pub(crate) fn get_dec_backend(&self) -> $name_back_dec<'_> { + pub(crate) fn get_dec_backend(&self) -> &$name_back_dec { self.decrypt.get_dec_backend() } } @@ -140,13 +174,13 @@ macro_rules! define_aes_impl { #[doc = "block cipher (encrypt-only)"] #[derive(Clone)] pub struct $name_enc { - round_keys: $module::RoundKeys, + backend: $name_back_enc, } impl $name_enc { #[inline(always)] - pub(crate) fn get_enc_backend(&self) -> $name_back_enc<'_> { - $name_back_enc(self) + pub(crate) fn get_enc_backend(&self) -> &$name_back_enc { + &self.backend } } @@ -159,10 +193,8 @@ macro_rules! define_aes_impl { impl KeyInit for $name_enc { #[inline] fn new(key: &Key) -> Self { - // SAFETY: we enforce that this code is called only when - // target features required by `expand` were properly checked. Self { - round_keys: unsafe { $module::expand_key(key.as_ref()) }, + backend: $name_back_enc::new(key), } } } @@ -173,7 +205,7 @@ macro_rules! 
define_aes_impl { impl BlockCipherEncrypt for $name_enc { fn encrypt_with_backend(&self, f: impl BlockClosure) { - f.call(&mut self.get_enc_backend()) + f.call(&mut &self.backend) } } @@ -193,7 +225,9 @@ macro_rules! define_aes_impl { #[inline] fn drop(&mut self) { #[cfg(feature = "zeroize")] - zeroize::Zeroize::zeroize(&mut self.round_keys); + unsafe { + zeroize::zeroize_flat_type(&mut self.backend) + } } } @@ -204,13 +238,13 @@ macro_rules! define_aes_impl { #[doc = "block cipher (decrypt-only)"] #[derive(Clone)] pub struct $name_dec { - round_keys: $module::RoundKeys, + backend: $name_back_dec, } impl $name_dec { #[inline(always)] - pub(crate) fn get_dec_backend(&self) -> $name_back_dec<'_> { - $name_back_dec(self) + pub(crate) fn get_dec_backend(&self) -> &$name_back_dec { + &self.backend } } @@ -237,8 +271,9 @@ macro_rules! define_aes_impl { impl From<&$name_enc> for $name_dec { #[inline] fn from(enc: &$name_enc) -> $name_dec { - let round_keys = unsafe { $module::inv_expanded_keys(&enc.round_keys) }; - Self { round_keys } + Self { + backend: (&enc.backend).into(), + } } } @@ -268,64 +303,14 @@ macro_rules! 
define_aes_impl { #[inline] fn drop(&mut self) { #[cfg(feature = "zeroize")] - zeroize::Zeroize::zeroize(&mut self.round_keys); - } - } - - #[cfg(feature = "zeroize")] - impl zeroize::ZeroizeOnDrop for $name_dec {} - - pub(crate) struct $name_back_enc<'a>(&'a $name_enc); - - impl<'a> BlockSizeUser for $name_back_enc<'a> { - type BlockSize = U16; - } - - impl<'a> ParBlocksSizeUser for $name_back_enc<'a> { - type ParBlocksSize = U8; - } - - impl<'a> BlockBackend for $name_back_enc<'a> { - #[inline(always)] - fn proc_block(&mut self, block: InOut<'_, '_, Block>) { - unsafe { - $module::encrypt1(&self.0.round_keys, block); - } - } - - #[inline(always)] - fn proc_par_blocks(&mut self, blocks: InOut<'_, '_, Block8>) { unsafe { - $module::encrypt8(&self.0.round_keys, blocks); + zeroize::zeroize_flat_type(&mut self.backend) } } } - pub(crate) struct $name_back_dec<'a>(&'a $name_dec); - - impl<'a> BlockSizeUser for $name_back_dec<'a> { - type BlockSize = U16; - } - - impl<'a> ParBlocksSizeUser for $name_back_dec<'a> { - type ParBlocksSize = U8; - } - - impl<'a> BlockBackend for $name_back_dec<'a> { - #[inline(always)] - fn proc_block(&mut self, block: InOut<'_, '_, Block>) { - unsafe { - $module::decrypt1(&self.0.round_keys, block); - } - } - - #[inline(always)] - fn proc_par_blocks(&mut self, blocks: InOut<'_, '_, Block8>) { - unsafe { - $module::decrypt8(&self.0.round_keys, blocks); - } - } - } + #[cfg(feature = "zeroize")] + impl zeroize::ZeroizeOnDrop for $name_dec {} }; } @@ -335,7 +320,6 @@ define_aes_impl!( Aes128Dec, Aes128BackEnc, Aes128BackDec, - aes128, U16, "AES-128", ); @@ -346,7 +330,6 @@ define_aes_impl!( Aes192Dec, Aes192BackEnc, Aes192BackDec, - aes192, U24, "AES-192", ); @@ -357,7 +340,6 @@ define_aes_impl!( Aes256Dec, Aes256BackEnc, Aes256BackDec, - aes256, U32, "AES-256", ); diff --git a/aes/src/ni/aes128.rs b/aes/src/ni/aes128.rs deleted file mode 100644 index b0836a16..00000000 --- a/aes/src/ni/aes128.rs +++ /dev/null @@ -1,145 +0,0 @@ -use 
super::{arch::*, utils::*}; -use crate::{Block, Block8}; -use cipher::inout::InOut; -use core::mem; - -/// AES-128 round keys -pub(super) type RoundKeys = [__m128i; 11]; - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn encrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { - let (in_ptr, out_ptr) = block.into_raw(); - let mut b = _mm_loadu_si128(in_ptr as *const __m128i); - b = _mm_xor_si128(b, keys[0]); - b = _mm_aesenc_si128(b, keys[1]); - b = _mm_aesenc_si128(b, keys[2]); - b = _mm_aesenc_si128(b, keys[3]); - b = _mm_aesenc_si128(b, keys[4]); - b = _mm_aesenc_si128(b, keys[5]); - b = _mm_aesenc_si128(b, keys[6]); - b = _mm_aesenc_si128(b, keys[7]); - b = _mm_aesenc_si128(b, keys[8]); - b = _mm_aesenc_si128(b, keys[9]); - b = _mm_aesenclast_si128(b, keys[10]); - _mm_storeu_si128(out_ptr as *mut __m128i, b); -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn encrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { - let (in_ptr, out_ptr) = blocks.into_raw(); - let mut b = load8(in_ptr); - xor8(&mut b, keys[0]); - aesenc8(&mut b, keys[1]); - aesenc8(&mut b, keys[2]); - aesenc8(&mut b, keys[3]); - aesenc8(&mut b, keys[4]); - aesenc8(&mut b, keys[5]); - aesenc8(&mut b, keys[6]); - aesenc8(&mut b, keys[7]); - aesenc8(&mut b, keys[8]); - aesenc8(&mut b, keys[9]); - aesenclast8(&mut b, keys[10]); - store8(out_ptr, b); -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn decrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { - let (in_ptr, out_ptr) = block.into_raw(); - let mut b = _mm_loadu_si128(in_ptr as *const __m128i); - b = _mm_xor_si128(b, keys[10]); - b = _mm_aesdec_si128(b, keys[9]); - b = _mm_aesdec_si128(b, keys[8]); - b = _mm_aesdec_si128(b, keys[7]); - b = _mm_aesdec_si128(b, keys[6]); - b = _mm_aesdec_si128(b, keys[5]); - b = _mm_aesdec_si128(b, keys[4]); - b = _mm_aesdec_si128(b, keys[3]); - b = _mm_aesdec_si128(b, keys[2]); - b = _mm_aesdec_si128(b, keys[1]); - b = 
_mm_aesdeclast_si128(b, keys[0]); - _mm_storeu_si128(out_ptr as *mut __m128i, b); -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn decrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { - let (in_ptr, out_ptr) = blocks.into_raw(); - let mut b = load8(in_ptr); - xor8(&mut b, keys[10]); - aesdec8(&mut b, keys[9]); - aesdec8(&mut b, keys[8]); - aesdec8(&mut b, keys[7]); - aesdec8(&mut b, keys[6]); - aesdec8(&mut b, keys[5]); - aesdec8(&mut b, keys[4]); - aesdec8(&mut b, keys[3]); - aesdec8(&mut b, keys[2]); - aesdec8(&mut b, keys[1]); - aesdeclast8(&mut b, keys[0]); - store8(out_ptr, b); -} - -macro_rules! expand_round { - ($keys:expr, $pos:expr, $round:expr) => { - let mut t1 = $keys[$pos - 1]; - let mut t2; - let mut t3; - - t2 = _mm_aeskeygenassist_si128(t1, $round); - t2 = _mm_shuffle_epi32(t2, 0xff); - t3 = _mm_slli_si128(t1, 0x4); - t1 = _mm_xor_si128(t1, t3); - t3 = _mm_slli_si128(t3, 0x4); - t1 = _mm_xor_si128(t1, t3); - t3 = _mm_slli_si128(t3, 0x4); - t1 = _mm_xor_si128(t1, t3); - t1 = _mm_xor_si128(t1, t2); - - $keys[$pos] = t1; - }; -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn expand_key(key: &[u8; 16]) -> RoundKeys { - // SAFETY: `RoundKeys` is a `[__m128i; 11]` which can be initialized - // with all zeroes. 
- let mut keys: RoundKeys = mem::zeroed(); - - let k = _mm_loadu_si128(key.as_ptr() as *const __m128i); - keys[0] = k; - - expand_round!(keys, 1, 0x01); - expand_round!(keys, 2, 0x02); - expand_round!(keys, 3, 0x04); - expand_round!(keys, 4, 0x08); - expand_round!(keys, 5, 0x10); - expand_round!(keys, 6, 0x20); - expand_round!(keys, 7, 0x40); - expand_round!(keys, 8, 0x80); - expand_round!(keys, 9, 0x1B); - expand_round!(keys, 10, 0x36); - - keys -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn inv_expanded_keys(keys: &RoundKeys) -> RoundKeys { - [ - keys[0], - _mm_aesimc_si128(keys[1]), - _mm_aesimc_si128(keys[2]), - _mm_aesimc_si128(keys[3]), - _mm_aesimc_si128(keys[4]), - _mm_aesimc_si128(keys[5]), - _mm_aesimc_si128(keys[6]), - _mm_aesimc_si128(keys[7]), - _mm_aesimc_si128(keys[8]), - _mm_aesimc_si128(keys[9]), - keys[10], - ] -} diff --git a/aes/src/ni/aes192.rs b/aes/src/ni/aes192.rs deleted file mode 100644 index 74a4e374..00000000 --- a/aes/src/ni/aes192.rs +++ /dev/null @@ -1,198 +0,0 @@ -use super::{arch::*, utils::*}; -use crate::{Block, Block8}; -use cipher::inout::InOut; -use core::{mem, ptr}; - -/// AES-192 round keys -pub(super) type RoundKeys = [__m128i; 13]; - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn encrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { - let (in_ptr, out_ptr) = block.into_raw(); - let mut b = _mm_loadu_si128(in_ptr as *const __m128i); - b = _mm_xor_si128(b, keys[0]); - b = _mm_aesenc_si128(b, keys[1]); - b = _mm_aesenc_si128(b, keys[2]); - b = _mm_aesenc_si128(b, keys[3]); - b = _mm_aesenc_si128(b, keys[4]); - b = _mm_aesenc_si128(b, keys[5]); - b = _mm_aesenc_si128(b, keys[6]); - b = _mm_aesenc_si128(b, keys[7]); - b = _mm_aesenc_si128(b, keys[8]); - b = _mm_aesenc_si128(b, keys[9]); - b = _mm_aesenc_si128(b, keys[10]); - b = _mm_aesenc_si128(b, keys[11]); - b = _mm_aesenclast_si128(b, keys[12]); - _mm_storeu_si128(out_ptr as *mut __m128i, b); -} - -#[inline] 
-#[target_feature(enable = "aes")] -pub(super) unsafe fn encrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { - let (in_ptr, out_ptr) = blocks.into_raw(); - let mut b = load8(in_ptr); - xor8(&mut b, keys[0]); - aesenc8(&mut b, keys[1]); - aesenc8(&mut b, keys[2]); - aesenc8(&mut b, keys[3]); - aesenc8(&mut b, keys[4]); - aesenc8(&mut b, keys[5]); - aesenc8(&mut b, keys[6]); - aesenc8(&mut b, keys[7]); - aesenc8(&mut b, keys[8]); - aesenc8(&mut b, keys[9]); - aesenc8(&mut b, keys[10]); - aesenc8(&mut b, keys[11]); - aesenclast8(&mut b, keys[12]); - store8(out_ptr, b); -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn decrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { - let (in_ptr, out_ptr) = block.into_raw(); - let mut b = _mm_loadu_si128(in_ptr as *const __m128i); - b = _mm_xor_si128(b, keys[12]); - b = _mm_aesdec_si128(b, keys[11]); - b = _mm_aesdec_si128(b, keys[10]); - b = _mm_aesdec_si128(b, keys[9]); - b = _mm_aesdec_si128(b, keys[8]); - b = _mm_aesdec_si128(b, keys[7]); - b = _mm_aesdec_si128(b, keys[6]); - b = _mm_aesdec_si128(b, keys[5]); - b = _mm_aesdec_si128(b, keys[4]); - b = _mm_aesdec_si128(b, keys[3]); - b = _mm_aesdec_si128(b, keys[2]); - b = _mm_aesdec_si128(b, keys[1]); - b = _mm_aesdeclast_si128(b, keys[0]); - _mm_storeu_si128(out_ptr as *mut __m128i, b); -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn decrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { - let (in_ptr, out_ptr) = blocks.into_raw(); - let mut b = load8(in_ptr); - xor8(&mut b, keys[12]); - aesdec8(&mut b, keys[11]); - aesdec8(&mut b, keys[10]); - aesdec8(&mut b, keys[9]); - aesdec8(&mut b, keys[8]); - aesdec8(&mut b, keys[7]); - aesdec8(&mut b, keys[6]); - aesdec8(&mut b, keys[5]); - aesdec8(&mut b, keys[4]); - aesdec8(&mut b, keys[3]); - aesdec8(&mut b, keys[2]); - aesdec8(&mut b, keys[1]); - aesdeclast8(&mut b, keys[0]); - store8(out_ptr, b); -} - -macro_rules! 
expand_round { - ($t1:expr, $t3:expr, $round:expr) => {{ - let mut t1 = $t1; - let mut t2; - let mut t3 = $t3; - let mut t4; - - t2 = _mm_aeskeygenassist_si128(t3, $round); - t2 = _mm_shuffle_epi32(t2, 0x55); - t4 = _mm_slli_si128(t1, 0x4); - t1 = _mm_xor_si128(t1, t4); - t4 = _mm_slli_si128(t4, 0x4); - t1 = _mm_xor_si128(t1, t4); - t4 = _mm_slli_si128(t4, 0x4); - t1 = _mm_xor_si128(t1, t4); - t1 = _mm_xor_si128(t1, t2); - t2 = _mm_shuffle_epi32(t1, 0xff); - t4 = _mm_slli_si128(t3, 0x4); - t3 = _mm_xor_si128(t3, t4); - t3 = _mm_xor_si128(t3, t2); - - (t1, t3) - }}; -} - -#[inline(always)] -unsafe fn shuffle(a: __m128i, b: __m128i, i: usize) -> __m128i { - let a: [u64; 2] = mem::transmute(a); - let b: [u64; 2] = mem::transmute(b); - mem::transmute([a[i], b[0]]) -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn expand_key(key: &[u8; 24]) -> RoundKeys { - // SAFETY: `RoundKeys` is a `[__m128i; 13]` which can be initialized - // with all zeroes. - let mut keys: RoundKeys = mem::zeroed(); - // we are being extra pedantic here to remove out-of-bound access. 
- // this should be optimized out into movups, movsd sequence - // note that unaligned load MUST be used here, even though we read - // from the array (compiler missoptimizes aligned load) - let (k0, k1l) = { - let mut t = [0u8; 32]; - ptr::write(t.as_mut_ptr() as *mut [u8; 24], *key); - - ( - _mm_loadu_si128(t.as_ptr() as *const __m128i), - _mm_loadu_si128(t.as_ptr().offset(16) as *const __m128i), - ) - }; - - keys[0] = k0; - - let (k1_2, k2r) = expand_round!(k0, k1l, 0x01); - keys[1] = shuffle(k1l, k1_2, 0); - keys[2] = shuffle(k1_2, k2r, 1); - - let (k3, k4l) = expand_round!(k1_2, k2r, 0x02); - keys[3] = k3; - - let (k4_5, k5r) = expand_round!(k3, k4l, 0x04); - let k4 = shuffle(k4l, k4_5, 0); - let k5 = shuffle(k4_5, k5r, 1); - keys[4] = k4; - keys[5] = k5; - - let (k6, k7l) = expand_round!(k4_5, k5r, 0x08); - keys[6] = k6; - - let (k7_8, k8r) = expand_round!(k6, k7l, 0x10); - keys[7] = shuffle(k7l, k7_8, 0); - keys[8] = shuffle(k7_8, k8r, 1); - - let (k9, k10l) = expand_round!(k7_8, k8r, 0x20); - keys[9] = k9; - - let (k10_11, k11r) = expand_round!(k9, k10l, 0x40); - keys[10] = shuffle(k10l, k10_11, 0); - keys[11] = shuffle(k10_11, k11r, 1); - - let (k12, _) = expand_round!(k10_11, k11r, 0x80); - keys[12] = k12; - - keys -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn inv_expanded_keys(keys: &RoundKeys) -> RoundKeys { - [ - keys[0], - _mm_aesimc_si128(keys[1]), - _mm_aesimc_si128(keys[2]), - _mm_aesimc_si128(keys[3]), - _mm_aesimc_si128(keys[4]), - _mm_aesimc_si128(keys[5]), - _mm_aesimc_si128(keys[6]), - _mm_aesimc_si128(keys[7]), - _mm_aesimc_si128(keys[8]), - _mm_aesimc_si128(keys[9]), - _mm_aesimc_si128(keys[10]), - _mm_aesimc_si128(keys[11]), - keys[12], - ] -} diff --git a/aes/src/ni/aes256.rs b/aes/src/ni/aes256.rs deleted file mode 100644 index b42e2e65..00000000 --- a/aes/src/ni/aes256.rs +++ /dev/null @@ -1,196 +0,0 @@ -use super::{arch::*, utils::*}; -use crate::{Block, Block8}; -use cipher::inout::InOut; -use core::mem; - 
-/// AES-256 round keys -pub(super) type RoundKeys = [__m128i; 15]; - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn encrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { - let (in_ptr, out_ptr) = block.into_raw(); - let mut b = _mm_loadu_si128(in_ptr as *const __m128i); - b = _mm_xor_si128(b, keys[0]); - b = _mm_aesenc_si128(b, keys[1]); - b = _mm_aesenc_si128(b, keys[2]); - b = _mm_aesenc_si128(b, keys[3]); - b = _mm_aesenc_si128(b, keys[4]); - b = _mm_aesenc_si128(b, keys[5]); - b = _mm_aesenc_si128(b, keys[6]); - b = _mm_aesenc_si128(b, keys[7]); - b = _mm_aesenc_si128(b, keys[8]); - b = _mm_aesenc_si128(b, keys[9]); - b = _mm_aesenc_si128(b, keys[10]); - b = _mm_aesenc_si128(b, keys[11]); - b = _mm_aesenc_si128(b, keys[12]); - b = _mm_aesenc_si128(b, keys[13]); - b = _mm_aesenclast_si128(b, keys[14]); - _mm_storeu_si128(out_ptr as *mut __m128i, b); -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn encrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { - let (in_ptr, out_ptr) = blocks.into_raw(); - let mut b = load8(in_ptr); - xor8(&mut b, keys[0]); - aesenc8(&mut b, keys[1]); - aesenc8(&mut b, keys[2]); - aesenc8(&mut b, keys[3]); - aesenc8(&mut b, keys[4]); - aesenc8(&mut b, keys[5]); - aesenc8(&mut b, keys[6]); - aesenc8(&mut b, keys[7]); - aesenc8(&mut b, keys[8]); - aesenc8(&mut b, keys[9]); - aesenc8(&mut b, keys[10]); - aesenc8(&mut b, keys[11]); - aesenc8(&mut b, keys[12]); - aesenc8(&mut b, keys[13]); - aesenclast8(&mut b, keys[14]); - store8(out_ptr, b); -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn decrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { - let (in_ptr, out_ptr) = block.into_raw(); - let mut b = _mm_loadu_si128(in_ptr as *const __m128i); - b = _mm_xor_si128(b, keys[14]); - b = _mm_aesdec_si128(b, keys[13]); - b = _mm_aesdec_si128(b, keys[12]); - b = _mm_aesdec_si128(b, keys[11]); - b = _mm_aesdec_si128(b, keys[10]); - b = _mm_aesdec_si128(b, keys[9]); - b 
= _mm_aesdec_si128(b, keys[8]); - b = _mm_aesdec_si128(b, keys[7]); - b = _mm_aesdec_si128(b, keys[6]); - b = _mm_aesdec_si128(b, keys[5]); - b = _mm_aesdec_si128(b, keys[4]); - b = _mm_aesdec_si128(b, keys[3]); - b = _mm_aesdec_si128(b, keys[2]); - b = _mm_aesdec_si128(b, keys[1]); - b = _mm_aesdeclast_si128(b, keys[0]); - _mm_storeu_si128(out_ptr as *mut __m128i, b); -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn decrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { - let (in_ptr, out_ptr) = blocks.into_raw(); - let mut b = load8(in_ptr); - xor8(&mut b, keys[14]); - aesdec8(&mut b, keys[13]); - aesdec8(&mut b, keys[12]); - aesdec8(&mut b, keys[11]); - aesdec8(&mut b, keys[10]); - aesdec8(&mut b, keys[9]); - aesdec8(&mut b, keys[8]); - aesdec8(&mut b, keys[7]); - aesdec8(&mut b, keys[6]); - aesdec8(&mut b, keys[5]); - aesdec8(&mut b, keys[4]); - aesdec8(&mut b, keys[3]); - aesdec8(&mut b, keys[2]); - aesdec8(&mut b, keys[1]); - aesdeclast8(&mut b, keys[0]); - store8(out_ptr, b); -} - -macro_rules! expand_round { - ($keys:expr, $pos:expr, $round:expr) => { - let mut t1 = $keys[$pos - 2]; - let mut t2; - let mut t3 = $keys[$pos - 1]; - let mut t4; - - t2 = _mm_aeskeygenassist_si128(t3, $round); - t2 = _mm_shuffle_epi32(t2, 0xff); - t4 = _mm_slli_si128(t1, 0x4); - t1 = _mm_xor_si128(t1, t4); - t4 = _mm_slli_si128(t4, 0x4); - t1 = _mm_xor_si128(t1, t4); - t4 = _mm_slli_si128(t4, 0x4); - t1 = _mm_xor_si128(t1, t4); - t1 = _mm_xor_si128(t1, t2); - - $keys[$pos] = t1; - - t4 = _mm_aeskeygenassist_si128(t1, 0x00); - t2 = _mm_shuffle_epi32(t4, 0xaa); - t4 = _mm_slli_si128(t3, 0x4); - t3 = _mm_xor_si128(t3, t4); - t4 = _mm_slli_si128(t4, 0x4); - t3 = _mm_xor_si128(t3, t4); - t4 = _mm_slli_si128(t4, 0x4); - t3 = _mm_xor_si128(t3, t4); - t3 = _mm_xor_si128(t3, t2); - - $keys[$pos + 1] = t3; - }; -} - -macro_rules! 
expand_round_last { - ($keys:expr, $pos:expr, $round:expr) => { - let mut t1 = $keys[$pos - 2]; - let mut t2; - let t3 = $keys[$pos - 1]; - let mut t4; - - t2 = _mm_aeskeygenassist_si128(t3, $round); - t2 = _mm_shuffle_epi32(t2, 0xff); - t4 = _mm_slli_si128(t1, 0x4); - t1 = _mm_xor_si128(t1, t4); - t4 = _mm_slli_si128(t4, 0x4); - t1 = _mm_xor_si128(t1, t4); - t4 = _mm_slli_si128(t4, 0x4); - t1 = _mm_xor_si128(t1, t4); - t1 = _mm_xor_si128(t1, t2); - - $keys[$pos] = t1; - }; -} - -#[inline(always)] -pub(super) unsafe fn expand_key(key: &[u8; 32]) -> RoundKeys { - // SAFETY: `RoundKeys` is a `[__m128i; 15]` which can be initialized - // with all zeroes. - let mut keys: RoundKeys = mem::zeroed(); - - let kp = key.as_ptr() as *const __m128i; - keys[0] = _mm_loadu_si128(kp); - keys[1] = _mm_loadu_si128(kp.add(1)); - - expand_round!(keys, 2, 0x01); - expand_round!(keys, 4, 0x02); - expand_round!(keys, 6, 0x04); - expand_round!(keys, 8, 0x08); - expand_round!(keys, 10, 0x10); - expand_round!(keys, 12, 0x20); - expand_round_last!(keys, 14, 0x40); - - keys -} - -#[inline] -#[target_feature(enable = "aes")] -pub(super) unsafe fn inv_expanded_keys(keys: &RoundKeys) -> RoundKeys { - [ - keys[0], - _mm_aesimc_si128(keys[1]), - _mm_aesimc_si128(keys[2]), - _mm_aesimc_si128(keys[3]), - _mm_aesimc_si128(keys[4]), - _mm_aesimc_si128(keys[5]), - _mm_aesimc_si128(keys[6]), - _mm_aesimc_si128(keys[7]), - _mm_aesimc_si128(keys[8]), - _mm_aesimc_si128(keys[9]), - _mm_aesimc_si128(keys[10]), - _mm_aesimc_si128(keys[11]), - _mm_aesimc_si128(keys[12]), - _mm_aesimc_si128(keys[13]), - keys[14], - ] -} diff --git a/aes/src/ni/encdec.rs b/aes/src/ni/encdec.rs new file mode 100644 index 00000000..2f763ee4 --- /dev/null +++ b/aes/src/ni/encdec.rs @@ -0,0 +1,159 @@ +use super::arch::*; +use crate::Block; +use cipher::{ + array::{Array, ArraySize}, + inout::InOut, +}; + +#[target_feature(enable = "aes")] +pub(super) unsafe fn encrypt( + keys: &[__m128i; KEYS], + block: InOut<'_, '_, Block>, +) { 
+ assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); + + let (block_in, block_out) = block.into_raw(); + let mut b = _mm_loadu_si128(block_in.cast()); + b = _mm_xor_si128(b, keys[0]); + for &key in &keys[1..KEYS - 1] { + b = _mm_aesenc_si128(b, key); + } + b = _mm_aesenclast_si128(b, keys[KEYS - 1]); + _mm_storeu_si128(block_out.cast(), b); +} + +#[target_feature(enable = "aes")] +pub(super) unsafe fn decrypt( + keys: &[__m128i; KEYS], + block: InOut<'_, '_, Block>, +) { + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); + + let (block_in, block_out) = block.into_raw(); + let mut b = _mm_loadu_si128(block_in.cast()); + b = _mm_xor_si128(b, keys[0]); + for &key in &keys[1..KEYS - 1] { + b = _mm_aesdec_si128(b, key); + } + b = _mm_aesdeclast_si128(b, keys[KEYS - 1]); + _mm_storeu_si128(block_out.cast(), b); +} + +#[target_feature(enable = "aes")] +pub(super) unsafe fn encrypt_par( + keys: &[__m128i; KEYS], + blocks: InOut<'_, '_, Array>, +) { + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); + + let (blocks_in, blocks_out) = blocks.into_raw(); + let mut b = load(blocks_in); + + // Loop over keys is intentionally not used here to force inlining + xor(&mut b, keys[0]); + aesenc(&mut b, keys[1]); + aesenc(&mut b, keys[2]); + aesenc(&mut b, keys[3]); + aesenc(&mut b, keys[4]); + aesenc(&mut b, keys[5]); + aesenc(&mut b, keys[6]); + aesenc(&mut b, keys[7]); + aesenc(&mut b, keys[8]); + aesenc(&mut b, keys[9]); + if KEYS >= 13 { + aesenc(&mut b, keys[10]); + aesenc(&mut b, keys[11]); + } + if KEYS == 15 { + aesenc(&mut b, keys[12]); + aesenc(&mut b, keys[13]); + } + aesenclast(&mut b, keys[KEYS - 1]); + store(blocks_out, b); +} + +#[target_feature(enable = "aes")] +pub(super) unsafe fn decrypt_par( + keys: &[__m128i; KEYS], + blocks: InOut<'_, '_, Array>, +) { + assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); + + let (blocks_in, blocks_out) = blocks.into_raw(); + let mut b = load(blocks_in); + + // Loop over keys is intentionally not used here to force inlining + xor(&mut 
b, keys[0]); + aesdec(&mut b, keys[1]); + aesdec(&mut b, keys[2]); + aesdec(&mut b, keys[3]); + aesdec(&mut b, keys[4]); + aesdec(&mut b, keys[5]); + aesdec(&mut b, keys[6]); + aesdec(&mut b, keys[7]); + aesdec(&mut b, keys[8]); + aesdec(&mut b, keys[9]); + if KEYS >= 13 { + aesdec(&mut b, keys[10]); + aesdec(&mut b, keys[11]); + } + if KEYS == 15 { + aesdec(&mut b, keys[12]); + aesdec(&mut b, keys[13]); + } + aesdeclast(&mut b, keys[KEYS - 1]); + store(blocks_out, b); +} + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn load(blocks: *const Array) -> Array<__m128i, N> { + let p = blocks.cast::<__m128i>(); + let mut res: Array<__m128i, N> = core::mem::zeroed(); + for i in 0..N::USIZE { + res[i] = _mm_loadu_si128(p.add(i)); + } + res +} + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn store(blocks: *mut Array, b: Array<__m128i, N>) { + let p = blocks.cast::<__m128i>(); + for i in 0..N::USIZE { + _mm_storeu_si128(p.add(i), b[i]); + } +} + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn xor(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_xor_si128(*block, key); + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn aesenc(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_aesenc_si128(*block, key); + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn aesenclast(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_aesenclast_si128(*block, key); + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn aesdec(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_aesdec_si128(*block, key); + } +} + +#[target_feature(enable = "aes")] +pub(crate) unsafe fn aesdeclast(blocks: &mut Array<__m128i, N>, key: __m128i) { + for block in blocks { + *block = _mm_aesdeclast_si128(*block, key); + } +} diff --git a/aes/src/ni/expand.rs b/aes/src/ni/expand.rs new file mode 100644 index 
00000000..afbc6587 --- /dev/null +++ b/aes/src/ni/expand.rs @@ -0,0 +1,202 @@ +use super::arch::*; +use core::mem::{transmute, zeroed}; + +pub(super) type Aes128RoundKeys = [__m128i; 11]; +pub(super) type Aes192RoundKeys = [__m128i; 13]; +pub(super) type Aes256RoundKeys = [__m128i; 15]; + +#[target_feature(enable = "aes")] +pub(super) unsafe fn aes128_expand_key(key: &[u8; 16]) -> Aes128RoundKeys { + unsafe fn expand_round(keys: &mut Aes128RoundKeys, pos: usize) { + let mut t1 = keys[pos - 1]; + let mut t2; + let mut t3; + + t2 = _mm_aeskeygenassist_si128(t1, RK); + t2 = _mm_shuffle_epi32(t2, 0xff); + t3 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t3); + t3 = _mm_slli_si128(t3, 0x4); + t1 = _mm_xor_si128(t1, t3); + t3 = _mm_slli_si128(t3, 0x4); + t1 = _mm_xor_si128(t1, t3); + t1 = _mm_xor_si128(t1, t2); + + keys[pos] = t1; + } + + let mut keys: Aes128RoundKeys = zeroed(); + let k = _mm_loadu_si128(key.as_ptr().cast()); + keys[0] = k; + + let kr = &mut keys; + expand_round::<0x01>(kr, 1); + expand_round::<0x02>(kr, 2); + expand_round::<0x04>(kr, 3); + expand_round::<0x08>(kr, 4); + expand_round::<0x10>(kr, 5); + expand_round::<0x20>(kr, 6); + expand_round::<0x40>(kr, 7); + expand_round::<0x80>(kr, 8); + expand_round::<0x1B>(kr, 9); + expand_round::<0x36>(kr, 10); + + keys +} + +#[target_feature(enable = "aes")] +pub(super) unsafe fn aes192_expand_key(key: &[u8; 24]) -> Aes192RoundKeys { + unsafe fn shuffle(a: __m128i, b: __m128i, i: usize) -> __m128i { + let a: [u64; 2] = transmute(a); + let b: [u64; 2] = transmute(b); + transmute([a[i], b[0]]) + } + + #[target_feature(enable = "aes")] + unsafe fn expand_round(mut t1: __m128i, mut t3: __m128i) -> (__m128i, __m128i) { + let (mut t2, mut t4); + + t2 = _mm_aeskeygenassist_si128(t3, RK); + t2 = _mm_shuffle_epi32(t2, 0x55); + t4 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t1 = 
_mm_xor_si128(t1, t2); + t2 = _mm_shuffle_epi32(t1, 0xff); + t4 = _mm_slli_si128(t3, 0x4); + t3 = _mm_xor_si128(t3, t4); + t3 = _mm_xor_si128(t3, t2); + + (t1, t3) + } + + let mut keys: Aes192RoundKeys = zeroed(); + // We are being extra pedantic here to remove out-of-bound access. + // This should be optimized into movups, movsd sequence. + let (k0, k1l) = { + let mut t = [0u8; 32]; + t[..key.len()].copy_from_slice(key); + ( + _mm_loadu_si128(t.as_ptr().cast()), + _mm_loadu_si128(t.as_ptr().offset(16).cast()), + ) + }; + + keys[0] = k0; + + let (k1_2, k2r) = expand_round::<0x01>(k0, k1l); + keys[1] = shuffle(k1l, k1_2, 0); + keys[2] = shuffle(k1_2, k2r, 1); + + let (k3, k4l) = expand_round::<0x02>(k1_2, k2r); + keys[3] = k3; + + let (k4_5, k5r) = expand_round::<0x04>(k3, k4l); + let k4 = shuffle(k4l, k4_5, 0); + let k5 = shuffle(k4_5, k5r, 1); + keys[4] = k4; + keys[5] = k5; + + let (k6, k7l) = expand_round::<0x08>(k4_5, k5r); + keys[6] = k6; + + let (k7_8, k8r) = expand_round::<0x10>(k6, k7l); + keys[7] = shuffle(k7l, k7_8, 0); + keys[8] = shuffle(k7_8, k8r, 1); + + let (k9, k10l) = expand_round::<0x20>(k7_8, k8r); + keys[9] = k9; + + let (k10_11, k11r) = expand_round::<0x40>(k9, k10l); + keys[10] = shuffle(k10l, k10_11, 0); + keys[11] = shuffle(k10_11, k11r, 1); + + let (k12, _) = expand_round::<0x80>(k10_11, k11r); + keys[12] = k12; + + keys +} + +#[target_feature(enable = "aes")] +pub(super) unsafe fn aes256_expand_key(key: &[u8; 32]) -> Aes256RoundKeys { + unsafe fn expand_round(keys: &mut Aes256RoundKeys, pos: usize) { + let mut t1 = keys[pos - 2]; + let mut t2; + let mut t3 = keys[pos - 1]; + let mut t4; + + t2 = _mm_aeskeygenassist_si128(t3, RK); + t2 = _mm_shuffle_epi32(t2, 0xff); + t4 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t1 = _mm_xor_si128(t1, t2); + + keys[pos] = t1; + + t4 = _mm_aeskeygenassist_si128(t1, 
0x00); + t2 = _mm_shuffle_epi32(t4, 0xaa); + t4 = _mm_slli_si128(t3, 0x4); + t3 = _mm_xor_si128(t3, t4); + t4 = _mm_slli_si128(t4, 0x4); + t3 = _mm_xor_si128(t3, t4); + t4 = _mm_slli_si128(t4, 0x4); + t3 = _mm_xor_si128(t3, t4); + t3 = _mm_xor_si128(t3, t2); + + keys[pos + 1] = t3; + } + + unsafe fn expand_round_last(keys: &mut Aes256RoundKeys, pos: usize) { + let mut t1 = keys[pos - 2]; + let mut t2; + let t3 = keys[pos - 1]; + let mut t4; + + t2 = _mm_aeskeygenassist_si128(t3, RK); + t2 = _mm_shuffle_epi32(t2, 0xff); + t4 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + t1 = _mm_xor_si128(t1, t4); + t1 = _mm_xor_si128(t1, t2); + + keys[pos] = t1; + } + + let mut keys: Aes256RoundKeys = zeroed(); + + let kp = key.as_ptr().cast::<__m128i>(); + keys[0] = _mm_loadu_si128(kp); + keys[1] = _mm_loadu_si128(kp.add(1)); + + let k = &mut keys; + expand_round::<0x01>(k, 2); + expand_round::<0x02>(k, 4); + expand_round::<0x04>(k, 6); + expand_round::<0x08>(k, 8); + expand_round::<0x10>(k, 10); + expand_round::<0x20>(k, 12); + expand_round_last::<0x40>(k, 14); + + keys +} + +#[target_feature(enable = "aes")] +pub(super) unsafe fn inv_keys(keys: &[__m128i; N]) -> [__m128i; N] { + let mut inv_keys: [__m128i; N] = zeroed(); + inv_keys[0] = keys[N - 1]; + for i in 1..N - 1 { + inv_keys[i] = _mm_aesimc_si128(keys[N - 1 - i]); + } + inv_keys[N - 1] = keys[0]; + inv_keys +} diff --git a/aes/src/ni/hazmat.rs b/aes/src/ni/hazmat.rs index a2a735a3..00b4b805 100644 --- a/aes/src/ni/hazmat.rs +++ b/aes/src/ni/hazmat.rs @@ -4,11 +4,27 @@ //! implementations in this crate, but instead provides raw AES-NI accelerated //! access to the AES round function gated under the `hazmat` crate feature. 
-use super::{ - arch::*, - utils::{load8, store8}, -}; +use super::arch::*; use crate::{Block, Block8}; +use cipher::array::{Array, ArraySize}; + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn load(blocks: *const Array) -> Array<__m128i, N> { + let p = blocks.cast::<__m128i>(); + let mut res: Array<__m128i, N> = core::mem::zeroed(); + for i in 0..N::USIZE { + res[i] = _mm_loadu_si128(p.add(i)); + } + res +} + +#[target_feature(enable = "sse2")] +pub(crate) unsafe fn store(blocks: *mut Array, b: Array<__m128i, N>) { + let p = blocks.cast::<__m128i>(); + for i in 0..N::USIZE { + _mm_storeu_si128(p.add(i), b[i]); + } +} /// AES cipher (encrypt) round function. #[target_feature(enable = "aes")] @@ -23,14 +39,14 @@ pub(crate) unsafe fn cipher_round(block: &mut Block, round_key: &Block) { /// AES cipher (encrypt) round function: parallel version. #[target_feature(enable = "aes")] pub(crate) unsafe fn cipher_round_par(blocks: &mut Block8, round_keys: &Block8) { - let xmm_keys = load8(round_keys); - let mut xmm_blocks = load8(blocks); + let xmm_keys = load(round_keys); + let mut xmm_blocks = load(blocks); for i in 0..8 { xmm_blocks[i] = _mm_aesenc_si128(xmm_blocks[i], xmm_keys[i]); } - store8(blocks, xmm_blocks); + store(blocks, xmm_blocks); } /// AES cipher (encrypt) round function. @@ -46,14 +62,14 @@ pub(crate) unsafe fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block /// AES cipher (encrypt) round function: parallel version. #[target_feature(enable = "aes")] pub(crate) unsafe fn equiv_inv_cipher_round_par(blocks: &mut Block8, round_keys: &Block8) { - let xmm_keys = load8(round_keys); - let mut xmm_blocks = load8(blocks); + let xmm_keys = load(round_keys); + let mut xmm_blocks = load(blocks); for i in 0..8 { xmm_blocks[i] = _mm_aesdec_si128(xmm_blocks[i], xmm_keys[i]); } - store8(blocks, xmm_blocks); + store(blocks, xmm_blocks); } /// AES mix columns function. 
diff --git a/aes/src/ni/test_expand.rs b/aes/src/ni/test_expand.rs index 6ab87c5e..d6e71b7b 100644 --- a/aes/src/ni/test_expand.rs +++ b/aes/src/ni/test_expand.rs @@ -1,13 +1,24 @@ -use super::utils::check; +use super::{arch::*, expand::*}; use hex_literal::hex; +pub(crate) fn check(a: &[__m128i], b: &[[u64; 2]]) { + assert_eq!(a.len(), b.len()); + for (v1, v2) in a.iter().zip(b) { + let t1: [u64; 2] = unsafe { core::mem::transmute(*v1) }; + let t2 = [v2[0].to_be(), v2[1].to_be()]; + assert_eq!(t1, t2); + } +} + #[test] +#[cfg_attr( + not(target_feature = "aes"), + ignore = "requires enabled `aes` target feature" +)] fn aes128_expand_key_test() { - use super::aes128::expand_key; - let keys = [0x00; 16]; check( - unsafe { &expand_key(&keys) }, + unsafe { &aes128_expand_key(&keys) }, &[ [0x0000000000000000, 0x0000000000000000], [0x6263636362636363, 0x6263636362636363], @@ -25,7 +36,7 @@ fn aes128_expand_key_test() { let keys = [0xff; 16]; check( - unsafe { &expand_key(&keys) }, + unsafe { &aes128_expand_key(&keys) }, &[ [0xffffffffffffffff, 0xffffffffffffffff], [0xe8e9e9e917161616, 0xe8e9e9e917161616], @@ -43,7 +54,7 @@ fn aes128_expand_key_test() { let keys = hex!("000102030405060708090a0b0c0d0e0f"); check( - unsafe { &expand_key(&keys) }, + unsafe { &aes128_expand_key(&keys) }, &[ [0x0001020304050607, 0x08090a0b0c0d0e0f], [0xd6aa74fdd2af72fa, 0xdaa678f1d6ab76fe], @@ -61,7 +72,7 @@ fn aes128_expand_key_test() { let keys = hex!("6920e299a5202a6d656e636869746f2a"); check( - unsafe { &expand_key(&keys) }, + unsafe { &aes128_expand_key(&keys) }, &[ [0x6920e299a5202a6d, 0x656e636869746f2a], [0xfa8807605fa82d0d, 0x3ac64e6553b2214f], @@ -79,7 +90,7 @@ fn aes128_expand_key_test() { let keys = hex!("2b7e151628aed2a6abf7158809cf4f3c"); check( - unsafe { &expand_key(&keys) }, + unsafe { &aes128_expand_key(&keys) }, &[ [0x2b7e151628aed2a6, 0xabf7158809cf4f3c], [0xa0fafe1788542cb1, 0x23a339392a6c7605], @@ -97,12 +108,14 @@ fn aes128_expand_key_test() { } #[test] +#[cfg_attr( + 
not(target_feature = "aes"), + ignore = "requires enabled `aes` target feature" +)] fn aes192_expand_key_test() { - use super::aes192::expand_key; - let keys = [0x00; 24]; check( - unsafe { &expand_key(&keys) }, + unsafe { &aes192_expand_key(&keys) }, &[ [0x0000000000000000, 0x0000000000000000], [0x0000000000000000, 0x6263636362636363], @@ -122,7 +135,7 @@ fn aes192_expand_key_test() { let keys = [0xff; 24]; check( - unsafe { &expand_key(&keys) }, + unsafe { &aes192_expand_key(&keys) }, &[ [0xffffffffffffffff, 0xffffffffffffffff], [0xffffffffffffffff, 0xe8e9e9e917161616], @@ -142,7 +155,7 @@ fn aes192_expand_key_test() { let keys = hex!("000102030405060708090a0b0c0d0e0f1011121314151617"); check( - unsafe { &expand_key(&keys) }, + unsafe { &aes192_expand_key(&keys) }, &[ [0x0001020304050607, 0x08090a0b0c0d0e0f], [0x1011121314151617, 0x5846f2f95c43f4fe], @@ -162,7 +175,7 @@ fn aes192_expand_key_test() { let keys = hex!("8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b"); check( - unsafe { &expand_key(&keys) }, + unsafe { &aes192_expand_key(&keys) }, &[ [0x8e73b0f7da0e6452, 0xc810f32b809079e5], [0x62f8ead2522c6b7b, 0xfe0c91f72402f5a5], @@ -182,12 +195,14 @@ fn aes192_expand_key_test() { } #[test] +#[cfg_attr( + not(target_feature = "aes"), + ignore = "requires enabled `aes` target feature" +)] fn aes256_expand_key_test() { - use super::aes256::expand_key; - let keys = [0x00; 32]; check( - unsafe { &expand_key(&keys) }, + unsafe { &aes256_expand_key(&keys) }, &[ [0x0000000000000000, 0x0000000000000000], [0x0000000000000000, 0x0000000000000000], @@ -209,7 +224,7 @@ fn aes256_expand_key_test() { let keys = [0xff; 32]; check( - unsafe { &expand_key(&keys) }, + unsafe { &aes256_expand_key(&keys) }, &[ [0xffffffffffffffff, 0xffffffffffffffff], [0xffffffffffffffff, 0xffffffffffffffff], @@ -231,7 +246,7 @@ fn aes256_expand_key_test() { let keys = hex!("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"); check( - unsafe { &expand_key(&keys) }, + unsafe { 
&aes256_expand_key(&keys) }, &[ [0x0001020304050607, 0x08090a0b0c0d0e0f], [0x1011121314151617, 0x18191a1b1c1d1e1f], @@ -253,7 +268,7 @@ fn aes256_expand_key_test() { let keys = hex!("603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4"); check( - unsafe { &expand_key(&keys) }, + unsafe { &aes256_expand_key(&keys) }, &[ [0x603deb1015ca71be, 0x2b73aef0857d7781], [0x1f352c073b6108d7, 0x2d9810a30914dff4], diff --git a/aes/src/ni/utils.rs b/aes/src/ni/utils.rs deleted file mode 100644 index 1bd6522d..00000000 --- a/aes/src/ni/utils.rs +++ /dev/null @@ -1,92 +0,0 @@ -//! Utility functions - -// TODO(tarcieri): check performance impact / generated assembly changes -#![allow(clippy::needless_range_loop)] - -use super::arch::*; -use crate::{Block, Block8}; - -pub type U128x8 = [__m128i; 8]; - -#[cfg(test)] -pub(crate) fn check(a: &[__m128i], b: &[[u64; 2]]) { - for (v1, v2) in a.iter().zip(b) { - let t1: [u64; 2] = unsafe { core::mem::transmute(*v1) }; - let t2 = [v2[0].to_be(), v2[1].to_be()]; - assert_eq!(t1, t2); - } -} - -#[inline(always)] -pub(crate) fn load8(blocks: *const Block8) -> U128x8 { - unsafe { - let p = blocks as *const Block; - [ - _mm_loadu_si128(p.add(0) as *const __m128i), - _mm_loadu_si128(p.add(1) as *const __m128i), - _mm_loadu_si128(p.add(2) as *const __m128i), - _mm_loadu_si128(p.add(3) as *const __m128i), - _mm_loadu_si128(p.add(4) as *const __m128i), - _mm_loadu_si128(p.add(5) as *const __m128i), - _mm_loadu_si128(p.add(6) as *const __m128i), - _mm_loadu_si128(p.add(7) as *const __m128i), - ] - } -} - -#[inline(always)] -pub(crate) fn store8(blocks: *mut Block8, b: U128x8) { - unsafe { - let p = blocks as *mut Block; - _mm_storeu_si128(p.add(0) as *mut __m128i, b[0]); - _mm_storeu_si128(p.add(1) as *mut __m128i, b[1]); - _mm_storeu_si128(p.add(2) as *mut __m128i, b[2]); - _mm_storeu_si128(p.add(3) as *mut __m128i, b[3]); - _mm_storeu_si128(p.add(4) as *mut __m128i, b[4]); - _mm_storeu_si128(p.add(5) as *mut __m128i, b[5]); - 
_mm_storeu_si128(p.add(6) as *mut __m128i, b[6]); - _mm_storeu_si128(p.add(7) as *mut __m128i, b[7]); - } -} - -#[inline(always)] -pub(crate) fn xor8(b: &mut U128x8, key: __m128i) { - unsafe { - b[0] = _mm_xor_si128(b[0], key); - b[1] = _mm_xor_si128(b[1], key); - b[2] = _mm_xor_si128(b[2], key); - b[3] = _mm_xor_si128(b[3], key); - b[4] = _mm_xor_si128(b[4], key); - b[5] = _mm_xor_si128(b[5], key); - b[6] = _mm_xor_si128(b[6], key); - b[7] = _mm_xor_si128(b[7], key); - } -} - -#[inline(always)] -pub(crate) fn aesenc8(buffer: &mut U128x8, key: __m128i) { - for i in 0..8 { - buffer[i] = unsafe { _mm_aesenc_si128(buffer[i], key) }; - } -} - -#[inline(always)] -pub(crate) fn aesenclast8(buffer: &mut U128x8, key: __m128i) { - for i in 0..8 { - buffer[i] = unsafe { _mm_aesenclast_si128(buffer[i], key) }; - } -} - -#[inline(always)] -pub(crate) fn aesdec8(buffer: &mut U128x8, key: __m128i) { - for i in 0..8 { - buffer[i] = unsafe { _mm_aesdec_si128(buffer[i], key) }; - } -} - -#[inline(always)] -pub(crate) fn aesdeclast8(buffer: &mut U128x8, key: __m128i) { - for i in 0..8 { - buffer[i] = unsafe { _mm_aesdeclast_si128(buffer[i], key) }; - } -} From 42728f6197b5d1b8702d5814a8e113840f547e14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 31 Jul 2024 19:04:28 +0300 Subject: [PATCH 2/9] temporarily allow unused macros --- aes/src/macros.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aes/src/macros.rs b/aes/src/macros.rs index b223add8..98f19420 100644 --- a/aes/src/macros.rs +++ b/aes/src/macros.rs @@ -1,3 +1,5 @@ +// TODO: remove this allow after migration of the soft backend +#[allow(unused_macros)] macro_rules! 
impl_backends { ( enc_name = $enc_name:ident, From f5cec246889c6d13024614c4ef39ed274006c1ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 31 Jul 2024 19:09:28 +0300 Subject: [PATCH 3/9] Fix zeroize impl --- aes/src/armv8.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/aes/src/armv8.rs b/aes/src/armv8.rs index a6bd438c..3c51a5a0 100644 --- a/aes/src/armv8.rs +++ b/aes/src/armv8.rs @@ -158,6 +158,16 @@ macro_rules! define_aes_impl { } } + impl Drop for $name { + #[inline] + fn drop(&mut self) { + #[cfg(feature = "zeroize")] + unsafe { + zeroize::zeroize_flat_type(self); + } + } + } + #[cfg(feature = "zeroize")] impl zeroize::ZeroizeOnDrop for $name {} @@ -216,7 +226,7 @@ macro_rules! define_aes_impl { fn drop(&mut self) { #[cfg(feature = "zeroize")] unsafe { - zeroize::zeroize_flat_type(&mut self.backend); + zeroize::zeroize_flat_type(self); } } } @@ -294,7 +304,7 @@ macro_rules! define_aes_impl { fn drop(&mut self) { #[cfg(feature = "zeroize")] unsafe { - zeroize::zeroize_flat_type(&mut self.backend); + zeroize::zeroize_flat_type(self); } } } From 69ead99af6dc72e1803bf46ca71ad0665c4f1b0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 31 Jul 2024 20:50:08 +0300 Subject: [PATCH 4/9] Remove TODO comment --- aes/src/autodetect.rs | 39 ++++++++++++++++++++++++++++++--------- aes/src/macros.rs | 3 ++- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/aes/src/autodetect.rs b/aes/src/autodetect.rs index 3cf17fc4..5605c9d4 100644 --- a/aes/src/autodetect.rs +++ b/aes/src/autodetect.rs @@ -20,12 +20,12 @@ cpufeatures::new!(aes_intrinsics, "aes"); macro_rules! define_aes_impl { ( - $name:ident, - $name_enc:ident, - $name_dec:ident, - $module:tt, - $key_size:ty, - $doc:expr $(,)? 
+ name = $name:ident, + name_enc = $name_enc:ident, + name_dec = $name_dec:ident, + module = $module:tt, + key_size = $key_size:ty, + doc = $doc:expr, ) => { mod $module { use super::{intrinsics, soft}; @@ -425,6 +425,27 @@ macro_rules! define_aes_impl { }; } -define_aes_impl!(Aes128, Aes128Enc, Aes128Dec, aes128, U16, "AES-128"); -define_aes_impl!(Aes192, Aes192Enc, Aes192Dec, aes192, U24, "AES-192"); -define_aes_impl!(Aes256, Aes256Enc, Aes256Dec, aes256, U32, "AES-256"); +define_aes_impl!( + name = Aes128, + name_enc = Aes128Enc, + name_dec = Aes128Dec, + module = aes128, + key_size = U16, + doc = "AES-128", +); +define_aes_impl!( + name = Aes192, + name_enc = Aes192Enc, + name_dec = Aes192Dec, + module = aes192, + key_size = U24, + doc = "AES-192", +); +define_aes_impl!( + name = Aes256, + name_enc = Aes256Enc, + name_dec = Aes256Dec, + module = aes256, + key_size = U32, + doc = "AES-256", +); diff --git a/aes/src/macros.rs b/aes/src/macros.rs index 98f19420..c0f12ba7 100644 --- a/aes/src/macros.rs +++ b/aes/src/macros.rs @@ -1,4 +1,5 @@ -// TODO: remove this allow after migration of the soft backend +// This macro is not used by the soft backend, to simplify the crate code we allow this macro +// to be unused to prevent warnings, e.g. when `force-soft` is enabled. #[allow(unused_macros)] macro_rules!
impl_backends { ( From 9c7bcc4f2859456a0191a1d7de76c3b08550d926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 31 Jul 2024 20:51:40 +0300 Subject: [PATCH 5/9] Add ignore attribute to armv8 key expansion tests --- aes/src/armv8/test_expand.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/aes/src/armv8/test_expand.rs b/aes/src/armv8/test_expand.rs index 7e510b59..ad41aa7a 100644 --- a/aes/src/armv8/test_expand.rs +++ b/aes/src/armv8/test_expand.rs @@ -105,12 +105,20 @@ fn store_expanded_keys(input: [uint8x16_t; N]) -> [[u8; 16]; N] } #[test] +#[cfg_attr( + not(target_feature = "aes"), + ignore = "requires enabled `aes` target feature" +)] fn aes128_key_expansion() { let ek = unsafe { expand_key(&AES128_KEY) }; assert_eq!(store_expanded_keys(ek), AES128_EXP_KEYS); } #[test] +#[cfg_attr( + not(target_feature = "aes"), + ignore = "requires enabled `aes` target feature" +)] fn aes128_key_expansion_inv() { let ek = load_expanded_keys(AES128_EXP_KEYS); let inv_ek = unsafe { inv_expanded_keys(&ek) }; @@ -118,12 +126,20 @@ fn aes128_key_expansion_inv() { } #[test] +#[cfg_attr( + not(target_feature = "aes"), + ignore = "requires enabled `aes` target feature" +)] fn aes192_key_expansion() { let ek = unsafe { expand_key(&AES192_KEY) }; assert_eq!(store_expanded_keys(ek), AES192_EXP_KEYS); } #[test] +#[cfg_attr( + not(target_feature = "aes"), + ignore = "requires enabled `aes` target feature" +)] fn aes256_key_expansion() { let ek = unsafe { expand_key(&AES256_KEY) }; assert_eq!(store_expanded_keys(ek), AES256_EXP_KEYS); From 0dd075ad6fc02515cdeed2a1d9a3d5be74320f95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 31 Jul 2024 21:09:26 +0300 Subject: [PATCH 6/9] Change `par_size` for armv8 backends --- aes/src/armv8.rs | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/aes/src/armv8.rs b/aes/src/armv8.rs index 3c51a5a0..b7dce6a7 100644 --- a/aes/src/armv8.rs +++ b/aes/src/armv8.rs @@ -29,7 +29,7 @@ impl_backends!( dec_name = Aes128BackDec, key_size = consts::U16, keys_ty = expand::Aes128RoundKeys, - par_size = consts::U15, + par_size = consts::U21, expand_keys = expand::expand_key, inv_keys = expand::inv_expanded_keys, encrypt = encdec::encrypt, @@ -41,7 +41,7 @@ impl_backends!( impl_backends!( enc_name = Aes192BackEnc, dec_name = Aes192BackDec, - key_size = consts::U24, + key_size = consts::U19, keys_ty = expand::Aes192RoundKeys, par_size = consts::U15, expand_keys = expand::expand_key, @@ -57,7 +57,7 @@ impl_backends!( dec_name = Aes256BackDec, key_size = consts::U32, keys_ty = expand::Aes256RoundKeys, - par_size = consts::U15, + par_size = consts::U17, expand_keys = expand::expand_key, inv_keys = expand::inv_expanded_keys, encrypt = encdec::encrypt, From 898fd85bde5f781ab3f0969e5926fa86eb94fc70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 31 Jul 2024 21:15:12 +0300 Subject: [PATCH 7/9] fix --- aes/src/armv8.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aes/src/armv8.rs b/aes/src/armv8.rs index b7dce6a7..07634feb 100644 --- a/aes/src/armv8.rs +++ b/aes/src/armv8.rs @@ -41,9 +41,9 @@ impl_backends!( impl_backends!( enc_name = Aes192BackEnc, dec_name = Aes192BackDec, - key_size = consts::U19, + key_size = consts::U24, keys_ty = expand::Aes192RoundKeys, - par_size = consts::U15, + par_size = consts::U19, expand_keys = expand::expand_key, inv_keys = expand::inv_expanded_keys, encrypt = encdec::encrypt, From 5901c8ad7780c08227513e004bb4226a45fd7121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Thu, 1 Aug 2024 08:53:08 +0300 
Subject: [PATCH 8/9] Tweak par_size for AES-NI backends --- aes/src/ni.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aes/src/ni.rs b/aes/src/ni.rs index d08137bc..2563df6f 100644 --- a/aes/src/ni.rs +++ b/aes/src/ni.rs @@ -40,7 +40,7 @@ impl_backends!( dec_name = Aes128BackDec, key_size = consts::U16, keys_ty = expand::Aes128RoundKeys, - par_size = consts::U15, + par_size = consts::U9, expand_keys = expand::aes128_expand_key, inv_keys = expand::inv_keys, encrypt = encdec::encrypt, @@ -54,7 +54,7 @@ impl_backends!( dec_name = Aes192BackDec, key_size = consts::U24, keys_ty = expand::Aes192RoundKeys, - par_size = consts::U15, + par_size = consts::U9, expand_keys = expand::aes192_expand_key, inv_keys = expand::inv_keys, encrypt = encdec::encrypt, @@ -68,7 +68,7 @@ impl_backends!( dec_name = Aes256BackDec, key_size = consts::U32, keys_ty = expand::Aes256RoundKeys, - par_size = consts::U15, + par_size = consts::U9, expand_keys = expand::aes256_expand_key, inv_keys = expand::inv_keys, encrypt = encdec::encrypt, From e42ac9efd8bc996300702c273f51446c1618a872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 7 Aug 2024 17:33:10 +0300 Subject: [PATCH 9/9] Rename state to blocks --- aes/src/armv8/encdec.rs | 108 ++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/aes/src/armv8/encdec.rs b/aes/src/armv8/encdec.rs index 0147887f..c3559b54 100644 --- a/aes/src/armv8/encdec.rs +++ b/aes/src/armv8/encdec.rs @@ -18,21 +18,21 @@ pub(super) unsafe fn encrypt( ) { assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); let (in_ptr, out_ptr) = block.into_raw(); - let mut state = vld1q_u8(in_ptr.cast()); + let mut block = vld1q_u8(in_ptr.cast()); for &key in &keys[..KEYS - 2] { // AES single round encryption - state = vaeseq_u8(state, key); + block = vaeseq_u8(block, key); // Mix columns - state = 
vaesmcq_u8(state); + block = vaesmcq_u8(block); } // AES single round encryption - state = vaeseq_u8(state, keys[KEYS - 2]); + block = vaeseq_u8(block, keys[KEYS - 2]); // Final add (bitwise XOR) - state = veorq_u8(state, keys[KEYS - 1]); + block = veorq_u8(block, keys[KEYS - 1]); - vst1q_u8(out_ptr.cast(), state); + vst1q_u8(out_ptr.cast(), block); } /// Perform AES decryption using the given expanded keys. @@ -44,21 +44,21 @@ pub(super) unsafe fn decrypt( assert!(KEYS == 11 || KEYS == 13 || KEYS == 15); let (in_ptr, out_ptr) = block.into_raw(); - let mut state = vld1q_u8(in_ptr.cast()); + let mut block = vld1q_u8(in_ptr.cast()); for &key in &keys[..KEYS - 2] { // AES single round decryption - state = vaesdq_u8(state, key); + block = vaesdq_u8(block, key); // Inverse mix columns - state = vaesimcq_u8(state); + block = vaesimcq_u8(block); } // AES single round decryption - state = vaesdq_u8(state, keys[KEYS - 2]); + block = vaesdq_u8(block, keys[KEYS - 2]); // Final add (bitwise XOR) - state = veorq_u8(state, keys[KEYS - 1]); + block = veorq_u8(block, keys[KEYS - 1]); - vst1q_u8(out_ptr.cast(), state); + vst1q_u8(out_ptr.cast(), block); } /// Perform parallel AES encryption 8-blocks-at-a-time using the given expanded keys. 
@@ -70,11 +70,11 @@ pub(super) unsafe fn encrypt_par( #[inline(always)] unsafe fn par_round( key: uint8x16_t, - state: &mut Array, + blocks: &mut Array, ) { - for s in state { + for block in blocks { // AES single round encryption and mix columns - *s = vaesmcq_u8(vaeseq_u8(*s, key)); + *block = vaesmcq_u8(vaeseq_u8(*block, key)); } } @@ -85,37 +85,37 @@ pub(super) unsafe fn encrypt_par( let out_ptr: *mut Block = out_ptr.cast(); // Load plaintext blocks - let mut state: Array = mem::zeroed(); + let mut blocks: Array = mem::zeroed(); for i in 0..ParBlocks::USIZE { - state[i] = vld1q_u8(in_ptr.add(i).cast()); + blocks[i] = vld1q_u8(in_ptr.add(i).cast()); } // Loop is intentionally not used here to enforce inlining - par_round(keys[0], &mut state); - par_round(keys[1], &mut state); - par_round(keys[2], &mut state); - par_round(keys[3], &mut state); - par_round(keys[4], &mut state); - par_round(keys[5], &mut state); - par_round(keys[6], &mut state); - par_round(keys[7], &mut state); - par_round(keys[8], &mut state); + par_round(keys[0], &mut blocks); + par_round(keys[1], &mut blocks); + par_round(keys[2], &mut blocks); + par_round(keys[3], &mut blocks); + par_round(keys[4], &mut blocks); + par_round(keys[5], &mut blocks); + par_round(keys[6], &mut blocks); + par_round(keys[7], &mut blocks); + par_round(keys[8], &mut blocks); if KEYS >= 13 { - par_round(keys[9], &mut state); - par_round(keys[10], &mut state); + par_round(keys[9], &mut blocks); + par_round(keys[10], &mut blocks); } if KEYS == 15 { - par_round(keys[11], &mut state); - par_round(keys[12], &mut state); + par_round(keys[11], &mut blocks); + par_round(keys[12], &mut blocks); } for i in 0..ParBlocks::USIZE { // AES single round encryption - state[i] = vaeseq_u8(state[i], keys[KEYS - 2]); + blocks[i] = vaeseq_u8(blocks[i], keys[KEYS - 2]); // Final add (bitwise XOR) - state[i] = veorq_u8(state[i], keys[KEYS - 1]); + blocks[i] = veorq_u8(blocks[i], keys[KEYS - 1]); // Save encrypted blocks - 
vst1q_u8(out_ptr.add(i).cast(), state[i]); + vst1q_u8(out_ptr.add(i).cast(), blocks[i]); } } @@ -128,11 +128,11 @@ pub(super) unsafe fn decrypt_par( #[inline(always)] unsafe fn par_round( key: uint8x16_t, - state: &mut Array, + blocks: &mut Array, ) { - for s in state { + for block in blocks { // AES single round decryption and inverse mix columns - *s = vaesimcq_u8(vaesdq_u8(*s, key)); + *block = vaesimcq_u8(vaesdq_u8(*block, key)); } } @@ -143,36 +143,36 @@ pub(super) unsafe fn decrypt_par( let out_ptr: *mut Block = out_ptr.cast(); // Load encrypted blocks - let mut state: Array = mem::zeroed(); + let mut blocks: Array = mem::zeroed(); for i in 0..ParBlocks::USIZE { - state[i] = vld1q_u8(in_ptr.add(i).cast()); + blocks[i] = vld1q_u8(in_ptr.add(i).cast()); } // Loop is intentionally not used here to enforce inlining - par_round(keys[0], &mut state); - par_round(keys[1], &mut state); - par_round(keys[2], &mut state); - par_round(keys[3], &mut state); - par_round(keys[4], &mut state); - par_round(keys[5], &mut state); - par_round(keys[6], &mut state); - par_round(keys[7], &mut state); - par_round(keys[8], &mut state); + par_round(keys[0], &mut blocks); + par_round(keys[1], &mut blocks); + par_round(keys[2], &mut blocks); + par_round(keys[3], &mut blocks); + par_round(keys[4], &mut blocks); + par_round(keys[5], &mut blocks); + par_round(keys[6], &mut blocks); + par_round(keys[7], &mut blocks); + par_round(keys[8], &mut blocks); if KEYS >= 13 { - par_round(keys[9], &mut state); - par_round(keys[10], &mut state); + par_round(keys[9], &mut blocks); + par_round(keys[10], &mut blocks); } if KEYS == 15 { - par_round(keys[11], &mut state); - par_round(keys[12], &mut state); + par_round(keys[11], &mut blocks); + par_round(keys[12], &mut blocks); } for i in 0..ParBlocks::USIZE { // AES single round decryption - state[i] = vaesdq_u8(state[i], keys[KEYS - 2]); + blocks[i] = vaesdq_u8(blocks[i], keys[KEYS - 2]); // Final add (bitwise XOR) - state[i] = veorq_u8(state[i], 
keys[KEYS - 1]); + blocks[i] = veorq_u8(blocks[i], keys[KEYS - 1]); // Save plaintext blocks - vst1q_u8(out_ptr.add(i) as *mut u8, state[i]); + vst1q_u8(out_ptr.add(i) as *mut u8, blocks[i]); } }