From 849baf41d8cba5d9e023c32dfd4c4f98cceda095 Mon Sep 17 00:00:00 2001 From: David Palm Date: Thu, 17 May 2018 11:27:50 +0200 Subject: [PATCH 1/2] Don't unroll outer loop Not unrolling the outer loop seems to speed up hashing quite significantly: Original (unrolled): ``` running 3 tests test bench_keccak_256_with_empty_input ... bench: 557 ns/iter (+/- 46) test bench_keccak_256_with_large_input ... bench: 17,288 ns/iter (+/- 1,871) = 236 MB/s test bench_keccak_256_with_typical_input ... bench: 577 ns/iter (+/- 28) = 88 MB/s ``` This branch (not unrolled): ``` running 3 tests test bench_keccak_256_with_empty_input ... bench: 487 ns/iter (+/- 25) test bench_keccak_256_with_large_input ... bench: 14,645 ns/iter (+/- 675) = 279 MB/s test bench_keccak_256_with_typical_input ... bench: 495 ns/iter (+/- 32) = 103 MB/s ``` "Inspired" by https://github.com/RustCrypto/sponges/blob/master/keccak/src/lib.rs#L138 Running benchmarks from the `keccak-hash` crate so we can compare to the numbers [here](https://github.com/paritytech/keccak-hash/pull/1). --- .gitignore | 1 + src/lib.rs | 98 ++++++++++++++++++++++++++---------------------------- 2 files changed, 49 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index d4f917d..9b38af7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ target Cargo.lock *.swp +.idea diff --git a/src/lib.rs b/src/lib.rs index 877a527..c33c18b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,72 +67,70 @@ const RC: [u64; 24] = [ /// keccak-f[1600] pub fn keccakf(a: &mut [u64; PLEN]) { let mut arrays: [[u64; 5]; 24] = [[0; 5]; 24]; - - unroll! { - for i in 0..24 { - // Theta - unroll! { - for x in 0..5 { - // This looks useless but it gets way slower without it. I tried using - // `mem::uninitialized` for the initialisation of `arrays` but that also makes - // it slower, although not by as much as removing this assignment. Optimisers - // are weird. 
Maybe a different version of LLVM will react differently, so if - // you see this comment in the future try deleting this assignment and using - // uninit above and see how it affects the benchmarks. - arrays[i][x] = 0; - - unroll! { - for y_count in 0..5 { - let y = y_count * 5; - arrays[i][x] ^= a[x + y]; - } + // Not unrolling this is faster, see https://github.com/RustCrypto/sponges/blob/master/keccak/src/lib.rs#L138 + for i in 0..24 { + // Theta + unroll! { + for x in 0..5 { + // This looks useless but it gets way slower without it. I tried using + // `mem::uninitialized` for the initialisation of `arrays` but that also makes + // it slower, although not by as much as removing this assignment. Optimisers + // are weird. Maybe a different version of LLVM will react differently, so if + // you see this comment in the future try deleting this assignment and using + // uninit above and see how it affects the benchmarks. + arrays[i][x] = 0; + + unroll! { + for y_count in 0..5 { + let y = y_count * 5; + arrays[i][x] ^= a[x + y]; } } } + } - unroll! { - for x in 0..5 { - unroll! { - for y_count in 0..5 { - let y = y_count * 5; - a[y + x] ^= arrays[i][(x + 4) % 5] ^ arrays[i][(x + 1) % 5].rotate_left(1); - } + unroll! { + for x in 0..5 { + unroll! { + for y_count in 0..5 { + let y = y_count * 5; + a[y + x] ^= arrays[i][(x + 4) % 5] ^ arrays[i][(x + 1) % 5].rotate_left(1); } } } + } - // Rho and pi - let mut last = a[1]; - unroll! { - for x in 0..24 { - arrays[i][0] = a[PI[x]]; - a[PI[x]] = last.rotate_left(RHO[x]); - last = arrays[i][0]; - } + // Rho and pi + let mut last = a[1]; + unroll! { + for x in 0..24 { + arrays[i][0] = a[PI[x]]; + a[PI[x]] = last.rotate_left(RHO[x]); + last = arrays[i][0]; } + } - // Chi - unroll! { - for y_step in 0..5 { - let y = y_step * 5; + // Chi + unroll! { + for y_step in 0..5 { + let y = y_step * 5; - unroll! { - for x in 0..5 { - arrays[i][x] = a[y + x]; - } + unroll! { + for x in 0..5 { + arrays[i][x] = a[y + x]; } + } - unroll! 
{ - for x in 0..5 { - a[y + x] = arrays[i][x] ^ ((!arrays[i][(x + 1) % 5]) & (arrays[i][(x + 2) % 5])); - } + unroll! { + for x in 0..5 { + a[y + x] = arrays[i][x] ^ ((!arrays[i][(x + 1) % 5]) & (arrays[i][(x + 2) % 5])); } } - }; + } + }; - // Iota - a[0] ^= RC[i]; - } + // Iota + a[0] ^= RC[i]; } } From 40a1fe5841dc9bcec17c47232b2ac3ce2ee135e1 Mon Sep 17 00:00:00 2001 From: David Palm Date: Fri, 18 May 2018 08:56:46 +0200 Subject: [PATCH 2/2] Add test for hashing with same buffer for input and output --- tests/test.rs | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tests/test.rs b/tests/test.rs index 054cafd..251a5bb 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,4 +1,5 @@ extern crate tiny_keccak; +extern crate core; use tiny_keccak::*; @@ -20,20 +21,26 @@ fn empty_keccak() { } #[test] -fn string_keccak_256() { - let mut keccak_256 = Keccak::new_keccak256(); - let data: Vec = From::from("hello world"); - keccak_256.update(&data); - let mut res : [u8;32] = [0;32]; - keccak_256.finalize(&mut res); - let expected = vec![ - 0x47, 0x17, 0x32, 0x85, 0xa8, 0xd7, 0x34, 0x1e, - 0x5e, 0x97, 0x2f, 0xc6, 0x77, 0x28, 0x63, 0x84, - 0xf8, 0x02, 0xf8, 0xef, 0x42, 0xa5, 0xec, 0x5f, - 0x03, 0xbb, 0xfa, 0x25, 0x4c, 0xb0, 0x1f, 0xad - ]; - let ref_ex: &[u8] = &expected; - assert_eq!(&res, ref_ex); +fn string_keccak_256_overlapping_buffer() { + let mut in_and_out : [u8; 32] = [0;32]; + for i in 1..6 { in_and_out[i as usize - 1] = i } + + let ptr = in_and_out.as_mut_ptr(); + Keccak::keccak256( + unsafe { + core::slice::from_raw_parts(ptr, 5) // read a piece from start of in_and_out + }, + &mut in_and_out, // write over the whole array + ); + + let expected = vec![125, 135, 197, 234, 117, 247, 55, 139, 183, 1, 228, 4, 197, 6, 57, 22, 26, 243, 239, 246, 98, 147, 233, 243, 117, 181, 241, 126, 181, 4, 118, 244]; + assert_eq!(&in_and_out, &expected.as_ref()); + + // Verify using overlapping in/out buffers yields same result as 
a "normal" hash + let control_in : [u8;5] = [1,2,3,4,5]; + let mut control_out : [u8;32] = [0;32]; + Keccak::keccak256(&control_in, &mut control_out); + assert_eq!(&control_out, &in_and_out); } #[test]