From 1fa096e1143bf1f0f47c793f72e6d88b492f403c Mon Sep 17 00:00:00 2001 From: Adrian Cruceru Date: Mon, 8 Jun 2020 18:43:55 +0200 Subject: [PATCH] Problem due to dependency: https://doc.rust-lang.org/beta/src/core/up/stdarch/crates/core_arch/src/x86/sse2.rs.html#1377-1379 pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 { simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32 } It extracts as i16 and then converts i16 to i32... which does sign extension. Which means we cannot use the function directly - we need to cast to u16 first then to u32 - otherwise we get 0xffffffff instead of 0xffff --- src/decode/avx2.rs | 7 +- tests/decode.rs | 205 +++++++++++++++++++++++++++++++++++++++++++++ tests/encode.rs | 55 ++++++++++++ 3 files changed, 265 insertions(+), 2 deletions(-) create mode 100644 tests/decode.rs create mode 100644 tests/encode.rs diff --git a/src/decode/avx2.rs b/src/decode/avx2.rs index 0071b14..1de948d 100644 --- a/src/decode/avx2.rs +++ b/src/decode/avx2.rs @@ -132,8 +132,11 @@ unsafe fn decode_avx2(input: __m256i) -> (__m256i, u32, u32) { let mask0 = _mm_cmpestrm(valid_nonws_set, 14, lane0, 16, CMP_FLAGS); let mask1 = _mm_cmpestrm(valid_nonws_set, 14, lane1, 16, CMP_FLAGS); // Combine bitmasks into integer value - let valid_mask = - _mm_extract_epi16(mask0, 0) as u32 | ((_mm_extract_epi16(mask1, 0) as u32) << 16); + + let first = _mm_extract_epi16(mask0, 0) as u16; + let second = _mm_extract_epi16(mask1, 0) as u16; + + let valid_mask = first as u32 + ((second as u32) << 16); (result, invalid_mask as _, valid_mask as _) } diff --git a/tests/decode.rs b/tests/decode.rs new file mode 100644 index 0000000..f37b473 --- /dev/null +++ b/tests/decode.rs @@ -0,0 +1,205 @@ +use b64_ct::*; + +const BASE64_PEM_WRAP: usize = 64; + +static BASE64_PEM: b64_ct::Config = b64_ct::Config { + char_set: b64_ct::CharacterSet::Standard, + newline: b64_ct::Newline::LF, + pad: true, + line_length: Some(BASE64_PEM_WRAP), +}; + +#[test] +fn 
test_wrapping_base64() { + let mut v: Vec<u8> = vec![]; + let bytes_per_line = BASE64_PEM_WRAP * 3 / 4; + for _i in 0..2*bytes_per_line { + let encoded = v.to_base64(BASE64_PEM); + let decoded = encoded.from_base64().unwrap(); + assert_eq!(v, decoded); + v.push(0); + } +} + +#[test] +fn test_wrapping_base64_random() { + let mut v: Vec<u8> = vec![]; + for _i in 0..1000 { + let encoded = v.to_base64(BASE64_PEM); + let decoded = encoded.from_base64().unwrap(); + assert_eq!(v, decoded); + v.push(rand::random::<u8>()); + } +} + +#[test] +fn test_constant_strings() { + assert_eq!("".from_base64().unwrap(), vec![]); + assert_eq!("Zg==".from_base64().unwrap(), "f".as_bytes()); + assert_eq!("Zg".from_base64().unwrap(), "f".as_bytes()); + assert_eq!("Zg".from_base64().unwrap(), "f".as_bytes()); + assert_eq!("Zm8=".from_base64().unwrap(), "fo".as_bytes()); + assert_eq!("Zm8".from_base64().unwrap(), "fo".as_bytes()); + assert_eq!("Zm9y".from_base64().unwrap(), "for".as_bytes()); + assert_eq!("Zm9ydA==".from_base64().unwrap(), "fort".as_bytes()); + assert_eq!("Zm9ydA".from_base64().unwrap(), "fort".as_bytes()); + assert_eq!("Zm9v".from_base64().unwrap(), "foo".as_bytes()); + assert_eq!("Zm9vYg==".from_base64().unwrap(), "foob".as_bytes()); + assert_eq!("Zm9vYg".from_base64().unwrap(), "foob".as_bytes()); + assert_eq!("Zm9vYmE=".from_base64().unwrap(), "fooba".as_bytes()); + assert_eq!("Zm9vYmE".from_base64().unwrap(), "fooba".as_bytes()); + assert_eq!("Zm9vYmFy".from_base64().unwrap(), "foobar".as_bytes()); + + "YWx\0pY2U==".from_base64().unwrap_err(); + +} + +#[test] +fn decode_1_pad_byte_in_fast_loop_then_extra_padding_chunk_error() { + for num_quads in 0..25 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + s.push_str("YWxpY2U====="); + + // since the first 8 bytes are handled in stage 1 or 2, the padding is detected as a + // generic invalid byte, not specifically a padding issue. 
+ // Could argue that the *next* padding byte (in the next quad) is technically the first + // erroneous one, but reporting that accurately is more complex and probably nobody cares + s.from_base64().unwrap_err(); + } +} + +#[test] +fn decode_2_pad_bytes_in_leftovers_then_extra_padding_chunk_error() { + for num_quads in 0..25 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + s.push_str("YWxpY2UABB===="); + + s.from_base64().unwrap_err(); + } +} + +#[test] +fn decode_valid_bytes_after_padding_in_leftovers_error() { + for num_quads in 0..25 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + s.push_str("YWxpY2UABB=B"); + + // 4 bytes after last 8-byte chunk, so it's decoded by stage 4. + // First (and only) padding byte is invalid. + s.from_base64().unwrap_err(); + } +} + +#[test] +fn decode_absurd_pad_error() { + for num_quads in 0..25 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + s.push_str("==Y=Wx===pY=2U====="); + + // Plenty of remaining bytes, so handled by stage 1 or 2. 
+ // first padding byte + s.from_base64().unwrap_err(); + } +} +#[test] +fn decode_extra_padding_after_1_pad_bytes_in_trailing_quad_returns_error() { + for num_quads in 0..25 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + s.push_str("EEE==="); + + // handled by stage 1, 2, or 4 depending on length + // first padding byte -- which would be legal if it was the only padding + s.from_base64().unwrap_err(); + } +} + +#[test] +fn decode_extra_padding_after_2_pad_bytes_in_trailing_quad_2_returns_error() { + for num_quads in 0..25 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + s.push_str("EE===="); + + // handled by stage 1, 2, or 4 depending on length + // first padding byte -- which would be legal if it was by itself + s.from_base64().unwrap_err(); + } +} + +#[test] +fn decode_start_quad_with_padding_returns_error() { + for num_quads in 0..25 { + // add enough padding to ensure that we'll hit all 4 stages at the different lengths + for pad_bytes in 1..32 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + let padding: String = std::iter::repeat("=").take(pad_bytes).collect(); + s.push_str(&padding); + + s.from_base64().unwrap_err(); + } + } +} + +#[test] +fn decode_padding_followed_by_non_padding_returns_error() { + for num_quads in 0..25 { + for pad_bytes in 0..31 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + let padding: String = std::iter::repeat("=").take(pad_bytes).collect(); + s.push_str(&padding); + s.push_str("E"); + + s.from_base64().unwrap_err(); + } + } +} + +#[test] +fn decode_one_char_in_quad_with_padding_error() { + for num_quads in 0..25 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + s.push_str("E="); + + s.from_base64().unwrap_err(); + + // more padding doesn't change the error + s.push_str("="); + s.from_base64().unwrap_err(); + + s.push_str("="); + s.from_base64().unwrap_err(); + } +} + +#[test] +fn 
decode_one_char_in_quad_without_padding_error() { + for num_quads in 0..25 { + let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + s.push('E'); + + s.from_base64().unwrap_err(); + } +} + +#[test] +fn decode_reject_invalid_bytes_with_correct_error() { + for length in 1..100 { + for index in 0_usize..length { + for invalid_byte in "\x0B\x00%*.".bytes() { + let prefix: String = std::iter::repeat("A").take(index).collect(); + let suffix: String = std::iter::repeat("B").take(length - index - 1).collect(); + + let input = prefix + &String::from_utf8(vec![invalid_byte]).unwrap() + &suffix; + assert_eq!( + length, + input.len(), + "length {} error position {}", + length, + index + ); + + input.from_base64().unwrap_err(); + } + } + } +} + diff --git a/tests/encode.rs b/tests/encode.rs new file mode 100644 index 0000000..da59a75 --- /dev/null +++ b/tests/encode.rs @@ -0,0 +1,55 @@ +use b64_ct::*; + +const BASE64_PEM_WRAP: usize = 100000; + +static BASE64_PEM: b64_ct::Config = b64_ct::Config { + char_set: b64_ct::CharacterSet::Standard, + newline: b64_ct::Newline::LF, + pad: true, + line_length: Some(BASE64_PEM_WRAP), +}; + +#[test] +fn test_constant_strings() { + assert_eq!("", "".as_bytes().to_base64(BASE64_PEM)); + assert_eq!("Zg==", "f".as_bytes().to_base64(BASE64_PEM)); + assert_eq!("Zm8=", "fo".as_bytes().to_base64(BASE64_PEM)); + assert_eq!("Zm9y", "for".as_bytes().to_base64(BASE64_PEM)); + assert_eq!("Zm9ydA==", "fort".as_bytes().to_base64(BASE64_PEM)); + assert_eq!("Zm9v", "foo".as_bytes().to_base64(BASE64_PEM)); + assert_eq!("Zm9vYg==", "foob".as_bytes().to_base64(BASE64_PEM)); + assert_eq!("Zm9vYmE=", "fooba".as_bytes().to_base64(BASE64_PEM)); + assert_eq!("Zm9vYmFy", "foobar".as_bytes().to_base64(BASE64_PEM)); +} + +#[test] +fn encode_all_ascii() { + let mut ascii = Vec::<u8>::with_capacity(128); + for i in 0..128 { + ascii.push(i); + } + + + assert_eq!("AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\ + 
D0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8\ + =", + ascii.to_base64(BASE64_PEM)); + +} + +#[test] +fn encode_all_bytes() { + let mut bytes = Vec::<u8>::with_capacity(256); + + for i in 0..255 { + bytes.push(i); + } + bytes.push(255); //bug with "overflowing" ranges? + + assert_eq!( + "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\ + D0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn\ + +AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6\ + /wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/w==", + bytes.to_base64(BASE64_PEM)); +}