Problem due to dependency:

https://doc.rust-lang.org/beta/src/core/up/stdarch/crates/core_arch/src/x86/sse2.rs.html#1377-1379

    pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 { simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32 }

It extracts the lane as an i16 and then converts that i16 to i32, which performs sign extension. This means we cannot cast the function's result directly to u32: we need to cast to u16 first and then to u32. Otherwise a lane holding 0xffff comes back as 0xffffffff instead of 0xffff.
fortanix · Jun 8, 2020 · b4acfb7 · b4acfb7
1 parent ba5e463
commit b4acfb7
Show file tree

Hide file tree

Showing 4 changed files with 265 additions and 3 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "b64-ct"
-version = "0.1.0"
+version = "0.1.1"
 authors = ["Fortanix, Inc."]
 license = "MPL-2.0"
 edition = "2018"

diff --git a/src/decode/avx2.rs b/src/decode/avx2.rs
@@ -131,9 +131,11 @@ unsafe fn decode_avx2(input: __m256i) -> (__m256i, u32, u32) {
     const CMP_FLAGS: i32 = _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_BIT_MASK;
     let mask0 = _mm_cmpestrm(valid_nonws_set, 14, lane0, 16, CMP_FLAGS);
     let mask1 = _mm_cmpestrm(valid_nonws_set, 14, lane1, 16, CMP_FLAGS);
+
     // Combine bitmasks into integer value
-    let valid_mask =
-        _mm_extract_epi16(mask0, 0) as u32 | ((_mm_extract_epi16(mask1, 0) as u32) << 16);
+    let first = _mm_extract_epi16(mask0, 0) as u16;
+    let second = _mm_extract_epi16(mask1, 0) as u16;
+    let valid_mask = first as u32 + ((second as u32) << 16);
 
     (result, invalid_mask as _, valid_mask as _)
 }

diff --git a/tests/decode.rs b/tests/decode.rs
@@ -0,0 +1,205 @@
+use b64_ct::*;
+
+const BASE64_PEM_WRAP: usize = 64;
+
+static BASE64_PEM: b64_ct::Config = b64_ct::Config {
+    char_set: b64_ct::CharacterSet::Standard,
+    newline: b64_ct::Newline::LF,
+    pad: true,
+    line_length: Some(BASE64_PEM_WRAP),
+};
+
+#[test]
+fn test_wrapping_base64() {
+    let mut v: Vec<u8> = vec![];
+    let bytes_per_line = BASE64_PEM_WRAP * 3 / 4;
+    for _i in 0..2*bytes_per_line {
+        let encoded = v.to_base64(BASE64_PEM);
+        let decoded = encoded.from_base64().unwrap();
+        assert_eq!(v, decoded);
+        v.push(0);
+    }
+}
+
+#[test]
+fn test_wrapping_base64_random() {
+    let mut v: Vec<u8> = vec![];
+    for _i in 0..1000 {
+        let encoded = v.to_base64(BASE64_PEM);
+        let decoded = encoded.from_base64().unwrap();
+        assert_eq!(v, decoded);
+        v.push(rand::random::<u8>());
+    }
+}
+
+#[test]
+fn test_constant_strings() {
+    assert_eq!("".from_base64().unwrap(), vec![]);
+    assert_eq!("Zg==".from_base64().unwrap(), "f".as_bytes());
+    assert_eq!("Zg".from_base64().unwrap(), "f".as_bytes());
+    assert_eq!("Zg".from_base64().unwrap(), "f".as_bytes());
+    assert_eq!("Zm8=".from_base64().unwrap(), "fo".as_bytes());
+    assert_eq!("Zm8".from_base64().unwrap(), "fo".as_bytes());
+    assert_eq!("Zm9y".from_base64().unwrap(), "for".as_bytes());
+    assert_eq!("Zm9ydA==".from_base64().unwrap(), "fort".as_bytes());
+    assert_eq!("Zm9ydA".from_base64().unwrap(), "fort".as_bytes());
+    assert_eq!("Zm9v".from_base64().unwrap(), "foo".as_bytes());
+    assert_eq!("Zm9vYg==".from_base64().unwrap(), "foob".as_bytes());
+    assert_eq!("Zm9vYg".from_base64().unwrap(), "foob".as_bytes());
+    assert_eq!("Zm9vYmE=".from_base64().unwrap(), "fooba".as_bytes());
+    assert_eq!("Zm9vYmE".from_base64().unwrap(), "fooba".as_bytes());
+    assert_eq!("Zm9vYmFy".from_base64().unwrap(), "foobar".as_bytes());
+
+    "YWx\0pY2U==".from_base64().unwrap_err();
+
+}
+
+#[test]
+fn decode_1_pad_byte_in_fast_loop_then_extra_padding_chunk_error() {
+    for num_quads in 0..25 {
+        let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+        s.push_str("YWxpY2U=====");
+
+        // since the first 8 bytes are handled in stage 1 or 2, the padding is detected as a
+        // generic invalid byte, not specifically a padding issue.
+        // Could argue that the *next* padding byte (in the next quad) is technically the first
+        // erroneous one, but reporting that accurately is more complex and probably nobody cares
+        s.from_base64().unwrap_err();
+    }
+}
+
+#[test]
+fn decode_2_pad_bytes_in_leftovers_then_extra_padding_chunk_error() {
+    for num_quads in 0..25 {
+        let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+        s.push_str("YWxpY2UABB====");
+
+        s.from_base64().unwrap_err();
+    }
+}
+
+#[test]
+fn decode_valid_bytes_after_padding_in_leftovers_error() {
+    for num_quads in 0..25 {
+        let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+        s.push_str("YWxpY2UABB=B");
+
+        // 4 bytes after last 8-byte chunk, so it's decoded by stage 4.
+        // First (and only) padding byte is invalid.
+        s.from_base64().unwrap_err();
+    }
+}
+
+#[test]
+fn decode_absurd_pad_error() {
+    for num_quads in 0..25 {
+        let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+        s.push_str("==Y=Wx===pY=2U=====");
+
+        // Plenty of remaining bytes, so handled by stage 1 or 2.
+        // first padding byte
+        s.from_base64().unwrap_err();
+    }
+}
+#[test]
+fn decode_extra_padding_after_1_pad_bytes_in_trailing_quad_returns_error() {
+    for num_quads in 0..25 {
+        let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+        s.push_str("EEE===");
+
+        // handled by stage 1, 2, or 4 depending on length
+        // first padding byte -- which would be legal if it was the only padding
+        s.from_base64().unwrap_err();
+    }
+}
+
+#[test]
+fn decode_extra_padding_after_2_pad_bytes_in_trailing_quad_2_returns_error() {
+    for num_quads in 0..25 {
+        let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+        s.push_str("EE====");
+
+        // handled by stage 1, 2, or 4 depending on length
+        // first padding byte -- which would be legal if it was by itself
+        s.from_base64().unwrap_err();
+    }
+}
+
+#[test]
+fn decode_start_quad_with_padding_returns_error() {
+    for num_quads in 0..25 {
+        // add enough padding to ensure that we'll hit all 4 stages at the different lengths
+        for pad_bytes in 1..32 {
+            let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+            let padding: String = std::iter::repeat("=").take(pad_bytes).collect();
+            s.push_str(&padding);
+
+            s.from_base64().unwrap_err();
+        }
+    }
+}
+
+#[test]
+fn decode_padding_followed_by_non_padding_returns_error() {
+    for num_quads in 0..25 {
+        for pad_bytes in 0..31 {
+            let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+            let padding: String = std::iter::repeat("=").take(pad_bytes).collect();
+            s.push_str(&padding);
+            s.push_str("E");
+
+            s.from_base64().unwrap_err();
+        }
+    }
+}
+
+#[test]
+fn decode_one_char_in_quad_with_padding_error() {
+    for num_quads in 0..25 {
+        let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+        s.push_str("E=");
+
+        s.from_base64().unwrap_err();
+
+        // more padding doesn't change the error
+        s.push_str("=");
+        s.from_base64().unwrap_err();
+
+        s.push_str("=");
+        s.from_base64().unwrap_err();
+    }
+}
+
+#[test]
+fn decode_one_char_in_quad_without_padding_error() {
+    for num_quads in 0..25 {
+        let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect();
+        s.push('E');
+
+        s.from_base64().unwrap_err();
+    }
+}
+
+#[test]
+fn decode_reject_invalid_bytes_with_correct_error() {
+    for length in 1..100 {
+        for index in 0_usize..length {
+            for invalid_byte in "\x0B\x00%*.".bytes() {
+                let prefix: String = std::iter::repeat("A").take(index).collect();
+                let suffix: String = std::iter::repeat("B").take(length - index - 1).collect();
+
+                let input = prefix + &String::from_utf8(vec![invalid_byte]).unwrap() + &suffix;
+                assert_eq!(
+                    length,
+                    input.len(),
+                    "length {} error position {}",
+                    length,
+                    index
+                );
+
+                input.from_base64().unwrap_err();
+            }
+        }
+    }
+}
+
diff --git a/tests/encode.rs b/tests/encode.rs
@@ -0,0 +1,55 @@
+use b64_ct::*;
+
+const BASE64_PEM_WRAP: usize = 100000;
+
+static BASE64_PEM: b64_ct::Config = b64_ct::Config {
+    char_set: b64_ct::CharacterSet::Standard,
+    newline: b64_ct::Newline::LF,
+    pad: true,
+    line_length: Some(BASE64_PEM_WRAP),
+};
+
+#[test]
+fn test_constant_strings() {
+    assert_eq!("", "".as_bytes().to_base64(BASE64_PEM));
+    assert_eq!("Zg==", "f".as_bytes().to_base64(BASE64_PEM));
+    assert_eq!("Zm8=", "fo".as_bytes().to_base64(BASE64_PEM));
+    assert_eq!("Zm9y", "for".as_bytes().to_base64(BASE64_PEM));
+    assert_eq!("Zm9ydA==", "fort".as_bytes().to_base64(BASE64_PEM));
+    assert_eq!("Zm9v", "foo".as_bytes().to_base64(BASE64_PEM));
+    assert_eq!("Zm9vYg==", "foob".as_bytes().to_base64(BASE64_PEM));
+    assert_eq!("Zm9vYmE=", "fooba".as_bytes().to_base64(BASE64_PEM));
+    assert_eq!("Zm9vYmFy", "foobar".as_bytes().to_base64(BASE64_PEM));
+}
+
+#[test]
+fn encode_all_ascii() {
+    let mut ascii = Vec::<u8>::with_capacity(128);
+    for i in 0..128 {
+        ascii.push(i);
+    }
+
+
+    assert_eq!("AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\
+                D0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8\
+                =",
+               ascii.to_base64(BASE64_PEM));
+
+}
+
+#[test]
+fn encode_all_bytes() {
+    let mut bytes = Vec::<u8>::with_capacity(256);
+
+    for i in 0..255 {
+        bytes.push(i);
+    }
+    bytes.push(255); // pushed separately: the loop variable is a u8, so the range `0..256` would overflow it
+
+    assert_eq!(
+        "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\
+         D0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn\
+         +AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6\
+         /wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/w==",
+        bytes.to_base64(BASE64_PEM));
+}