From dfed9dd81c2f91947f235cf2d674e26449cfcadd Mon Sep 17 00:00:00 2001 From: grjte <91497953+grjte@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:05:02 +0000 Subject: [PATCH] feat: add support for base64url alphabet (#24) * fix: decoding of padded input and add length assertions * feat: add support for base64url encoding * test: url safe encoding pad/no pad * feat: add support for base64url decoding * test: url safe decoding pad/no pad * docs: update README and example tests for configurability * docs: update costs for encode/decode - note: based on profiling, it seems that the previous costs were wrong and that the current costs have been the same since the reversed encoding/decoding was fixed in commit cc5b18af99c22069748863257d8c6480e04dbd4a. * chore: rename encoder/decoder config names --- README.md | 76 +++++++--- src/decoder.nr | 394 ++++++++++++++++++++++++++++++++++++++++++++++--- src/encoder.nr | 75 +++++++++- src/lib.nr | 40 ++++- 4 files changed, 545 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 84cce30..de2840c 100644 --- a/README.md +++ b/README.md @@ -2,38 +2,80 @@ A Base64 encoding/decoding library written in Noir which can encode arbitrary byte arrays into Base64 and decode Base64-encoded byte arrays (e.g. `"SGVsbG8gV29ybGQ=".as_bytes()`). -# Usage +## Usage +### Configuration +Start by selecting the encoder or decoder for your configuration. These are defined separately so that only one lookup table will be instantiated at a time, since many cases will require either an encoder or a decoder but not both. -### `fn base64_encode` -Takes an arbitrary byte array as input, unpacks it into Base64 values, then encodes each Base64 value into an ASCII character according to the [standard Base64 alphabet](https://datatracker.ietf.org/doc/html/rfc4648#section-4), to return a byte array representing the Base64 encoding. The encoded result is *not padded*, so padding must be handled separately. 
+RFC 4648 specifies multiple alphabets, including the [standard Base 64 Alphabet](https://datatracker.ietf.org/doc/html/rfc4648#section-4) known as `base64` and the ["URL and Filename Safe Alphabet"](https://datatracker.ietf.org/doc/html/rfc4648#section-5) known as `base64url`. It also specifies that [padding](https://datatracker.ietf.org/doc/html/rfc4648#section-3.2) should be required in the general case but can be explicitly omitted as an option. -### `fn base64_decode` -Takes an ASCII byte array that encodes a Base64 string and decodes it into bytes. Input data is expected to be unpadded, so padding characters will cause decoding to fail. +Available encoder configurations: +- `BASE64_ENCODER`: uses the standard alphabet (base64) and adds padding. +- `BASE64_NO_PAD_ENCODER`: uses the standard alphabet (base64), but omits padding. +- `BASE64_URL_ENCODER`: uses the "URL and Filename Safe Alphabet" (base64url) and omits padding, which is common for `base64url` when the length is implicitly known, as in this case. +- `BASE64_URL_WITH_PAD_ENCODER`: uses the "URL and Filename Safe Alphabet" (base64url) and adds padding. -### `fn base64_encode_elements` -Takes an input byte array of ASCII characters and produces an output byte array of base64-encoded characters. Data is not packed i.e. each output array element maps to a 6-bit base64 character. +Available decoder configurations: +- `BASE64_DECODER`: uses the standard alphabet (base64) and expects correct padding. +- `BASE64_NO_PAD_DECODER`: uses the standard alphabet (base64), but expects all padding characters to have been stripped. A padding character encountered during decoding will trigger an error. +- `BASE64_URL_DECODER`: uses the "URL and Filename Safe Alphabet" (base64url), but expects all padding characters to have been stripped, which is common for `base64url` when the length is implicitly known, as in this case. A padding character encountered during decoding will trigger an error. 
+- `BASE64_URL_WITH_PAD_DECODER`: uses the "URL and Filename Safe Alphabet" (base64url) and expects correct padding. -### `fn base64_decode_elements` -Takes an input byte array of base64 characters and produces an output byte array of ASCII characters. Input data is not packed i.e. each input element maps to a 6-bit base64 character. Input data is expected not to contain padding characters. Padding characters will cause decoding to fail. +### `fn encode` +Takes an arbitrary byte array as input, encodes it in Base64 according to the alphabet and padding rules specified by the configuration, then encodes each Base64 character into UTF-8 to return a byte array representing the Base64 encoding. -### Example usage +``` +// bytes: [u8; N] +let base64 = BASE64_ENCODER.encode(bytes); +``` + +### `fn decode` +Takes a UTF-8 byte array that encodes a Base64 string and attempts to decode it into bytes according to the provided configuration specifying the alphabet and padding rules. + +``` +// base64: [u8; N] +let bytes = BASE64_DECODER.decode(base64); +``` + +## Example usage (see tests in `lib.nr` for more examples) ``` -use dep::noir_base64; fn encode_and_decode() { let input: str<88> = "The quick brown fox jumps over the lazy dog, while 42 ravens perch atop a rusty mailbox."; - let base64_encoded: str<118> = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg"; + let base64_encoded = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg=="; - let encoded:[u8; 118] = noir_base64::base64_encode(input.as_bytes()); + let encoded:[u8; 120] = noir_base64::BASE64_ENCODER.encode(input.as_bytes()); assert(encoded == base64_encoded.as_bytes()); - let decoded: [u8; 88] = noir_base64::base64_decode(encoded); + let decoded: [u8; 88] = noir_base64::BASE64_DECODER.decode(encoded); assert(decoded == input.as_bytes()); } ``` -# Costs -- `base64_encode` will 
encode an array of 88 bytes in ~1182 gates, plus a ~64 gate cost to initialize the encoding lookup table (the initialization cost is incurred once regardless of the number of encodings). -- `base64_decode` will decode an array of 118 bytes in ~2150 gates, plus a ~256 gate cost to initialize the decoding lookup table (the initialization cost is incurred once regardless of the number of decodings). +## Costs + +All of the benchmarks below are for the [Barretenberg proving backend](https://github.com/AztecProtocol/aztec-packages/tree/master/barretenberg). + +After the initial setup cost it is often cheaper to decode than to encode, as shown by the numbers below where the encode/decode were run over the same pairs of unencoded and base64-encoded text. + +| UTF-8 Length | Base64 Length | # times | # Gates to Encode | # Gates to Decode | +| ------------ | ------------- | ------- | ----------------- | ----------------- | +| 12 | 16 | 1 | 2946 | 1065 | +| 12 | 16 | 2 | 3057 | 1114 | +| 12 | 16 | 3 | 3166 | 1163 | +| 610 | 816 | 1 | 7349 | 8062 | +| 610 | 816 | 2 | 10993 | 9181 | +| 610 | 816 | 3 | 14597 | 10239 | + +### `encode` +Costs are equivalent for all encoder configurations. + +- encoding an array of 12 bytes into 16 base64 characters requires ~110 gates plus an initial setup cost of ~2836 gates. (Gate counts for encoding the same array 1, 2, and 3 times were 2946, 3057, and 3166 respectively.) +- encoding an array of 610 input bytes requires ~3625 gates plus an initial setup cost of ~3700 gates. (Gate counts for encoding the same array 1, 2, 3, 4 times were 7349, 10993, 14597, and 18200 respectively.) + +### `decode` +Decoding padded inputs costs 1-2 gates more than decoding unpadded inputs. Since the difference is marginal, the numbers below are only for the padded case. + +- decoding an array of 16 base64 characters into 12 bytes requires ~49 gates plus an initial setup cost of ~1016 gates. 
(Gate counts for encoding the same array 1, 2, and 3 times were 1065, 1114, and 1163 respectively.) +- decoding an array of 816 base64 characters (including padding) into 610 input bytes requires ~1060 gates plus an initial setup cost of ~7000 gates. (Gate counts for decoding the same array 1, 2, 3, 4 times were 8062, 9181, 10239, and 11298 respectively.) \ No newline at end of file diff --git a/src/decoder.nr b/src/decoder.nr index 0a2bc03..0553251 100644 --- a/src/decoder.nr +++ b/src/decoder.nr @@ -2,6 +2,8 @@ use super::defaults::BASE64_PADDING_CHAR; pub global STANDARD = Base64DecodeBE::new(true); pub global STANDARD_NO_PAD = Base64DecodeBE::new(false); +pub global URL_SAFE = Base64DecodeBE::base64url(false); +pub global URL_SAFE_WITH_PAD = Base64DecodeBE::base64url(true); global INVALID_VALUE: u8 = 255; struct Base64DecodeBE { @@ -278,6 +280,274 @@ impl Base64DecodeBE { } } + // Creates a new decoder that uses the URL and Filename Safe Alphabet specified in RFC 4648 + // https://datatracker.ietf.org/doc/html/rfc4648#section-5 + fn base64url(pad: bool) -> Self { + Base64DecodeBE { + table: [ + // 0-44 (no mapping) + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + 62, // 45 (-) + INVALID_VALUE, + INVALID_VALUE, // 46-47 (no mapping) + 52, + 53, + 54, + 55, + 
56, + 57, + 58, + 59, + 60, + 61, // 48-57 (0-9) + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, // 58-64 (no mapping) + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, // 65-90 (A-Z) + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, // 91-94 (no mapping) + 63, // 95 (_) + INVALID_VALUE, // 96 (no mapping) + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, // 97-122 (a-z) + // 123-255 + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + 
INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + INVALID_VALUE, + ], + pad, + } + } + fn get(self, idx: Field) -> u8 { self.table[idx] } @@ -291,9 +561,37 @@ impl Base64DecodeBE { self, input: [u8; InputElements], ) -> [u8; OutputBytes] { + let rem = OutputBytes % 3; + // Calculate the number of padding characters and the length of the input without padding + let num_padding_chars = if rem == 1 { + 2 + } else if rem == 2 { + 1 + } else { + 0 + }; + + // Assert that the output length & input length are correct + // Every 3 output chars will be encoded as 4 base64 input chars + let encoded_length = (OutputBytes + 2) / 3 * 4; // ceil(input * 4 / 3) if self.pad { - // if the input length is not a multiple of 4, then it's not a valid base64 encoding - assert(InputElements % 4 == 0); + assert( + encoded_length == InputElements, + f"DecodeError: invalid input length for specified output length. 
Expected {encoded_length} input elements, but got {InputElements}.", + ); + // enforce Base64 padding is valid + if num_padding_chars == 2 { + assert(input[InputElements - 1] == BASE64_PADDING_CHAR); + assert(input[InputElements - 2] == BASE64_PADDING_CHAR); + } else if num_padding_chars == 1 { + assert(input[InputElements - 1] == BASE64_PADDING_CHAR); + } + } else { + let expected = encoded_length - num_padding_chars; + assert( + encoded_length - num_padding_chars == InputElements, + f"DecodeError: invalid input length for specified output length. Expected {expected} input elements, but got {InputElements}.", + ); } // 240 bits fits 40 6-bit chunks and 30 8-bit chunks @@ -330,18 +628,11 @@ impl Base64DecodeBE { // process the final chunk, which may contain padding let base64_offset: u32 = final_chunk * BASE64_ELEMENTS_PER_CHUNK; let byte_offset = final_chunk * BYTES_PER_CHUNK; - let mut base64_elements_in_final_chunk = InputElements - base64_offset; - - if self.pad { - // enforce Base64 padding is valid, then strip the padding - if (input[InputElements - 2] == BASE64_PADDING_CHAR) { - // if a non-padding byte follows a padding byte, the base64 is invalid - assert(input[InputElements - 1] == BASE64_PADDING_CHAR); - base64_elements_in_final_chunk -= 2; - } else if (input[InputElements - 1] == BASE64_PADDING_CHAR) { - base64_elements_in_final_chunk -= 1; - } - } + let base64_elements_in_final_chunk = if self.pad { + InputElements - base64_offset - num_padding_chars + } else { + InputElements - base64_offset + }; // pack the base64 values into the field element let mut slice: Field = 0; @@ -440,15 +731,23 @@ fn test_decode_max_byte() { let input: [u8; 2] = [47, 119]; // "/w" let result: [u8; 1] = STANDARD_NO_PAD.decode(input); assert(result == expected); + + let input: [u8; 4] = [95, 119, 61, 61]; // "_w==" + let result: [u8; 1] = URL_SAFE_WITH_PAD.decode(input); + assert(result == expected); + + let input: [u8; 2] = [95, 119]; // "_w" + let result: [u8; 1] = 
URL_SAFE.decode(input); + assert(result == expected); } #[test(should_fail_with = "DecodeError: invalid symbol 255, offset 0")] fn test_decode_invalid() { - let input: [u8; 1] = [255]; - let _: [u8; 0] = STANDARD_NO_PAD.decode(input); + let input: [u8; 2] = [255, 255]; + let _: [u8; 1] = STANDARD_NO_PAD.decode(input); } -#[test(should_fail_with = "DecodeError: invalid symbol 61, offset 3")] +#[test(should_fail_with = "DecodeError: invalid input length for specified output length. Expected 3 input elements, but got 4.")] fn test_decode_standard_no_pad_fail_with_padding() { // test decoding / and + let input: [u8; 4] = [47, 43, 65, 61]; @@ -457,6 +756,59 @@ fn test_decode_standard_no_pad_fail_with_padding() { assert(result == expected); } +#[test] +fn test_decode_standard() { + // test decoding / and + + let input: [u8; 4] = [47, 43, 65, 61]; + let expected: [u8; 2] = [255, 224]; + let result: [u8; 2] = STANDARD.decode(input); + + assert(result == expected); +} + +#[test(should_fail_with = "DecodeError: invalid symbol 95, offset 0")] +fn test_decode_underscore_with_standard() { + // test decoding _ and - + let input: [u8; 4] = [95, 45, 65, 61]; + let _: [u8; 2] = STANDARD.decode(input); +} + +#[test] +fn test_decode_url_safe_with_pad() { + // test decoding _ and - + let input: [u8; 4] = [95, 45, 65, 61]; + let expected: [u8; 2] = [255, 224]; + let result: [u8; 2] = URL_SAFE_WITH_PAD.decode(input); + + assert(result == expected); +} + +#[test(should_fail_with = "DecodeError: invalid symbol 47, offset 0")] +fn test_decode_slash_with_url_safe() { + // test decoding / and + + let input: [u8; 3] = [47, 43, 65]; + let _: [u8; 2] = URL_SAFE.decode(input); +} + +#[test] +fn test_decode_url_safe() { + // test decoding _ and - + let input: [u8; 3] = [95, 45, 65]; + let expected: [u8; 2] = [255, 224]; + let result: [u8; 2] = URL_SAFE.decode(input); + + assert(result == expected); +} + +#[test(should_fail_with = "DecodeError: invalid input length for specified output length. 
Expected 3 input elements, but got 4.")] +fn test_decode_url_safe_no_pad_fail_with_padding() { + // test decoding _ and - + let input: [u8; 4] = [95, 45, 65, 61]; + let expected: [u8; 2] = [255, 224]; + let result: [u8; 2] = URL_SAFE.decode(input); + assert(result == expected); +} + #[test] fn test_decode_ascii() { // base64: SGVsbG8gV29ybGQh @@ -469,6 +821,10 @@ fn test_decode_ascii() { assert(result == expected); let result: [u8; 12] = STANDARD_NO_PAD.decode(input); assert(result == expected); + let result: [u8; 12] = URL_SAFE_WITH_PAD.decode(input); + assert(result == expected); + let result: [u8; 12] = URL_SAFE.decode(input); + assert(result == expected); } #[test] @@ -489,6 +845,10 @@ fn test_decode_utf8() { assert(result == expected); let result: [u8; 27] = STANDARD_NO_PAD.decode(input); assert(result == expected); + let result: [u8; 27] = URL_SAFE_WITH_PAD.decode(input); + assert(result == expected); + let result: [u8; 27] = URL_SAFE.decode(input); + assert(result == expected); } #[test] diff --git a/src/encoder.nr b/src/encoder.nr index b4b515c..c11ab12 100644 --- a/src/encoder.nr +++ b/src/encoder.nr @@ -2,6 +2,8 @@ use super::defaults::BASE64_PADDING_CHAR; pub global STANDARD = Base64EncodeBE::new(true); pub global STANDARD_NO_PAD = Base64EncodeBE::new(false); +pub global URL_SAFE = Base64EncodeBE::base64url(false); +pub global URL_SAFE_WITH_PAD = Base64EncodeBE::base64url(true); struct Base64EncodeBE { // for some reason, if the lookup table is not defined in a struct, access costs are expensive and ROM tables aren't being used :/ @@ -30,6 +32,24 @@ impl Base64EncodeBE { } } + // Creates a new encoder that uses the URL and Filename Safe Alphabet specified in RFC 4648 + // https://datatracker.ietf.org/doc/html/rfc4648#section-5 + fn base64url(pad: bool) -> Self { + Base64EncodeBE { + table: [ + 65, 66, 67, 68, 69, 70, 71, 72, // A, B, C, D, E, F, G, H + 73, 74, 75, 76, 77, 78, 79, 80, // I, J, K, L, M, N, O, P + 81, 82, 83, 84, 85, 86, 87, 88, // Q, R, S, 
T, U, V, W, X + 89, 90, 97, 98, 99, 100, 101, 102, // Y, Z, a, b, c, d, e, f + 103, 104, 105, 106, 107, 108, 109, 110, // g, h, i, j, k, l, m, n + 111, 112, 113, 114, 115, 116, 117, 118, // o, p, q, r, s, t, u, v + 119, 120, 121, 122, 48, 49, 50, 51, // w, x, y, z, 0, 1, 2, 3 + 52, 53, 54, 55, 56, 57, 45, 95, // 4, 5, 6, 7, 8, 9, -, _ + ], + pad, + } + } + fn get(self, idx: Field) -> u8 { self.table[idx] } @@ -74,9 +94,15 @@ impl Base64EncodeBE { // Every 3 chars will be encoded as 4 base64 chars let encoded_length = (InputBytes + 2) / 3 * 4; // ceil(input * 4 / 3) if self.pad { - assert(encoded_length == OutputElements, "invalid output length"); + assert( + encoded_length == OutputElements, + f"EncodeError: invalid output length. Expected {encoded_length} output elements, but got {OutputElements}.", + ); } else { - assert(encoded_length - num_padding_chars == OutputElements, "invalid output length"); + assert( + encoded_length - num_padding_chars == OutputElements, + f"EncodeError: invalid output length. 
Expected {encoded_length} output elements minus {num_padding_chars} omitted padding characters, but got {OutputElements}.", + ); } let mut result: [u8; OutputElements] = [0; OutputElements]; @@ -238,6 +264,14 @@ fn test_encode_max_byte() { let result: [u8; 2] = STANDARD_NO_PAD.encode(input); let expected: [u8; 2] = [47, 119]; // "/w" assert(result == expected); + + let result: [u8; 4] = URL_SAFE_WITH_PAD.encode(input); + let expected: [u8; 4] = [95, 119, 61, 61]; // "_w==" + assert(result == expected); + + let result: [u8; 2] = URL_SAFE.encode(input); + let expected: [u8; 2] = [95, 119]; // "_w" + assert(result == expected); } #[test] @@ -252,6 +286,10 @@ fn test_encode_ascii() { assert(result == expected); let result = STANDARD_NO_PAD.encode(input); assert(result == expected); + let result = URL_SAFE_WITH_PAD.encode(input); + assert(result == expected); + let result = URL_SAFE.encode(input); + assert(result == expected); } #[test] @@ -272,6 +310,10 @@ fn test_encode_utf8() { assert(result == expected); let result = STANDARD_NO_PAD.encode(input); assert(result == expected); + let result = URL_SAFE_WITH_PAD.encode(input); + assert(result == expected); + let result = URL_SAFE.encode(input); + assert(result == expected); } #[test] @@ -367,6 +409,35 @@ fn test_encode_multi_chunks() { assert(result == expected); } +#[test] +fn test_encode_standard() { + // test encoding to / and + + let input: [u8; 2] = [255, 224]; + let expected: [u8; 4] = [47, 43, 65, 61]; + let result: [u8; 4] = STANDARD.encode(input); + + assert(result == expected); +} + +#[test] +fn test_encode_url_safe_with_pad() { + // test encoding to _ and - + let input: [u8; 2] = [255, 224]; + let expected: [u8; 4] = [95, 45, 65, 61]; + let result: [u8; 4] = URL_SAFE_WITH_PAD.encode(input); + + assert(result == expected); +} + +#[test] +fn test_encode_url_safe() { + // test encoding to _ and - + let input: [u8; 2] = [255, 224]; + let expected: [u8; 3] = [95, 45, 65]; + let result: [u8; 3] = URL_SAFE.encode(input); + assert(result == expected); +} + #[test] fn 
test_encode() { // Raw bh: GxMlgwLiypnVrE2C0Sf4yzhcWTkAhSZ5+WERhKhXtlU diff --git a/src/lib.nr b/src/lib.nr index e98c45a..aaf1c9e 100644 --- a/src/lib.nr +++ b/src/lib.nr @@ -1,26 +1,58 @@ +// Encodings use the alphabets and padding rules specified in RFC 4648 +// (https://datatracker.ietf.org/doc/html/rfc4648): +// +// A 65-character subset of US-ASCII is used, enabling 6 bits to be +// represented per printable character. (The extra 65th character, "=", +// is used to signify a special processing function.) +// +// The encoding process represents 24-bit groups of input bits as output +// strings of 4 encoded characters. Proceeding from left to right, a +// 24-bit input group is formed by concatenating 3 8-bit input groups. +// These 24 bits are then treated as 4 concatenated 6-bit groups, each +// of which is translated into a single character in the base 64 +// alphabet. +// +// Each 6-bit group is used as an index into an array of 64 printable +// characters. The character referenced by the index is placed in the +// output string. 
mod encoder; pub use encoder::{ - STANDARD as BASE64_ENCODER_STANDARD, STANDARD_NO_PAD as BASE64_ENCODER_STANDARD_NO_PAD, + STANDARD as BASE64_ENCODER, STANDARD_NO_PAD as BASE64_NO_PAD_ENCODER, + URL_SAFE as BASE64_URL_ENCODER, URL_SAFE_WITH_PAD as BASE64_URL_WITH_PAD_ENCODER, }; mod decoder; pub use decoder::{ - STANDARD as BASE64_DECODER_STANDARD, STANDARD_NO_PAD as BASE64_DECODER_STANDARD_NO_PAD, + STANDARD as BASE64_DECODER, STANDARD_NO_PAD as BASE64_NO_PAD_DECODER, + URL_SAFE as BASE64_URL_DECODER, URL_SAFE_WITH_PAD as BASE64_URL_WITH_PAD_DECODER, }; pub(crate) mod defaults { pub(crate) global BASE64_PADDING_CHAR: u8 = 61; } +#[test] +fn encode_and_decode() { + let input: str<88> = + "The quick brown fox jumps over the lazy dog, while 42 ravens perch atop a rusty mailbox."; + let base64_encoded = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg=="; + + let encoded: [u8; 120] = BASE64_ENCODER.encode(input.as_bytes()); + assert(encoded == base64_encoded.as_bytes()); + + let decoded: [u8; 88] = BASE64_DECODER.decode(encoded); + assert(decoded == input.as_bytes()); +} + #[test] fn encode_and_decode_no_pad() { let input: str<88> = "The quick brown fox jumps over the lazy dog, while 42 ravens perch atop a rusty mailbox."; let base64_encoded: str<118> = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg"; - let encoded: [u8; 118] = BASE64_ENCODER_STANDARD_NO_PAD.encode(input.as_bytes()); + let encoded: [u8; 118] = BASE64_NO_PAD_ENCODER.encode(input.as_bytes()); assert(encoded == base64_encoded.as_bytes()); - let decoded: [u8; 88] = BASE64_DECODER_STANDARD_NO_PAD.decode(encoded); + let decoded: [u8; 88] = BASE64_NO_PAD_DECODER.decode(encoded); assert(decoded == input.as_bytes()); }