From dfed9dd81c2f91947f235cf2d674e26449cfcadd Mon Sep 17 00:00:00 2001
From: grjte <91497953+grjte@users.noreply.github.com>
Date: Wed, 30 Oct 2024 18:05:02 +0000
Subject: [PATCH] feat: add support for base64url alphabet (#24)

* fix: decoding of padded input and add length assertions

* feat: add support for base64url encoding

* test: url safe encoding pad/no pad

* feat: add support for base64url decoding

* test: url safe decoding pad/no pad

* docs: update README and example tests for configurability

* docs: update costs for encode/decode

- note: based on profiling, it seems that the previous costs were wrong and that the current costs have been the same since the reversed encoding/decoding was fixed in commit cc5b18af99c22069748863257d8c6480e04dbd4a.

* chore: rename encoder/decoder config names
---
 README.md      |  76 +++++++---
 src/decoder.nr | 394 ++++++++++++++++++++++++++++++++++++++++++++++---
 src/encoder.nr |  75 +++++++++-
 src/lib.nr     |  40 ++++-
 4 files changed, 545 insertions(+), 40 deletions(-)

diff --git a/README.md b/README.md
index 84cce30..de2840c 100644
--- a/README.md
+++ b/README.md
@@ -2,38 +2,80 @@
 
 A Base64 encoding/decoding library written in Noir which can encode arbitrary byte arrays into Base64 and decode Base64-encoded byte arrays (e.g. `"SGVsbG8gV29ybGQ=".as_bytes()`).
 
-# Usage
+## Usage
+### Configuration
+Start by selecting the encoder or decoder for your configuration. These are defined separately so that only one lookup table will be instantiated at a time, since many cases will require either an encoder or a decoder but not both.
 
-### `fn base64_encode`
-Takes an arbitrary byte array as input, unpacks it into Base64 values, then encodes each Base64 value into an ASCII character according to the [standard Base64 alphabet](https://datatracker.ietf.org/doc/html/rfc4648#section-4), to return a byte array representing the Base64 encoding. The encoded result is *not padded*, so padding must be handled separately.
+RFC 4648 specifies multiple alphabets, including the [standard Base 64 Alphabet](https://datatracker.ietf.org/doc/html/rfc4648#section-4) known as `base64` and the ["URL and Filename Safe Alphabet"](https://datatracker.ietf.org/doc/html/rfc4648#section-5) known as `base64url`. It also specifies that [padding](https://datatracker.ietf.org/doc/html/rfc4648#section-3.2) should be required in the general case but can be explicitly omitted as an option.
 
-### `fn base64_decode`
-Takes an ASCII byte array that encodes a Base64 string and decodes it into bytes. Input data is expected to be unpadded, so padding characters will cause decoding to fail.
+Available encoder configurations:
+- `BASE64_ENCODER`: uses the standard alphabet (base64) and adds padding.
+- `BASE64_NO_PAD_ENCODER`: uses the standard alphabet (base64), but omits padding.
+- `BASE64_URL_ENCODER`: uses the "URL and Filename Safe Alphabet" (base64url) and omits padding, which is common for `base64url` when the length is implicitly known, as in this case.
+- `BASE64_URL_WITH_PAD_ENCODER`: uses the "URL and Filename Safe Alphabet" (base64url) and adds padding.
 
-### `fn base64_encode_elements`
-Takes an input byte array of ASCII characters and produces an output byte array of base64-encoded characters. Data is not packed i.e. each output array element maps to a 6-bit base64 character.
+Available decoder configurations:
+- `BASE64_DECODER`: uses the standard alphabet (base64) and expects correct padding.
+- `BASE64_NO_PAD_DECODER`: uses the standard alphabet (base64), but expects all padding characters to have been stripped. A padding character encountered during decoding will trigger an error.
+- `BASE64_URL_DECODER`: uses the "URL and Filename Safe Alphabet" (base64url), but expects all padding characters to have been stripped, which is common for `base64url` when the length is implicitly known, as in this case. A padding character encountered during decoding will trigger an error.
+- `BASE64_URL_WITH_PAD_DECODER`: uses the "URL and Filename Safe Alphabet" (base64url) and expects correct padding.
 
-### `fn base64_decode_elements`
-Takes an input byte array of base64 characters and produces an output byte array of ASCII characters. Input data is not packed i.e. each input element maps to a 6-bit base64 character. Input data is expected not to contain padding characters. Padding characters will cause decoding to fail.
+### `fn encode`
+Takes an arbitrary byte array as input, encodes it in Base64 according to the alphabet and padding rules specified by the configuration, then encodes each Base64 character into UTF-8 to return a byte array representing the Base64 encoding.
 
-### Example usage
+```
+// bytes: [u8; N]
+let base64 = BASE64_ENCODER.encode(bytes);
+```
+
+### `fn decode`
+Takes a UTF-8 byte array that encodes a Base64 string and attempts to decode it into bytes according to the provided configuration specifying the alphabet and padding rules.
+
+```
+// base64: [u8; N]
+let bytes = BASE64_DECODER.decode(base64);
+```
+
+## Example usage
 (see tests in `lib.nr` for more examples)
 
 ```
-use dep::noir_base64;
 fn encode_and_decode() {
     let input: str<88> = "The quick brown fox jumps over the lazy dog, while 42 ravens perch atop a rusty mailbox.";
-    let base64_encoded: str<118> = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg";
+    let base64_encoded = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg==";
 
-    let encoded:[u8; 118] = noir_base64::base64_encode(input.as_bytes());
+    let encoded:[u8; 120] = noir_base64::BASE64_ENCODER.encode(input.as_bytes());
     assert(encoded == base64_encoded.as_bytes());
 
-    let decoded: [u8; 88] = noir_base64::base64_decode(encoded);
+    let decoded: [u8; 88] = noir_base64::BASE64_DECODER.decode(encoded);
     assert(decoded == input.as_bytes());
 }
 ```
 
-# Costs
 
-- `base64_encode` will encode an array of 88 bytes in ~1182 gates, plus a ~64 gate cost to initialize the encoding lookup table (the initialization cost is incurred once regardless of the number of encodings).
-- `base64_decode` will decode an array of 118 bytes in ~2150 gates, plus a ~256 gate cost to initialize the decoding lookup table (the initialization cost is incurred once regardless of the number of decodings).
+## Costs
+
+All of the benchmarks below are for the [Barretenberg proving backend](https://github.com/AztecProtocol/aztec-packages/tree/master/barretenberg). 
+
+After the initial setup cost it is often cheaper to decode than to encode, as shown by the numbers below where the encode/decode were run over the same pairs of unencoded and base64-encoded text.
+
+| UTF-8 Length | Base64 Length | # times | # Gates to Encode | # Gates to Decode |
+| ------------ | ------------- | ------- | ----------------- | ----------------- |
+| 12           | 16            | 1       | 2946              | 1065              |
+| 12           | 16            | 2       | 3057              | 1114              |
+| 12           | 16            | 3       | 3166              | 1163              |
+| 610          | 816           | 1       | 7349              | 8062              |
+| 610          | 816           | 2       | 10993             | 9181              |
+| 610          | 816           | 3       | 14597             | 10239             |
+
+### `encode`
+Costs are equivalent for all encoder configurations. 
+
+- encoding an array of 12 bytes into 16 base64 characters requires ~110 gates plus an initial setup cost of ~2836 gates. (Gate counts for encoding the same array 1, 2, and 3 times were 2946, 3057, and 3166 respectively.)
+- encoding an array of 610 input bytes requires ~3625 gates plus an initial setup cost of ~3700 gates. (Gate counts for encoding the same array 1, 2, 3, 4 times were 7349, 10993, 14597, and 18200 respectively.)
+
+### `decode`
+Decoding padded inputs costs 1-2 gates more than decoding unpadded inputs. Since the difference is marginal, the numbers below are only for the padded case.
+
+- decoding an array of 16 base64 characters into 12 bytes requires ~49 gates plus an initial setup cost of ~1016 gates. (Gate counts for decoding the same array 1, 2, and 3 times were 1065, 1114, and 1163 respectively.)
+- decoding an array of 816 base64 characters (including padding) into 610 bytes requires ~1060 gates plus an initial setup cost of ~7000 gates. (Gate counts for decoding the same array 1, 2, 3, 4 times were 8062, 9181, 10239, and 11298 respectively.)
\ No newline at end of file
diff --git a/src/decoder.nr b/src/decoder.nr
index 0a2bc03..0553251 100644
--- a/src/decoder.nr
+++ b/src/decoder.nr
@@ -2,6 +2,8 @@ use super::defaults::BASE64_PADDING_CHAR;
 
 pub global STANDARD = Base64DecodeBE::new(true);
 pub global STANDARD_NO_PAD = Base64DecodeBE::new(false);
+pub global URL_SAFE = Base64DecodeBE::base64url(false);
+pub global URL_SAFE_WITH_PAD = Base64DecodeBE::base64url(true);
 
 global INVALID_VALUE: u8 = 255;
 struct Base64DecodeBE {
@@ -278,6 +280,274 @@ impl Base64DecodeBE {
         }
     }
 
+    // Creates a new decoder that uses the URL and Filename Safe Alphabet specified in RFC 4648
+    // https://datatracker.ietf.org/doc/html/rfc4648#section-5
+    fn base64url(pad: bool) -> Self {
+        Base64DecodeBE {
+            table: [
+                // 0-44 (no mapping)
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                62, // 45 (-)
+                INVALID_VALUE,
+                INVALID_VALUE, // 46-47 (no mapping)
+                52,
+                53,
+                54,
+                55,
+                56,
+                57,
+                58,
+                59,
+                60,
+                61, // 48-57 (0-9)
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE, // 58-64 (no mapping)
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15,
+                16,
+                17,
+                18,
+                19,
+                20,
+                21,
+                22,
+                23,
+                24,
+                25, // 65-90 (A-Z)
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE, // 91-94 (no mapping)
+                63, // 95 (_)
+                INVALID_VALUE, // 96 (no mapping)
+                26,
+                27,
+                28,
+                29,
+                30,
+                31,
+                32,
+                33,
+                34,
+                35,
+                36,
+                37,
+                38,
+                39,
+                40,
+                41,
+                42,
+                43,
+                44,
+                45,
+                46,
+                47,
+                48,
+                49,
+                50,
+                51, // 97-122 (a-z)
+                // 123-255
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+                INVALID_VALUE,
+            ],
+            pad,
+        }
+    }
+
     fn get(self, idx: Field) -> u8 {
         self.table[idx]
     }
@@ -291,9 +561,37 @@ impl Base64DecodeBE {
         self,
         input: [u8; InputElements],
     ) -> [u8; OutputBytes] {
+        let rem = OutputBytes % 3;
+        // Calculate the number of padding characters implied by the output length
+        let num_padding_chars = if rem == 1 {
+            2
+        } else if rem == 2 {
+            1
+        } else {
+            0
+        };
+
+        // Assert that the output length & input length are correct
+        // Every 3 output bytes are encoded as 4 base64 input chars
+        let encoded_length = (OutputBytes + 2) / 3 * 4; // 4 * ceil(OutputBytes / 3)
         if self.pad {
-            // if the input length is not a multiple of 4, then it's not a valid base64 encoding
-            assert(InputElements % 4 == 0);
+            assert(
+                encoded_length == InputElements,
+                f"DecodeError: invalid input length for specified output length. Expected {encoded_length} input elements, but got {InputElements}.",
+            );
+            // enforce Base64 padding is valid
+            if num_padding_chars == 2 {
+                assert(input[InputElements - 1] == BASE64_PADDING_CHAR);
+                assert(input[InputElements - 2] == BASE64_PADDING_CHAR);
+            } else if num_padding_chars == 1 {
+                assert(input[InputElements - 1] == BASE64_PADDING_CHAR);
+            }
+        } else {
+            let expected = encoded_length - num_padding_chars;
+            assert(
+                encoded_length - num_padding_chars == InputElements,
+                f"DecodeError: invalid input length for specified output length. Expected {expected} input elements, but got {InputElements}.",
+            );
         }
 
         // 240 bits fits 40 6-bit chunks and 30 8-bit chunks
@@ -330,18 +628,11 @@ impl Base64DecodeBE {
             // process the final chunk, which may contain padding
             let base64_offset: u32 = final_chunk * BASE64_ELEMENTS_PER_CHUNK;
             let byte_offset = final_chunk * BYTES_PER_CHUNK;
-            let mut base64_elements_in_final_chunk = InputElements - base64_offset;
-
-            if self.pad {
-                // enforce Base64 padding is valid, then strip the padding
-                if (input[InputElements - 2] == BASE64_PADDING_CHAR) {
-                    // if a non-padding byte follows a padding byte, the base64 is invalid
-                    assert(input[InputElements - 1] == BASE64_PADDING_CHAR);
-                    base64_elements_in_final_chunk -= 2;
-                } else if (input[InputElements - 1] == BASE64_PADDING_CHAR) {
-                    base64_elements_in_final_chunk -= 1;
-                }
-            }
+            let base64_elements_in_final_chunk = if self.pad {
+                InputElements - base64_offset - num_padding_chars
+            } else {
+                InputElements - base64_offset
+            };
 
             // pack the base64 values into the field element
             let mut slice: Field = 0;
@@ -440,15 +731,23 @@ fn test_decode_max_byte() {
     let input: [u8; 2] = [47, 119]; // "/w"
     let result: [u8; 1] = STANDARD_NO_PAD.decode(input);
     assert(result == expected);
+
+    let input: [u8; 4] = [95, 119, 61, 61]; // "_w=="
+    let result: [u8; 1] = URL_SAFE_WITH_PAD.decode(input);
+    assert(result == expected);
+
+    let input: [u8; 2] = [95, 119]; // "_w"
+    let result: [u8; 1] = URL_SAFE.decode(input);
+    assert(result == expected);
 }
 
 #[test(should_fail_with = "DecodeError: invalid symbol 255, offset 0")]
 fn test_decode_invalid() {
-    let input: [u8; 1] = [255];
-    let _: [u8; 0] = STANDARD_NO_PAD.decode(input);
+    let input: [u8; 2] = [255, 255];
+    let _: [u8; 1] = STANDARD_NO_PAD.decode(input);
 }
 
-#[test(should_fail_with = "DecodeError: invalid symbol 61, offset 3")]
+#[test(should_fail_with = "DecodeError: invalid input length for specified output length. Expected 3 input elements, but got 4.")]
 fn test_decode_standard_no_pad_fail_with_padding() {
     // test decoding / and +
     let input: [u8; 4] = [47, 43, 65, 61];
@@ -457,6 +756,59 @@ fn test_decode_standard_no_pad_fail_with_padding() {
     assert(result == expected);
 }
 
+#[test]
+fn test_decode_standard() {
+    // test decoding / and +
+    let input: [u8; 4] = [47, 43, 65, 61];
+    let expected: [u8; 2] = [255, 224];
+    let result: [u8; 2] = STANDARD.decode(input);
+
+    assert(result == expected);
+}
+
+#[test(should_fail_with = "DecodeError: invalid symbol 95, offset 0")]
+fn test_decode_underscore_with_standard() {
+    // test decoding _ and -
+    let input: [u8; 4] = [95, 45, 65, 61];
+    let _: [u8; 2] = STANDARD.decode(input);
+}
+
+#[test]
+fn test_decode_url_safe_with_pad() {
+    // test decoding _ and -
+    let input: [u8; 4] = [95, 45, 65, 61];
+    let expected: [u8; 2] = [255, 224];
+    let result: [u8; 2] = URL_SAFE_WITH_PAD.decode(input);
+
+    assert(result == expected);
+}
+
+#[test(should_fail_with = "DecodeError: invalid symbol 47, offset 0")]
+fn test_decode_slash_with_url_safe() {
+    // test decoding / and +
+    let input: [u8; 3] = [47, 43, 65];
+    let _: [u8; 2] = URL_SAFE.decode(input);
+}
+
+#[test]
+fn test_decode_url_safe() {
+    // test decoding _ and -
+    let input: [u8; 3] = [95, 45, 65];
+    let expected: [u8; 2] = [255, 224];
+    let result: [u8; 2] = URL_SAFE.decode(input);
+
+    assert(result == expected);
+}
+
+#[test(should_fail_with = "DecodeError: invalid input length for specified output length. Expected 3 input elements, but got 4.")]
+fn test_decode_url_safe_no_pad_fail_with_padding() {
+    // test decoding _ and -
+    let input: [u8; 4] = [95, 45, 65, 61];
+    let expected: [u8; 2] = [255, 224];
+    let result: [u8; 2] = URL_SAFE.decode(input);
+    assert(result == expected);
+}
+
 #[test]
 fn test_decode_ascii() {
     // base64: SGVsbG8gV29ybGQh
@@ -469,6 +821,10 @@ fn test_decode_ascii() {
     assert(result == expected);
     let result: [u8; 12] = STANDARD_NO_PAD.decode(input);
     assert(result == expected);
+    let result: [u8; 12] = URL_SAFE_WITH_PAD.decode(input);
+    assert(result == expected);
+    let result: [u8; 12] = URL_SAFE.decode(input);
+    assert(result == expected);
 }
 
 #[test]
@@ -489,6 +845,10 @@ fn test_decode_utf8() {
     assert(result == expected);
     let result: [u8; 27] = STANDARD_NO_PAD.decode(input);
     assert(result == expected);
+    let result: [u8; 27] = URL_SAFE_WITH_PAD.decode(input);
+    assert(result == expected);
+    let result: [u8; 27] = URL_SAFE.decode(input);
+    assert(result == expected);
 }
 
 #[test]
diff --git a/src/encoder.nr b/src/encoder.nr
index b4b515c..c11ab12 100644
--- a/src/encoder.nr
+++ b/src/encoder.nr
@@ -2,6 +2,8 @@ use super::defaults::BASE64_PADDING_CHAR;
 
 pub global STANDARD = Base64EncodeBE::new(true);
 pub global STANDARD_NO_PAD = Base64EncodeBE::new(false);
+pub global URL_SAFE = Base64EncodeBE::base64url(false);
+pub global URL_SAFE_WITH_PAD = Base64EncodeBE::base64url(true);
 
 struct Base64EncodeBE {
     // for some reason, if the lookup table is not defined in a struct, access costs are expensive and ROM tables aren't being used :/
@@ -30,6 +32,24 @@ impl Base64EncodeBE {
         }
     }
 
+    // Creates a new encoder that uses the URL and Filename Safe Alphabet specified in RFC 4648
+    // https://datatracker.ietf.org/doc/html/rfc4648#section-5
+    fn base64url(pad: bool) -> Self {
+        Base64EncodeBE {
+            table: [
+                65, 66, 67, 68, 69, 70, 71, 72, // A, B, C, D, E, F, G, H
+                73, 74, 75, 76, 77, 78, 79, 80, // I, J, K, L, M, N, O, P
+                81, 82, 83, 84, 85, 86, 87, 88, // Q, R, S, T, U, V, W, X
+                89, 90, 97, 98, 99, 100, 101, 102, // Y, Z, a, b, c, d, e, f
+                103, 104, 105, 106, 107, 108, 109, 110, // g, h, i, j, k, l, m, n
+                111, 112, 113, 114, 115, 116, 117, 118, // o, p, q, r, s, t, u, v
+                119, 120, 121, 122, 48, 49, 50, 51, // w, x, y, z, 0, 1, 2, 3
+                52, 53, 54, 55, 56, 57, 45, 95, // 4, 5, 6, 7, 8, 9, -, _
+            ],
+            pad,
+        }
+    }
+
     fn get(self, idx: Field) -> u8 {
         self.table[idx]
     }
@@ -74,9 +94,15 @@ impl Base64EncodeBE {
         // Every 3 chars will be encoded as 4 base64 chars
         let encoded_length = (InputBytes + 2) / 3 * 4; // ceil(input * 4 / 3)
         if self.pad {
-            assert(encoded_length == OutputElements, "invalid output length");
+            assert(
+                encoded_length == OutputElements,
+                f"EncodeError: invalid output length. Expected {encoded_length} output elements, but got {OutputElements}.",
+            );
         } else {
-            assert(encoded_length - num_padding_chars == OutputElements, "invalid output length");
+            assert(
+                encoded_length - num_padding_chars == OutputElements,
+                f"EncodeError: invalid output length. Expected {encoded_length} output elements, but got {OutputElements}.",
+            );
         }
 
         let mut result: [u8; OutputElements] = [0; OutputElements];
@@ -238,6 +264,14 @@ fn test_encode_max_byte() {
     let result: [u8; 2] = STANDARD_NO_PAD.encode(input);
     let expected: [u8; 2] = [47, 119]; // "/w"
     assert(result == expected);
+
+    let result: [u8; 4] = URL_SAFE_WITH_PAD.encode(input);
+    let expected: [u8; 4] = [95, 119, 61, 61]; // "_w=="
+    assert(result == expected);
+
+    let result: [u8; 2] = URL_SAFE.encode(input);
+    let expected: [u8; 2] = [95, 119]; // "_w"
+    assert(result == expected);
 }
 
 #[test]
@@ -252,6 +286,10 @@ fn test_encode_ascii() {
     assert(result == expected);
     let result = STANDARD_NO_PAD.encode(input);
     assert(result == expected);
+    let result = URL_SAFE_WITH_PAD.encode(input);
+    assert(result == expected);
+    let result = URL_SAFE.encode(input);
+    assert(result == expected);
 }
 
 #[test]
@@ -272,6 +310,10 @@ fn test_encode_utf8() {
     assert(result == expected);
     let result = STANDARD_NO_PAD.encode(input);
     assert(result == expected);
+    let result = URL_SAFE_WITH_PAD.encode(input);
+    assert(result == expected);
+    let result = URL_SAFE.encode(input);
+    assert(result == expected);
 }
 
 #[test]
@@ -367,6 +409,35 @@ fn test_encode_multi_chunks() {
     assert(result == expected);
 }
 
+#[test]
+fn test_encode_standard() {
+    // test encoding to / and +
+    let input: [u8; 2] = [255, 224];
+    let expected: [u8; 4] = [47, 43, 65, 61];
+    let result: [u8; 4] = STANDARD.encode(input);
+
+    assert(result == expected);
+}
+
+#[test]
+fn test_encode_url_safe_with_pad() {
+    // test encoding to _ and -
+    let input: [u8; 2] = [255, 224];
+    let expected: [u8; 4] = [95, 45, 65, 61];
+    let result: [u8; 4] = URL_SAFE_WITH_PAD.encode(input);
+
+    assert(result == expected);
+}
+
+#[test]
+fn test_encode_url_safe() {
+    // test encoding to _ and -
+    let input: [u8; 2] = [255, 224];
+    let expected: [u8; 3] = [95, 45, 65];
+    let result: [u8; 3] = URL_SAFE.encode(input);
+    assert(result == expected);
+}
+
 #[test]
 fn test_encode() {
     // Raw bh: GxMlgwLiypnVrE2C0Sf4yzhcWTkAhSZ5+WERhKhXtlU
diff --git a/src/lib.nr b/src/lib.nr
index e98c45a..aaf1c9e 100644
--- a/src/lib.nr
+++ b/src/lib.nr
@@ -1,26 +1,58 @@
+// Encodings use the alphabets and padding rules specified in RFC 4648
+// (https://datatracker.ietf.org/doc/html/rfc4648):
+//
+//    A 65-character subset of US-ASCII is used, enabling 6 bits to be
+//    represented per printable character.  (The extra 65th character, "=",
+//    is used to signify a special processing function.)
+//
+//    The encoding process represents 24-bit groups of input bits as output
+//    strings of 4 encoded characters.  Proceeding from left to right, a
+//    24-bit input group is formed by concatenating 3 8-bit input groups.
+//    These 24 bits are then treated as 4 concatenated 6-bit groups, each
+//    of which is translated into a single character in the base 64
+//    alphabet.
+//
+//    Each 6-bit group is used as an index into an array of 64 printable
+//    characters.  The character referenced by the index is placed in the
+//    output string.
 mod encoder;
 pub use encoder::{
-    STANDARD as BASE64_ENCODER_STANDARD, STANDARD_NO_PAD as BASE64_ENCODER_STANDARD_NO_PAD,
+    STANDARD as BASE64_ENCODER, STANDARD_NO_PAD as BASE64_NO_PAD_ENCODER,
+    URL_SAFE as BASE64_URL_ENCODER, URL_SAFE_WITH_PAD as BASE64_URL_WITH_PAD_ENCODER,
 };
 
 mod decoder;
 pub use decoder::{
-    STANDARD as BASE64_DECODER_STANDARD, STANDARD_NO_PAD as BASE64_DECODER_STANDARD_NO_PAD,
+    STANDARD as BASE64_DECODER, STANDARD_NO_PAD as BASE64_NO_PAD_DECODER,
+    URL_SAFE as BASE64_URL_DECODER, URL_SAFE_WITH_PAD as BASE64_URL_WITH_PAD_DECODER,
 };
 
 pub(crate) mod defaults {
     pub(crate) global BASE64_PADDING_CHAR: u8 = 61;
 }
 
+#[test]
+fn encode_and_decode() {
+    let input: str<88> =
+        "The quick brown fox jumps over the lazy dog, while 42 ravens perch atop a rusty mailbox.";
+    let base64_encoded = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg==";
+
+    let encoded: [u8; 120] = BASE64_ENCODER.encode(input.as_bytes());
+    assert(encoded == base64_encoded.as_bytes());
+
+    let decoded: [u8; 88] = BASE64_DECODER.decode(encoded);
+    assert(decoded == input.as_bytes());
+}
+
 #[test]
 fn encode_and_decode_no_pad() {
     let input: str<88> =
         "The quick brown fox jumps over the lazy dog, while 42 ravens perch atop a rusty mailbox.";
     let base64_encoded: str<118> = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZywgd2hpbGUgNDIgcmF2ZW5zIHBlcmNoIGF0b3AgYSBydXN0eSBtYWlsYm94Lg";
 
-    let encoded: [u8; 118] = BASE64_ENCODER_STANDARD_NO_PAD.encode(input.as_bytes());
+    let encoded: [u8; 118] = BASE64_NO_PAD_ENCODER.encode(input.as_bytes());
     assert(encoded == base64_encoded.as_bytes());
 
-    let decoded: [u8; 88] = BASE64_DECODER_STANDARD_NO_PAD.decode(encoded);
+    let decoded: [u8; 88] = BASE64_NO_PAD_DECODER.decode(encoded);
     assert(decoded == input.as_bytes());
 }