From 12705bb75d0503f59e6a830a9802b3dca01e7053 Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Tue, 4 Jul 2023 19:06:23 +0200 Subject: [PATCH 01/31] first-stab-at-closing-4 --- README.md | 80 ++++++++++++++++++++++++++++----------------------- multibase.csv | 50 ++++++++++++++++---------------- 2 files changed, 69 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index 567e23c..a430788 100644 --- a/README.md +++ b/README.md @@ -27,19 +27,27 @@ cases. Unfortunately, it's not always clear *what* base encoding is used; that's where multibase comes in. It answers the question: -> Given data d encoded into text s, what base is it encoded with? +> Given binary data d encoded into text s, what base b was used to encode it? + +To answer this question, a single-byte prefix is added to d that renders (upon +b-encoding) as a one-character (or in the case of non-ASCII text like the emoji +alphabet, one-symbol) prefix to s. This prefix is the "code" that makes b +visible in s. ## Table of Contents -- [Format](#format) - - [Multibase Table](#multibase-table) -- [Multibase By Example](#multibase-by-example) -- [FAQ](#faq) -- [Implementations:](#implementations) -- [Disclaimers](#disclaimers) -- [Maintainers](#maintainers) -- [Contribute](#contribute) -- [License](#license) +- [multibase](#multibase) + - [Table of Contents](#table-of-contents) + - [Format](#format) + - [Multibase Table](#multibase-table) + - [Reserved](#reserved) + - [Status](#status) + - [Multibase By Example](#multibase-by-example) + - [FAQ](#faq) + - [Implementations:](#implementations) + - [Disclaimers](#disclaimers) + - [Contribute](#contribute) + - [License](#license) ## Format @@ -56,32 +64,32 @@ Where `` is used according to the multibase table. 
The current multibase table is [here](multibase.csv): ``` -encoding, code, description, status -identity, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default -base2, 0, binary (01010101), candidate -base8, 7, octal, draft -base10, 9, decimal, draft -base16, f, hexadecimal, default -base16upper, F, hexadecimal, default -base32hex, v, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexupper, V, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexpad, t, rfc4648 case-insensitive - with padding, candidate -base32hexpadupper, T, rfc4648 case-insensitive - with padding, candidate -base32, b, rfc4648 case-insensitive - no padding, default -base32upper, B, rfc4648 case-insensitive - no padding, default -base32pad, c, rfc4648 case-insensitive - with padding, candidate -base32padupper, C, rfc4648 case-insensitive - with padding, candidate -base32z, h, z-base-32 (used by Tahoe-LAFS), draft -base36, k, base36 [0-9a-z] case-insensitive - no padding, draft -base36upper, K, base36 [0-9a-z] case-insensitive - no padding, draft -base58btc, z, base58 bitcoin, default -base58flickr, Z, base58 flicker, candidate -base64, m, rfc4648 no padding, default -base64pad, M, rfc4648 with padding - MIME encoding, candidate -base64url, u, rfc4648 no padding, default -base64urlpad, U, rfc4648 with padding, default -proquint, p, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀, base256 with custom alphabet using variable-sized-codepoints, draft +encoding, code, description, status +identity, 0x00 (NUL), 8-bit binary (encoder and decoder keeps data unmodified), default +base2, 0x30 (0), binary (01010101), candidate +base8, 0x37 (7), octal, draft +base10, 0x39 (9), decimal, draft +base16, 0x66 (f), hexadecimal, default +base16upper, 0x46 (F), hexadecimal, default +base32hex, 0x76 (v), rfc4648 case-insensitive - no padding - highest char, candidate +base32hexupper, 0x56 (V), rfc4648 case-insensitive - no padding - 
highest char, candidate +base32hexpad, 0x74 (t), rfc4648 case-insensitive - with padding, candidate +base32hexpadupper, 0x54 (T), rfc4648 case-insensitive - with padding, candidate +base32, 0x62 (b), rfc4648 case-insensitive - no padding, default +base32upper, 0x42 (B), rfc4648 case-insensitive - no padding, default +base32pad, 0x63 (c), rfc4648 case-insensitive - with padding, candidate +base32padupper, 0x43 (C), rfc4648 case-insensitive - with padding, candidate +base32z, 0x68 (h), z-base-32 (used by Tahoe-LAFS), draft +base36, 0x6b (k), base36 [0-9a-z] case-insensitive - no padding, draft +base36upper, 0x4b (K), base36 [0-9a-z] case-insensitive - no padding, draft +base58btc, 0x7a (z), base58 bitcoin, default +base58flickr, 0x5a (Z), base58 flicker, candidate +base64, 0x6d (m), rfc4648 no padding, default +base64pad, 0x4d (M), rfc4648 with padding - MIME encoding, candidate +base64url, 0x75 (u), rfc4648 no padding, default +base64urlpad, 0x55 (U), rfc4648 with padding, default +proquint, 0x70 (p), PRO-QUINT https://arxiv.org/html/0901.4016, draft +base256emoji, 0xe7 (🚀), base256 with custom alphabet using variable-sized-codepoints, draft ``` **NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). For example, in UTF-32, "z" would be `[0x7a, 0x00, 0x00, 0x00]`. 
diff --git a/multibase.csv b/multibase.csv index 7c7549d..e6f9cd9 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,26 +1,26 @@ encoding, code, description, status -identity, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default -base2, 0, binary (01010101), candidate -base8, 7, octal, draft -base10, 9, decimal, draft -base16, f, hexadecimal, default -base16upper, F, hexadecimal, default -base32hex, v, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexupper, V, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexpad, t, rfc4648 case-insensitive - with padding, candidate -base32hexpadupper, T, rfc4648 case-insensitive - with padding, candidate -base32, b, rfc4648 case-insensitive - no padding, default -base32upper, B, rfc4648 case-insensitive - no padding, default -base32pad, c, rfc4648 case-insensitive - with padding, candidate -base32padupper, C, rfc4648 case-insensitive - with padding, candidate -base32z, h, z-base-32 (used by Tahoe-LAFS), draft -base36, k, base36 [0-9a-z] case-insensitive - no padding, draft -base36upper, K, base36 [0-9a-z] case-insensitive - no padding, draft -base58btc, z, base58 bitcoin, default -base58flickr, Z, base58 flicker, candidate -base64, m, rfc4648 no padding, default -base64pad, M, rfc4648 with padding - MIME encoding, candidate -base64url, u, rfc4648 no padding, default -base64urlpad, U, rfc4648 with padding, default -proquint, p, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀, base256 with custom alphabet using variable-sized-codepoints, draft +identity, 0x00 (NUL), 8-bit binary (encoder and decoder keeps data unmodified), default +base2, 0x30 (0), binary (01010101), candidate +base8, 0x37 (7), octal, draft +base10, 0x39 (9), decimal, draft +base16, 0x66 (f), hexadecimal, default +base16upper, 0x46 (F), hexadecimal, default +base32hex, 0x76 (v), rfc4648 case-insensitive - no padding - highest char, candidate +base32hexupper, 0x56 (V), rfc4648 
case-insensitive - no padding - highest char, candidate +base32hexpad, 0x74 (t), rfc4648 case-insensitive - with padding, candidate +base32hexpadupper, 0x54 (T), rfc4648 case-insensitive - with padding, candidate +base32, 0x62 (b), rfc4648 case-insensitive - no padding, default +base32upper, 0x42 (B), rfc4648 case-insensitive - no padding, default +base32pad, 0x63 (c), rfc4648 case-insensitive - with padding, candidate +base32padupper, 0x43 (C), rfc4648 case-insensitive - with padding, candidate +base32z, 0x68 (h), z-base-32 (used by Tahoe-LAFS), draft +base36, 0x6b (k), base36 [0-9a-z] case-insensitive - no padding, draft +base36upper, 0x4b (K), base36 [0-9a-z] case-insensitive - no padding, draft +base58btc, 0x7a (z), base58 bitcoin, default +base58flickr, 0x5a (Z), base58 flicker, candidate +base64, 0x6d (m), rfc4648 no padding, default +base64pad, 0x4d (M), rfc4648 with padding - MIME encoding, candidate +base64url, 0x75 (u), rfc4648 no padding, default +base64urlpad, 0x55 (U), rfc4648 with padding, default +proquint, 0x70 (p), PRO-QUINT https://arxiv.org/html/0901.4016, draft +base256emoji, 0xe7 (🚀), base256 with custom alphabet using variable-sized-codepoints, draft From b7307ea2907a37a4d5637e06cb1c1ef73b4dcaaf Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Tue, 4 Jul 2023 20:00:13 +0200 Subject: [PATCH 02/31] second-stab-thanks-to-@aarongoldman --- README.md | 60 ++++++++++++++++++++++++++------------------------- multibase.csv | 52 ++++++++++++++++++++++---------------------- 2 files changed, 57 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index a430788..9f5e421 100644 --- a/README.md +++ b/README.md @@ -29,10 +29,12 @@ multibase comes in. It answers the question: > Given binary data d encoded into text s, what base b was used to encode it? 
-To answer this question, a single-byte prefix is added to d that renders (upon +To answer this question, a binary prefix `bp` is added to `d` that renders (upon b-encoding) as a one-character (or in the case of non-ASCII text like the emoji -alphabet, one-symbol) prefix to s. This prefix is the "code" that makes b -visible in s. +alphabet, one-symbol) prefix, `sp`. This prefix `sp` is the "code" that makes +`b` visible in `s`. For most entries, `bp` is a single byte in UTF-8, but in +the case of the emoji alphabet, UTF-16 is required to achieve a single-byte +binary prefix. ## Table of Contents @@ -64,32 +66,32 @@ Where `` is used according to the multibase table. The current multibase table is [here](multibase.csv): ``` -encoding, code, description, status -identity, 0x00 (NUL), 8-bit binary (encoder and decoder keeps data unmodified), default -base2, 0x30 (0), binary (01010101), candidate -base8, 0x37 (7), octal, draft -base10, 0x39 (9), decimal, draft -base16, 0x66 (f), hexadecimal, default -base16upper, 0x46 (F), hexadecimal, default -base32hex, 0x76 (v), rfc4648 case-insensitive - no padding - highest char, candidate -base32hexupper, 0x56 (V), rfc4648 case-insensitive - no padding - highest char, candidate -base32hexpad, 0x74 (t), rfc4648 case-insensitive - with padding, candidate -base32hexpadupper, 0x54 (T), rfc4648 case-insensitive - with padding, candidate -base32, 0x62 (b), rfc4648 case-insensitive - no padding, default -base32upper, 0x42 (B), rfc4648 case-insensitive - no padding, default -base32pad, 0x63 (c), rfc4648 case-insensitive - with padding, candidate -base32padupper, 0x43 (C), rfc4648 case-insensitive - with padding, candidate -base32z, 0x68 (h), z-base-32 (used by Tahoe-LAFS), draft -base36, 0x6b (k), base36 [0-9a-z] case-insensitive - no padding, draft -base36upper, 0x4b (K), base36 [0-9a-z] case-insensitive - no padding, draft -base58btc, 0x7a (z), base58 bitcoin, default -base58flickr, 0x5a (Z), base58 flicker, candidate -base64, 0x6d (m), 
rfc4648 no padding, default -base64pad, 0x4d (M), rfc4648 with padding - MIME encoding, candidate -base64url, 0x75 (u), rfc4648 no padding, default -base64urlpad, 0x55 (U), rfc4648 with padding, default -proquint, 0x70 (p), PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 0xe7 (🚀), base256 with custom alphabet using variable-sized-codepoints, draft +encoding, code,prefix (UTF-8), description, status, comments +identity, NUL, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, +base2, 0, 0x30, binary (01010101), candidate +base8, 7, 0x37, octal, draft +base10, 9, 0x39, decimal, draft +base16, f, 0x66, hexadecimal, default +base16upper, F, 0x46, hexadecimal, default +base32hex, v, 0x76, rfc4648 case-insensitive - no padding - highest char, candidate +base32hexupper, V, 0x56, rfc4648 case-insensitive - no padding - highest char, candidate +base32hexpad, t, 0x74, rfc4648 case-insensitive - with padding, candidate +base32hexpadupper, T, 0x54, rfc4648 case-insensitive - with padding, candidate +base32, b, 0x62, rfc4648 case-insensitive - no padding, default +base32upper, B, 0x42, rfc4648 case-insensitive - no padding, default +base32pad, c, 0x63, rfc4648 case-insensitive - with padding, candidate +base32padupper, C, 0x43, rfc4648 case-insensitive - with padding, candidate +base32z, h, 0x68, z-base-32 (used by Tahoe-LAFS), draft +base36, k, 0x6b, base36 [0-9a-z] case-insensitive - no padding, draft +base36upper, K, 0x4b, base36 [0-9a-z] case-insensitive - no padding, draft +base58btc, z, 0x7a, base58 bitcoin, default +base58flickr, Z, 0x5a, base58 flicker, candidate +base64, m, 0x6d, rfc4648 no padding, default +base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, candidate +base64url, u, 0x75, rfc4648 no padding, default +base64urlpad, U, 0x55, rfc4648 with padding, default +proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft +base256emoji, 🚀, 0x0xF09F9A80, base256 with custom alphabet using 
variable-sized-codepoints (prefix 0xe7 in UTF-16), draft ``` **NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). For example, in UTF-32, "z" would be `[0x7a, 0x00, 0x00, 0x00]`. diff --git a/multibase.csv b/multibase.csv index e6f9cd9..4e38488 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,26 +1,26 @@ -encoding, code, description, status -identity, 0x00 (NUL), 8-bit binary (encoder and decoder keeps data unmodified), default -base2, 0x30 (0), binary (01010101), candidate -base8, 0x37 (7), octal, draft -base10, 0x39 (9), decimal, draft -base16, 0x66 (f), hexadecimal, default -base16upper, 0x46 (F), hexadecimal, default -base32hex, 0x76 (v), rfc4648 case-insensitive - no padding - highest char, candidate -base32hexupper, 0x56 (V), rfc4648 case-insensitive - no padding - highest char, candidate -base32hexpad, 0x74 (t), rfc4648 case-insensitive - with padding, candidate -base32hexpadupper, 0x54 (T), rfc4648 case-insensitive - with padding, candidate -base32, 0x62 (b), rfc4648 case-insensitive - no padding, default -base32upper, 0x42 (B), rfc4648 case-insensitive - no padding, default -base32pad, 0x63 (c), rfc4648 case-insensitive - with padding, candidate -base32padupper, 0x43 (C), rfc4648 case-insensitive - with padding, candidate -base32z, 0x68 (h), z-base-32 (used by Tahoe-LAFS), draft -base36, 0x6b (k), base36 [0-9a-z] case-insensitive - no padding, draft -base36upper, 0x4b (K), base36 [0-9a-z] case-insensitive - no padding, draft -base58btc, 0x7a (z), base58 bitcoin, default -base58flickr, 0x5a (Z), base58 flicker, candidate -base64, 0x6d (m), rfc4648 no padding, default -base64pad, 0x4d (M), rfc4648 with padding - MIME encoding, candidate -base64url, 0x75 (u), rfc4648 no padding, default -base64urlpad, 0x55 (U), rfc4648 with padding, default -proquint, 0x70 (p), PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 0xe7 (🚀), base256 with custom alphabet using variable-sized-codepoints, draft 
+encoding, code,prefix (UTF-8), description, status, comments +identity, NUL, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, +base2, 0, 0x30, binary (01010101), candidate +base8, 7, 0x37, octal, draft +base10, 9, 0x39, decimal, draft +base16, f, 0x66, hexadecimal, default +base16upper, F, 0x46, hexadecimal, default +base32hex, v, 0x76, rfc4648 case-insensitive - no padding - highest char, candidate +base32hexupper, V, 0x56, rfc4648 case-insensitive - no padding - highest char, candidate +base32hexpad, t, 0x74, rfc4648 case-insensitive - with padding, candidate +base32hexpadupper, T, 0x54, rfc4648 case-insensitive - with padding, candidate +base32, b, 0x62, rfc4648 case-insensitive - no padding, default +base32upper, B, 0x42, rfc4648 case-insensitive - no padding, default +base32pad, c, 0x63, rfc4648 case-insensitive - with padding, candidate +base32padupper, C, 0x43, rfc4648 case-insensitive - with padding, candidate +base32z, h, 0x68, z-base-32 (used by Tahoe-LAFS), draft +base36, k, 0x6b, base36 [0-9a-z] case-insensitive - no padding, draft +base36upper, K, 0x4b, base36 [0-9a-z] case-insensitive - no padding, draft +base58btc, z, 0x7a, base58 bitcoin, default +base58flickr, Z, 0x5a, base58 flicker, candidate +base64, m, 0x6d, rfc4648 no padding, default +base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, candidate +base64url, u, 0x75, rfc4648 no padding, default +base64urlpad, U, 0x55, rfc4648 with padding, default +proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft +base256emoji, 🚀, 0x0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints (prefix 0xe7 in UTF-16), draft From e380b706e50d01c05bcf7324bb71de542a244b79 Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Tue, 4 Jul 2023 20:06:13 +0200 Subject: [PATCH 03/31] typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9f5e421..36666ca 100644 --- a/README.md +++ b/README.md @@ -91,7 
+91,7 @@ base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, base64url, u, 0x75, rfc4648 no padding, default base64urlpad, U, 0x55, rfc4648 with padding, default proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀, 0x0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints (prefix 0xe7 in UTF-16), draft +base256emoji, 🚀, 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints (prefix 0xe7 in UTF-16), draft ``` **NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). For example, in UTF-32, "z" would be `[0x7a, 0x00, 0x00, 0x00]`. From 5c7594a3a4cba10feac635ce534044e38d6560d7 Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Tue, 4 Jul 2023 20:30:53 +0200 Subject: [PATCH 04/31] remove confusing 0xe7 reference for now --- README.md | 16 ++++++++-------- multibase.csv | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 36666ca..afed7e1 100644 --- a/README.md +++ b/README.md @@ -29,12 +29,12 @@ multibase comes in. It answers the question: > Given binary data d encoded into text s, what base b was used to encode it? -To answer this question, a binary prefix `bp` is added to `d` that renders (upon -b-encoding) as a one-character (or in the case of non-ASCII text like the emoji -alphabet, one-symbol) prefix, `sp`. This prefix `sp` is the "code" that makes -`b` visible in `s`. For most entries, `bp` is a single byte in UTF-8, but in -the case of the emoji alphabet, UTF-16 is required to achieve a single-byte -binary prefix. +To answer this question, a binary prefix `dp` is added to `d` that renders (upon +b-encoding) as a one-character (or in the case of non-ASCII text like the +Unicode emoji alphabet, a one-symbol) prefix, `sp`. This prefix `sp` is the +"code" that makes `b` visible in `s`. 
For most entries, `dp` is a single byte +in UTF-8, but in the case of the emoji alphabet, the single-byte property may +only be present in other encodings. ## Table of Contents @@ -91,10 +91,10 @@ base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, base64url, u, 0x75, rfc4648 no padding, default base64urlpad, U, 0x55, rfc4648 with padding, default proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀, 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints (prefix 0xe7 in UTF-16), draft +base256emoji, 🚀, 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft ``` -**NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). For example, in UTF-32, "z" would be `[0x7a, 0x00, 0x00, 0x00]`. +**NOTE:** Multibase-prefixes are encoding agnostic, and conformance is tested by the prefix code in the string, not the binary prefix, however encoded. I.e., the code is `z` (in BTC-alphabet base 58), not `0x7a` (the binary that becomes the `z` encoded in ASCII/UTF-8). Note that the same binary, in UTF-32, would be `[0x7a, 0x00, 0x00, 0x00]` to produce the same `z` with the base and alphabet. 
## Reserved diff --git a/multibase.csv b/multibase.csv index 4e38488..2e445e1 100644 --- a/multibase.csv +++ b/multibase.csv @@ -23,4 +23,4 @@ base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, base64url, u, 0x75, rfc4648 no padding, default base64urlpad, U, 0x55, rfc4648 with padding, default proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀, 0x0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints (prefix 0xe7 in UTF-16), draft +base256emoji, 🚀, 0x0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft From b0bf557aa37aa243378ad9faba5f20177c5c7dc2 Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Tue, 4 Jul 2023 20:58:17 +0200 Subject: [PATCH 05/31] third attempt to clarify that base256emoji as codepoint --- README.md | 12 +++++------- multibase.csv | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index afed7e1..5303d8c 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,9 @@ multibase comes in. It answers the question: > Given binary data d encoded into text s, what base b was used to encode it? To answer this question, a binary prefix `dp` is added to `d` that renders (upon -b-encoding) as a one-character (or in the case of non-ASCII text like the -Unicode emoji alphabet, a one-symbol) prefix, `sp`. This prefix `sp` is the -"code" that makes `b` visible in `s`. For most entries, `dp` is a single byte -in UTF-8, but in the case of the emoji alphabet, the single-byte property may -only be present in other encodings. +b-encoding) as a one-codepoint prefix, `sp` (note: most entries only concern +themselves with UTF-8 encodings where one codepoint = one character). This +prefix `sp` is the "code" that makes `b` visible in `s`. 
## Table of Contents @@ -91,10 +89,10 @@ base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, base64url, u, 0x75, rfc4648 no padding, default base64urlpad, U, 0x55, rfc4648 with padding, default proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀, 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft +base256emoji, 🚀 (Unicode U+1F680), 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft ``` -**NOTE:** Multibase-prefixes are encoding agnostic, and conformance is tested by the prefix code in the string, not the binary prefix, however encoded. I.e., the code is `z` (in BTC-alphabet base 58), not `0x7a` (the binary that becomes the `z` encoded in ASCII/UTF-8). Note that the same binary, in UTF-32, would be `[0x7a, 0x00, 0x00, 0x00]` to produce the same `z` with the base and alphabet. +**NOTE:** Multibase-prefixes are encoding agnostic, and conformance is tested by the prefix code in the string, not the binary prefix, however encoded. I.e., the code is the `z` codepoint (in BTC-alphabet base 58), not `0x7a` (the binary that becomes the `z` encoded in ASCII/UTF-8). Note that the same binary, in UTF-32, would be `[0x7a, 0x00, 0x00, 0x00]` to produce the same `z` with the base and alphabet. 
## Reserved diff --git a/multibase.csv b/multibase.csv index 2e445e1..1c67ee3 100644 --- a/multibase.csv +++ b/multibase.csv @@ -23,4 +23,4 @@ base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, base64url, u, 0x75, rfc4648 no padding, default base64urlpad, U, 0x55, rfc4648 with padding, default proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀, 0x0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft +base256emoji, 🚀 (Unicode U+1F680), 0x0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft From d451436d3c9356e133e8ab6e00fd4697c4998539 Mon Sep 17 00:00:00 2001 From: Bumblefudge Date: Wed, 5 Jul 2023 16:17:08 +0200 Subject: [PATCH 06/31] typo fix (thanks rvagg) Co-authored-by: Rod Vagg --- multibase.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multibase.csv b/multibase.csv index 1c67ee3..50d9489 100644 --- a/multibase.csv +++ b/multibase.csv @@ -23,4 +23,4 @@ base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, base64url, u, 0x75, rfc4648 no padding, default base64urlpad, U, 0x55, rfc4648 with padding, default proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀 (Unicode U+1F680), 0x0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft +base256emoji, 🚀 (Unicode U+1F680), 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft From a39f8ace71ce99bc380e64da229255a3ef77f133 Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Wed, 5 Jul 2023 16:22:09 +0200 Subject: [PATCH 07/31] tab-aligning tables --- README.md | 52 +++++++++++++++++++++++++-------------------------- multibase.csv | 52 +++++++++++++++++++++++++-------------------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 5303d8c..260cbcd 100644 --- a/README.md +++ b/README.md @@ -64,32 +64,32 @@ Where `` is used according to the multibase table. 
The current multibase table is [here](multibase.csv): ``` -encoding, code,prefix (UTF-8), description, status, comments -identity, NUL, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, -base2, 0, 0x30, binary (01010101), candidate -base8, 7, 0x37, octal, draft -base10, 9, 0x39, decimal, draft -base16, f, 0x66, hexadecimal, default -base16upper, F, 0x46, hexadecimal, default -base32hex, v, 0x76, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexupper, V, 0x56, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexpad, t, 0x74, rfc4648 case-insensitive - with padding, candidate -base32hexpadupper, T, 0x54, rfc4648 case-insensitive - with padding, candidate -base32, b, 0x62, rfc4648 case-insensitive - no padding, default -base32upper, B, 0x42, rfc4648 case-insensitive - no padding, default -base32pad, c, 0x63, rfc4648 case-insensitive - with padding, candidate -base32padupper, C, 0x43, rfc4648 case-insensitive - with padding, candidate -base32z, h, 0x68, z-base-32 (used by Tahoe-LAFS), draft -base36, k, 0x6b, base36 [0-9a-z] case-insensitive - no padding, draft -base36upper, K, 0x4b, base36 [0-9a-z] case-insensitive - no padding, draft -base58btc, z, 0x7a, base58 bitcoin, default -base58flickr, Z, 0x5a, base58 flicker, candidate -base64, m, 0x6d, rfc4648 no padding, default -base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, candidate -base64url, u, 0x75, rfc4648 no padding, default -base64urlpad, U, 0x55, rfc4648 with padding, default -proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀 (Unicode U+1F680), 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft +encoding, code, (UTF-8), description, status, comments +identity, NUL, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, +base2, 0, 0x30, binary (01010101), candidate +base8, 7, 0x37, octal, draft +base10, 9, 0x39, decimal, draft +base16, f, 0x66, 
hexadecimal, default +base16upper, F, 0x46, hexadecimal, default +base32hex, v, 0x76, rfc4648 case-insensitive - no padding - highest char, candidate +base32hexupper, V, 0x56, rfc4648 case-insensitive - no padding - highest char, candidate +base32hexpad, t, 0x74, rfc4648 case-insensitive - with padding, candidate +base32hexpadupper, T, 0x54, rfc4648 case-insensitive - with padding, candidate +base32, b, 0x62, rfc4648 case-insensitive - no padding, default +base32upper, B, 0x42, rfc4648 case-insensitive - no padding, default +base32pad, c, 0x63, rfc4648 case-insensitive - with padding, candidate +base32padupper, C, 0x43, rfc4648 case-insensitive - with padding, candidate +base32z, h, 0x68, z-base-32 (used by Tahoe-LAFS), draft +base36, k, 0x6b, base36 [0-9a-z] case-insensitive - no padding, draft +base36upper, K, 0x4b, base36 [0-9a-z] case-insensitive - no padding, draft +base58btc, z, 0x7a, base58 bitcoin, default +base58flickr, Z, 0x5a, base58 flicker, candidate +base64, m, 0x6d, rfc4648 no padding, default +base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, candidate +base64url, u, 0x75, rfc4648 no padding, default +base64urlpad, U, 0x55, rfc4648 with padding, default +proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft +base256emoji, 🚀 (U+1F680), 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft ``` **NOTE:** Multibase-prefixes are encoding agnostic, and conformance is tested by the prefix code in the string, not the binary prefix, however encoded. I.e., the code is the `z` codepoint (in BTC-alphabet base 58), not `0x7a` (the binary that becomes the `z` encoded in ASCII/UTF-8). Note that the same binary, in UTF-32, would be `[0x7a, 0x00, 0x00, 0x00]` to produce the same `z` with the base and alphabet. 
diff --git a/multibase.csv b/multibase.csv index 50d9489..2787b6b 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,26 +1,26 @@ -encoding, code,prefix (UTF-8), description, status, comments -identity, NUL, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, -base2, 0, 0x30, binary (01010101), candidate -base8, 7, 0x37, octal, draft -base10, 9, 0x39, decimal, draft -base16, f, 0x66, hexadecimal, default -base16upper, F, 0x46, hexadecimal, default -base32hex, v, 0x76, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexupper, V, 0x56, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexpad, t, 0x74, rfc4648 case-insensitive - with padding, candidate -base32hexpadupper, T, 0x54, rfc4648 case-insensitive - with padding, candidate -base32, b, 0x62, rfc4648 case-insensitive - no padding, default -base32upper, B, 0x42, rfc4648 case-insensitive - no padding, default -base32pad, c, 0x63, rfc4648 case-insensitive - with padding, candidate -base32padupper, C, 0x43, rfc4648 case-insensitive - with padding, candidate -base32z, h, 0x68, z-base-32 (used by Tahoe-LAFS), draft -base36, k, 0x6b, base36 [0-9a-z] case-insensitive - no padding, draft -base36upper, K, 0x4b, base36 [0-9a-z] case-insensitive - no padding, draft -base58btc, z, 0x7a, base58 bitcoin, default -base58flickr, Z, 0x5a, base58 flicker, candidate -base64, m, 0x6d, rfc4648 no padding, default -base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, candidate -base64url, u, 0x75, rfc4648 no padding, default -base64urlpad, U, 0x55, rfc4648 with padding, default -proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀 (Unicode U+1F680), 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft +encoding, code, (UTF-8), description, status, comments +identity, NUL, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, +base2, 0, 0x30, binary (01010101), candidate +base8, 7, 0x37, 
octal, draft +base10, 9, 0x39, decimal, draft +base16, f, 0x66, hexadecimal, default +base16upper, F, 0x46, hexadecimal, default +base32hex, v, 0x76, rfc4648 case-insensitive - no padding - highest char, candidate +base32hexupper, V, 0x56, rfc4648 case-insensitive - no padding - highest char, candidate +base32hexpad, t, 0x74, rfc4648 case-insensitive - with padding, candidate +base32hexpadupper, T, 0x54, rfc4648 case-insensitive - with padding, candidate +base32, b, 0x62, rfc4648 case-insensitive - no padding, default +base32upper, B, 0x42, rfc4648 case-insensitive - no padding, default +base32pad, c, 0x63, rfc4648 case-insensitive - with padding, candidate +base32padupper, C, 0x43, rfc4648 case-insensitive - with padding, candidate +base32z, h, 0x68, z-base-32 (used by Tahoe-LAFS), draft +base36, k, 0x6b, base36 [0-9a-z] case-insensitive - no padding, draft +base36upper, K, 0x4b, base36 [0-9a-z] case-insensitive - no padding, draft +base58btc, z, 0x7a, base58 bitcoin, default +base58flickr, Z, 0x5a, base58 flicker, candidate +base64, m, 0x6d, rfc4648 no padding, default +base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, candidate +base64url, u, 0x75, rfc4648 no padding, default +base64urlpad, U, 0x55, rfc4648 with padding, default +proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft +base256emoji, 🚀 (U+1F680), 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft From a1b75a59370f488140e2705269e14c432f698d2a Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Wed, 5 Jul 2023 20:05:39 +0200 Subject: [PATCH 08/31] clarify unicode versus utf-8 + fix table --- README.md | 119 ++++++++++++++++++++++++++++---------------------- multibase.csv | 52 +++++++++++----------- 2 files changed, 92 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index 260cbcd..a3292e3 100644 --- a/README.md +++ b/README.md @@ -5,34 +5,36 @@ 
[![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](https://webchat.freenode.net/?channels=%23ipfs) [![](https://img.shields.io/badge/readme%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme) -> Self identifying base encodings +> Self-identifying base encodings -Multibase is a protocol for disambiguating the encoding of base-encoded (e.g., -base32, base36, base64, base58, etc.) binary appearing in text. +Multibase is a protocol for disambiguating the "base encoding" used to express +binary data in text formats (e.g., base32, base36, base64, base58, etc.) from the +expression alone. When text is encoded as bytes, we can usually use a one-size-fits-all encoding (UTF-8) because we're always encoding to the same set of 256 bytes (+/- the NUL byte). When that doesn't work, usually for historical or performance reasons, we can usually infer the encoding from the context. -However, when bytes are encoded as text (using a base encoding), the base choice -of base encoding is often restricted by the context. Worse, these restrictions -can change based on where the data appears in the text. In some cases, we can -only use `[a-z0-9]`. In others, we can use a larger set of characters but need a -compact encoding. This has lead to a large set of "base encodings", one for -every use-case. Unlike when encoding text to bytes, we can't just standardize -around a single base encoding because there is no optimal encoding for all -cases. +However, when bytes are encoded as text (using a base encoding), the choice of +base encoding (and alphabet, and other factors) is often restricted by the +context. Worse, these restrictions can change based on where the data appears in +the text. In some cases, we can only use `[a-z0-9]`. In others, we can use a +larger set of characters but need a compact encoding. This has led to a large +set of "base encodings", almost one for every use-case. 
Unlike the case of +encoding text to bytes, it is impractical to standardize widely around a single +base encoding because there is no optimal encoding for all cases. -Unfortunately, it's not always clear *what* base encoding is used; that's where -multibase comes in. It answers the question: +As data travels beyond its context, it becomes quite hard to ascertain *which* +base encoding of the many possible ones was used; that's where multibase comes +in. Where the data has been prefixed before leaving its context behind, it +answers the question: -> Given binary data d encoded into text s, what base b was used to encode it? +> Given binary data `d` encoded into text `s`, what base `b` was used to encode it? -To answer this question, a binary prefix `dp` is added to `d` that renders (upon -b-encoding) as a one-codepoint prefix, `sp` (note: most entries only concern -themselves with UTF-8 encodings where one codepoint = one character). This -prefix `sp` is the "code" that makes `b` visible in `s`. +To answer this question, a single code point is prepended to `s` at time of +encoding, which signals in that new context which `b` can be used to reconstruct +`d`. ## Table of Contents @@ -54,56 +56,59 @@ prefix `sp` is the "code" that makes `b` visible in `s`. The Format is: ``` - + ``` -Where `` is used according to the multibase table. +Where `` is a code representing an entry in the +multibase table. 
### Multibase Table The current multibase table is [here](multibase.csv): ``` -encoding, code, (UTF-8), description, status, comments -identity, NUL, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, -base2, 0, 0x30, binary (01010101), candidate -base8, 7, 0x37, octal, draft -base10, 9, 0x39, decimal, draft -base16, f, 0x66, hexadecimal, default -base16upper, F, 0x46, hexadecimal, default -base32hex, v, 0x76, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexupper, V, 0x56, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexpad, t, 0x74, rfc4648 case-insensitive - with padding, candidate -base32hexpadupper, T, 0x54, rfc4648 case-insensitive - with padding, candidate -base32, b, 0x62, rfc4648 case-insensitive - no padding, default -base32upper, B, 0x42, rfc4648 case-insensitive - no padding, default -base32pad, c, 0x63, rfc4648 case-insensitive - with padding, candidate -base32padupper, C, 0x43, rfc4648 case-insensitive - with padding, candidate -base32z, h, 0x68, z-base-32 (used by Tahoe-LAFS), draft -base36, k, 0x6b, base36 [0-9a-z] case-insensitive - no padding, draft -base36upper, K, 0x4b, base36 [0-9a-z] case-insensitive - no padding, draft -base58btc, z, 0x7a, base58 bitcoin, default -base58flickr, Z, 0x5a, base58 flicker, candidate -base64, m, 0x6d, rfc4648 no padding, default -base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, candidate -base64url, u, 0x75, rfc4648 no padding, default -base64urlpad, U, 0x55, rfc4648 with padding, default -proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀 (U+1F680), 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, draft +code, Unicode, (UTF-8), encoding, description, status +NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, +0, U+0030 0x30, base2, binary (01010101), candidate +7, U+0037 0x37 base8, octal, draft +9, U+0039 0x39 base10, decimal, draft +f, 
U+0066 0x66, base16, hexadecimal, default +F, U+0006 0x06 base16upper, hexadecimal, default +v, U+0076 0x76 base32hex, rfc4648 case-insensitive - no padding - highest char, candidate +V, U+0056 0x56 base32hexupper, rfc4648 case-insensitive - no padding - highest char, candidate +t, U+0074 0x74, base32hexpad,rfc4648 case-insensitive - with padding, candidate +T, U+0054 0x54 base32hexpadupper, rfc4648 case-insensitive - with padding, candidate +b, U+0062 0x62, base32, rfc4648 case-insensitive - no padding, default +B, U+0042 0x42, base32upper, rfc4648 case-insensitive - no padding, default +c, U+0063 0x63, base32pad, rfc4648 case-insensitive - with padding, candidate +C, U+0043 0x43, base32padupper, rfc4648 case-insensitive - with padding, candidate +h, U+0068 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft +k, U+006b 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft +K, U+004b 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +z, U+007a 0x7a, base58btc, base58 bitcoin, default +Z, U+005a 0x5a, base58flickr, base58 flicker, candidate +m, U+006d 0x6d, base64, rfc4648 no padding, default +M, U+004d 0x4d, base64pad, rfc4648 with padding - MIME encoding, candidate +u, U+0075 0x75, base64url, rfc4648 no padding, default +U, U+0055 0x55, base64urlpad, rfc4648 with padding, default +p, U+0070 0x70, proquint, [PRO-QUINT], draft +🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, draft ``` -**NOTE:** Multibase-prefixes are encoding agnostic, and conformance is tested by the prefix code in the string, not the binary prefix, however encoded. I.e., the code is the `z` codepoint (in BTC-alphabet base 58), not `0x7a` (the binary that becomes the `z` encoded in ASCII/UTF-8). Note that the same binary, in UTF-32, would be `[0x7a, 0x00, 0x00, 0x00]` to produce the same `z` with the base and alphabet. 
+**NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point], not an ASCII character or corresponding UTF-8 bytes. Since UTF-8 is the most common context for binary data that gets prefixed as a multibase today, the UTF-8 column is provided as a reference for detecting multibase-prefixes, since most of these codes can be detected in the first byte. + +However, if the string in question came from a UTF-32 context, detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` of that code to be detected and dropped. ## Reserved -The following codes are _reserved_ for (backwards) compatibility with existing systems. +The following codes are _reserved_ for (backwards) compatibility with existing systems and cannot be registered in the `multibase` table. Note that all three Unicode entries here correspond to entries in the UTF-8-keyed [multiformats] registry group registered under their UTF-8 equivalents. -* `/` - Separator used by [multiaddr](https://github.com/multiformats/multiaddr). -* `1` - Base58 encoded identity multihashes used by libp2p peer IDs. -* `Q` - Base58 encoded sha2-256 multihashes used by libp2p/ipfs for peer IDs and CIDv0. +* `/` (U+002F) - Separator used by [multiaddr]. +* `1` (U+0031) - Base58-encoded identity multihashes used by libp2p peer IDs. +* `Q` (U+0011) - Base58-encoded sha2-256 multihashes used by libp2p/ipfs for peer IDs and CIDv0. If you'd like to switch a project over to multibase and would also like to -reserve a prefix for compatibility, please file an issue. +reserve a prefix for compatibility, please file an issue in this repository. ## Status @@ -149,7 +154,11 @@ Yes. If i give you `"1214314321432165"` is that decimal? or hex? or something el > Why the strange selection of codes / characters? 
-The code values are selected such that they are included in the alphabets of the base they represent. For example, `f` is the base code for `base16 (hex)`, because `f` is in hex's 16 character alphabet. Note that the alphabets can be encoded in UTF8, and most can be encoded in ASCII. We have not found a case needing something else. +The code values are selected such that they are included in the alphabets of the +base they represent. For example, `f` is the base code for `base16 (hex)`, +because `f` is in hex's 16 character alphabet. Note that most of the alphabets +used can be encoded in UTF-8, and most but not all can be encoded in ASCII. We +have not yet found a case needing something else. > Don't we have to agree on a table of base encodings? @@ -191,3 +200,7 @@ Small note: If editing the README, please conform to the [standard-readme](https ## License This repository is only for documents. All of these are licensed under the [CC-BY-SA 3.0](https://ipfs.io/ipfs/QmVreNvKsQmQZ83T86cWSjPu2vR3yZHGPm5jnxFuunEB9u) license © 2016 Protocol Labs Inc. Any code is under a [MIT](LICENSE) © 2016 Protocol Labs Inc. 
+ +[multiaddr]: https://github.com/multiformats/multiaddr +[multiformats]: https://github.com/multiformats/multicodec/blob/master/table.csv +[code point]: https://infra.spec.whatwg.org/#code-points \ No newline at end of file diff --git a/multibase.csv b/multibase.csv index 2787b6b..57ea00c 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,26 +1,26 @@ -encoding, code, (UTF-8), description, status, comments -identity, NUL, 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, -base2, 0, 0x30, binary (01010101), candidate -base8, 7, 0x37, octal, draft -base10, 9, 0x39, decimal, draft -base16, f, 0x66, hexadecimal, default -base16upper, F, 0x46, hexadecimal, default -base32hex, v, 0x76, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexupper, V, 0x56, rfc4648 case-insensitive - no padding - highest char, candidate -base32hexpad, t, 0x74, rfc4648 case-insensitive - with padding, candidate -base32hexpadupper, T, 0x54, rfc4648 case-insensitive - with padding, candidate -base32, b, 0x62, rfc4648 case-insensitive - no padding, default -base32upper, B, 0x42, rfc4648 case-insensitive - no padding, default -base32pad, c, 0x63, rfc4648 case-insensitive - with padding, candidate -base32padupper, C, 0x43, rfc4648 case-insensitive - with padding, candidate -base32z, h, 0x68, z-base-32 (used by Tahoe-LAFS), draft -base36, k, 0x6b, base36 [0-9a-z] case-insensitive - no padding, draft -base36upper, K, 0x4b, base36 [0-9a-z] case-insensitive - no padding, draft -base58btc, z, 0x7a, base58 bitcoin, default -base58flickr, Z, 0x5a, base58 flicker, candidate -base64, m, 0x6d, rfc4648 no padding, default -base64pad, M, 0x4d, rfc4648 with padding - MIME encoding, candidate -base64url, u, 0x75, rfc4648 no padding, default -base64urlpad, U, 0x55, rfc4648 with padding, default -proquint, p, 0x70, PRO-QUINT https://arxiv.org/html/0901.4016, draft -base256emoji, 🚀 (U+1F680), 0xF09F9A80, base256 with custom alphabet using variable-sized-codepoints, 
draft +code, Unicode, (UTF-8), encoding, description, status +NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, +0, U+0030 0x30, base2, binary (01010101), candidate +7, U+0037 0x37 base8, octal, draft +9, U+0039 0x39 base10, decimal, draft +f, U+0066 0x66, base16, hexadecimal, default +F, U+0006 0x06 base16upper, hexadecimal, default +v, U+0076 0x76 base32hex, rfc4648 case-insensitive - no padding - highest char, candidate +V, U+0056 0x56 base32hexupper, rfc4648 case-insensitive - no padding - highest char, candidate +t, U+0074 0x74, base32hexpad,rfc4648 case-insensitive - with padding, candidate +T, U+0054 0x54 base32hexpadupper, rfc4648 case-insensitive - with padding, candidate +b, U+0062 0x62, base32, rfc4648 case-insensitive - no padding, default +B, U+0042 0x42, base32upper, rfc4648 case-insensitive - no padding, default +c, U+0063 0x63, base32pad, rfc4648 case-insensitive - with padding, candidate +C, U+0043 0x43, base32padupper, rfc4648 case-insensitive - with padding, candidate +h, U+0068 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft +k, U+006b 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft +K, U+004b 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +z, U+007a 0x7a, base58btc, base58 bitcoin, default +Z, U+005a 0x5a, base58flickr, base58 flicker, candidate +m, U+006d 0x6d, base64, rfc4648 no padding, default +M, U+004d 0x4d, base64pad, rfc4648 with padding - MIME encoding, candidate +u, U+0075 0x75, base64url, rfc4648 no padding, default +U, U+0055 0x55, base64urlpad, rfc4648 with padding, default +p, U+0070 0x70, proquint, PRO-QUINT https://arxiv.org/html/0901.4016, draft +🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, draft From 9d69e19485992a0bf3bb9a567bda604f3b8ce1b8 Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Thu, 6 Jul 2023 10:27:32 +0200 Subject: [PATCH 09/31] future-proof collision-protection against 
single-byte multiformats --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a3292e3..15a47b7 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ However, if the string in question came from a UTF-32 context, detecting and dro ## Reserved -The following codes are _reserved_ for (backwards) compatibility with existing systems and cannot be registered in the `multibase` table. Note that all three Unicode entries here correspond to entries in the UTF-8-keyed [multiformats] registry group registered under their UTF-8 equivalents. +The following codes are _reserved_ and cannot be registered in the `multibase` table. Note that all three Unicode entries, expressed as a UTF-8 byte, collide with entries in the UTF-8-keyed namespace of the [multiformats] registry group; this list of reserved Unicode codepoints may grow in the future to avoid such collisions as other single-byte UTF-8 codes are reserved there. * `/` (U+002F) - Separator used by [multiaddr]. * `1` (U+0031) - Base58-encoded identity multihashes used by libp2p peer IDs. From e814396b4be13be2b43825cbccb8c40768ea1382 Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Tue, 18 Jul 2023 14:41:03 +0200 Subject: [PATCH 10/31] update readme language --- README.md | 59 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 15a47b7..e21b2ae 100644 --- a/README.md +++ b/README.md @@ -95,29 +95,31 @@ p, U+0070 0x70, proquint, [PRO-QUINT], 🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, draft ``` -**NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point], not an ASCII character or corresponding UTF-8 bytes. 
Since UTF-8 is the most common context for binary data that gets prefixed as a multibase today, the UTF-8 column is provided as a reference for detecting multibase-prefixes, since most of these codes can be detected in the first byte. +**NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point], not an ASCII character or corresponding UTF-8 bytes. +Since UTF-8 is the most common context for binary data that gets prefixed as a multibase today, the UTF-8 column is provided as a reference for detecting multibase-prefixes, since most of these codes can be detected in the first byte in known-encoding contexts. -However, if the string in question came from a UTF-32 context, detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` of that code to be detected and dropped. +However, if the string in question came from a UTF-32 context, detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. ## Reserved -The following codes are _reserved_ and cannot be registered in the `multibase` table. Note that all three Unicode entries, expressed as a UTF-8 byte, collide with entries in the UTF-8-keyed namespace of the [multiformats] registry group; this list of reserved Unicode codepoints may grow in the future to avoid such collisions as other single-byte UTF-8 codes are reserved there. +The following codes are _reserved_ and cannot be registered in the `multibase` table. 
Note that all three Unicode entries, expressed as the [unsigned varint] expression of that Unicode code-point in UTF-8, are reserved in the greater [multiformats registry group]; this list of reserved Unicode codepoints may grow in the future to avoid such collisions as other single-byte UTF-8 codes are reserved there. + * `/` (U+002F) - Separator used by [multiaddr]. * `1` (U+0031) - Base58-encoded identity multihashes used by libp2p peer IDs. * `Q` (U+0011) - Base58-encoded sha2-256 multihashes used by libp2p/ipfs for peer IDs and CIDv0. -If you'd like to switch a project over to multibase and would also like to -reserve a prefix for compatibility, please file an issue in this repository. - ## Status Each multibase encoding has a status: -* draft - these encodings have been proposed but are not widely implemented and may be removed. -* candidate - these encodings are mature and widely implemented but may not be implemented by all implementations. -* default - these encodings should be implemented by all implementations and are widely used. - +* reserved - for functional reasons or to avoid collisions with other multi-* registries, this registry cannot accept registrations at this code-point and implementing one unregistered is discouraged for interoperability reasons +* experimental - these encodings have been proposed but are not widely implemented and may be removed. +* draft - these encodings are mature and widely implemented but may not be + implemented by all implementations. +* final - these encodings should be implemented by all implementations and are widely used. +* deprecated - this entry will likely be removed and reassigned in the future and it will not likely become a `final` registration + ## Multibase By Example Consider the following encodings of the same binary string: @@ -162,7 +164,10 @@ have yet not found a case needing something else. > Don't we have to agree on a table of base encodings? 
-Yes, but we already have to agree on base encodings, so this is not hard. The table even leaves some room for custom encodings. +Yes, but we already have to agree on base encodings, so this is not hard. The +table even leaves some room for custom encodings and is intended to work both in +contexts where the encodings are known or agreed on and open-world or brownfield +contexts where these may vary. ## Implementations: @@ -187,20 +192,36 @@ Yes, but we already have to agree on base encodings, so this is not hard. The ta ## Disclaimers -Warning: **obviously multibase changes the first character depending on the encoding**. Do not expect the value to be exactly the same. Remove the multibase prefix before using the value. +Warning: **obviously multibase changes the first character depending on the +encoding**. Do not expect the value to be exactly the same. Remove the multibase +prefix before using the value. ## Contribute -Contributions welcome. Please check out [the issues](https://github.com/multiformats/multibase/issues). - -Check out our [contributing document](https://github.com/multiformats/multiformats/blob/master/contributing.md) for more information on how we work, and about contributing in general. Please be aware that all interactions related to multiformats are subject to the IPFS [Code of Conduct](https://github.com/ipfs/community/blob/master/code-of-conduct.md). - -Small note: If editing the README, please conform to the [standard-readme](https://github.com/RichardLitt/standard-readme) specification. +Contributions welcome. Please check out [the +issues](https://github.com/multiformats/multibase/issues) and read the +[contributing +document](https://github.com/multiformats/multiformats/blob/master/contributing.md) +for the greater multiformats project before opening your first issue, as the +workflow and the relation of multibase to the greater project both benefit from +this context. 
That document has more information on how we work, and about contributing in +general. + +If you'd like to switch a project over to multibase, whether by creating a new +multibase implementation or building on one of those listed above, please file +an issue in this repository using the "Interested in implementing" issue +template. If you would also like to reserve a prefix for compatibility, please file +a separate issue in this repository using the "New Registration" issue template. ## License -This repository is only for documents. All of these are licensed under the [CC-BY-SA 3.0](https://ipfs.io/ipfs/QmVreNvKsQmQZ83T86cWSjPu2vR3yZHGPm5jnxFuunEB9u) license © 2016 Protocol Labs Inc. Any code is under a [MIT](LICENSE) © 2016 Protocol Labs Inc. +This repository is only for documents. All of these are licensed under the +[CC-BY-SA +3.0](https://ipfs.io/ipfs/QmVreNvKsQmQZ83T86cWSjPu2vR3yZHGPm5jnxFuunEB9u) +license © 2016 Protocol Labs Inc. Any code is under a [MIT](LICENSE) © 2016 +Protocol Labs Inc. [multiaddr]: https://github.com/multiformats/multiaddr -[multiformats]: https://github.com/multiformats/multicodec/blob/master/table.csv +[multiformats registry group]: https://github.com/multiformats/multicodec/blob/master/table.csv +[unsigned varint]: https://github.com/multiformats/unsigned-varint [code point]: https://infra.spec.whatwg.org/#code-points \ No newline at end of file From e79d6d8daf92283d3ea9e9ffd842b2fe7ccd599e Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Tue, 18 Jul 2023 14:44:14 +0200 Subject: [PATCH 11/31] align status column with iana terms --- README.md | 40 ++++++++++++++++++++-------------------- multibase.csv | 40 ++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index e21b2ae..1c9b35e 100644 --- a/README.md +++ b/README.md @@ -68,31 +68,31 @@ The current multibase table is [here](multibase.csv): ``` code, Unicode, (UTF-8), encoding, description, status -NUL, U+0000 0x00, 8-bit binary 
(encoder and decoder keeps data unmodified), default, -0, U+0030 0x30, base2, binary (01010101), candidate +NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), reserved, +0, U+0030 0x30, base2, binary (01010101), experimental 7, U+0037 0x37 base8, octal, draft 9, U+0039 0x39 base10, decimal, draft -f, U+0066 0x66, base16, hexadecimal, default -F, U+0006 0x06 base16upper, hexadecimal, default -v, U+0076 0x76 base32hex, rfc4648 case-insensitive - no padding - highest char, candidate -V, U+0056 0x56 base32hexupper, rfc4648 case-insensitive - no padding - highest char, candidate -t, U+0074 0x74, base32hexpad,rfc4648 case-insensitive - with padding, candidate -T, U+0054 0x54 base32hexpadupper, rfc4648 case-insensitive - with padding, candidate -b, U+0062 0x62, base32, rfc4648 case-insensitive - no padding, default -B, U+0042 0x42, base32upper, rfc4648 case-insensitive - no padding, default -c, U+0063 0x63, base32pad, rfc4648 case-insensitive - with padding, candidate -C, U+0043 0x43, base32padupper, rfc4648 case-insensitive - with padding, candidate +f, U+0066 0x66, base16, hexadecimal, final +F, U+0046 0x46 base16upper, hexadecimal, final +v, U+0076 0x76 base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +V, U+0056 0x56 base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +t, U+0074 0x74, base32hexpad,rfc4648 case-insensitive - with padding, experimental +T, U+0054 0x54 base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +b, U+0062 0x62, base32, rfc4648 case-insensitive - no padding, final +B, U+0042 0x42, base32upper, rfc4648 case-insensitive - no padding, final +c, U+0063 0x63, base32pad, rfc4648 case-insensitive - with padding, draft +C, U+0043 0x43, base32padupper, rfc4648 case-insensitive - with padding, draft h, U+0068 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft k, U+006b 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft K, U+004b 0x4b, 
base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a 0x7a, base58btc, base58 bitcoin, default -Z, U+005a 0x5a, base58flickr, base58 flicker, candidate -m, U+006d 0x6d, base64, rfc4648 no padding, default -M, U+004d 0x4d, base64pad, rfc4648 with padding - MIME encoding, candidate -u, U+0075 0x75, base64url, rfc4648 no padding, default -U, U+0055 0x55, base64urlpad, rfc4648 with padding, default -p, U+0070 0x70, proquint, [PRO-QUINT], draft -🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, draft +z, U+007a 0x7a, base58btc, base58 bitcoin, final +Z, U+005a 0x5a, base58flickr, base58 flicker, experimental +m, U+006d 0x6d, base64, rfc4648 no padding, final +M, U+004d 0x4d, base64pad, rfc4648 with padding - MIME encoding, experimental +u, U+0075 0x75, base64url, rfc4648 no padding, final +U, U+0055 0x55, base64urlpad, rfc4648 with padding, final +p, U+0070 0x70, proquint, [PRO-QUINT], experimental +🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental ``` **NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point], not an ASCII character or corresponding UTF-8 bytes. 
diff --git a/multibase.csv b/multibase.csv index 57ea00c..5bfde32 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,26 +1,26 @@ code, Unicode, (UTF-8), encoding, description, status -NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), default, -0, U+0030 0x30, base2, binary (01010101), candidate +NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), reserved, +0, U+0030 0x30, base2, binary (01010101), experimental 7, U+0037 0x37 base8, octal, draft 9, U+0039 0x39 base10, decimal, draft -f, U+0066 0x66, base16, hexadecimal, default -F, U+0006 0x06 base16upper, hexadecimal, default -v, U+0076 0x76 base32hex, rfc4648 case-insensitive - no padding - highest char, candidate -V, U+0056 0x56 base32hexupper, rfc4648 case-insensitive - no padding - highest char, candidate -t, U+0074 0x74, base32hexpad,rfc4648 case-insensitive - with padding, candidate -T, U+0054 0x54 base32hexpadupper, rfc4648 case-insensitive - with padding, candidate -b, U+0062 0x62, base32, rfc4648 case-insensitive - no padding, default -B, U+0042 0x42, base32upper, rfc4648 case-insensitive - no padding, default -c, U+0063 0x63, base32pad, rfc4648 case-insensitive - with padding, candidate -C, U+0043 0x43, base32padupper, rfc4648 case-insensitive - with padding, candidate +f, U+0066 0x66, base16, hexadecimal, final +F, U+0046 0x46 base16upper, hexadecimal, final +v, U+0076 0x76 base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +V, U+0056 0x56 base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +t, U+0074 0x74, base32hexpad,rfc4648 case-insensitive - with padding, experimental +T, U+0054 0x54 base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +b, U+0062 0x62, base32, rfc4648 case-insensitive - no padding, final +B, U+0042 0x42, base32upper, rfc4648 case-insensitive - no padding, final +c, U+0063 0x63, base32pad, rfc4648 case-insensitive - with padding, draft +C, U+0043 0x43, 
base32padupper, rfc4648 case-insensitive - with padding, draft h, U+0068 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft k, U+006b 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft K, U+004b 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a 0x7a, base58btc, base58 bitcoin, default -Z, U+005a 0x5a, base58flickr, base58 flicker, candidate -m, U+006d 0x6d, base64, rfc4648 no padding, default -M, U+004d 0x4d, base64pad, rfc4648 with padding - MIME encoding, candidate -u, U+0075 0x75, base64url, rfc4648 no padding, default -U, U+0055 0x55, base64urlpad, rfc4648 with padding, default -p, U+0070 0x70, proquint, PRO-QUINT https://arxiv.org/html/0901.4016, draft -🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, draft +z, U+007a 0x7a, base58btc, base58 bitcoin, final +Z, U+005a 0x5a, base58flickr, base58 flicker, experimental +m, U+006d 0x6d, base64, rfc4648 no padding, final +M, U+004d 0x4d, base64pad, rfc4648 with padding - MIME encoding, experimental +u, U+0075 0x75, base64url, rfc4648 no padding, final +U, U+0055 0x55, base64urlpad, rfc4648 with padding, final +p, U+0070 0x70, proquint, [PRO-QUINT], experimental +🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental From d875325b9c79c6683acc02ed17ec08d2b233f42d Mon Sep 17 00:00:00 2001 From: bumblefudge Date: Tue, 18 Jul 2023 14:58:52 +0200 Subject: [PATCH 12/31] add issue templates to the repo --- .github/ISSUE_TEMPLATE/BUG-REPORT.yml | 31 +++++++++ .github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml | 55 +++++++++++++++ .github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml | 69 +++++++++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 5 ++ 4 files changed, 160 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/BUG-REPORT.yml create mode 100644 .github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml create mode 100644 .github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml create mode 100644 
.github/ISSUE_TEMPLATE/config.yml diff --git a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml new file mode 100644 index 0000000..5422dc0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml @@ -0,0 +1,31 @@ +name: "Bug Report - documentation or registry" +description: Report possible bugs in multibase spec, process docs, and/or the multibase registry. +title: "🐛 [DOC/PROCESS BUG] - " +labels: [ + "bug" +] +body: + - type: textarea + id: description + attributes: + label: "Description" + description: Please enter an explicit description of your issue. + placeholder: Short and explicit description of your incident, ideally with commit-specific link to lines + validations: + required: true + - type: input + id: reprod-url + attributes: + label: "Reproduction URL" + description: Please enter your GitHub URL to provide a reproduction of the issue + placeholder: ex. https://github.com/multiformats/multibase/ + validations: + required: true + - type: textarea + id: context + attributes: + label: "Context" + description: Please provide additional context + placeholder: "Context or external links needed to explain the possible mistake" + validations: + required: true \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml new file mode 100644 index 0000000..ded5720 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml @@ -0,0 +1,55 @@ +name: "Interest in Implementing - New Multibase Library or System" +description: Express interest in possible new multibase library or system +title: "📚 [NEW PROJECT] - <title>" +labels: [ + "ideation" +] +body: + - type: input + id: name + attributes: + label: "Name" + description: Name this library or system + placeholder: "[lang-]Multibase-[usecase], for example" + validations: + required: false + - type: checkboxes + attributes: + label: "Have read contributing" + description: I have read the 
[contributing](https://github.com/multiformats/multiformats/blob/master/contributing.md) document + options: + - label: I read it! + validations: + required: true + - type: textarea + id: problem_statement + attributes: + label: "Description of parsing, sniffing, or encoding problem solved" + description: Please describe the problem solved by a new multibase library or system depending on multibase codecs + placeholder: Feel free to provide links for context and use-case descriptions, and how this problem is not solved by existing multi-formats entries or mini-registries + validations: + required: true + - type: textarea + id: prior_art + attributes: + label: "Description of relevant prior art and status quo" + description: Please describe relevant prior art and how this use-case functions today + placeholder: Links welcome + validations: + required: true + - type: textarea + id: solution_and_rationale + attributes: + label: "Proposed solution and rationale" + description: Please describe at a high level what you are exploring building and current open research questions + placeholder: Detail welcome + validations: + required: true + - type: textarea + id: questions + attributes: + label: "Any further questions or requests" + description: Anything else you'd like to ask the maintainers or community? + placeholder: ok to leave blank!
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml b/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml new file mode 100644 index 0000000..75ae90d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml @@ -0,0 +1,69 @@ +name: "New Registration" +description: Express interest in registering a new encoding +title: "📚 [NEW REGISTRATION] - <title>" +labels: [ + "Registration" +] +body: + - type: input + id: name + attributes: + label: "Name" + description: Name this library or system + placeholder: acronyms and abbreviations are fine + validations: + required: false + - type: checkboxes + attributes: + label: "Have read contributing" + description: I have read the [contributing](https://github.com/multiformats/multiformats/blob/master/contributing.md) document + options: + - label: I read it! + validations: + required: true + - type: checkboxes + attributes: + label: "Have checked table" + description: I have reviewed the [multiformats mega-table](https://github.com/multiformats/multicodec/blob/master/table.csv) to assess viable sub-namespace for a registry if applicable + options: + - label: I read it! 
+ - type: input + id: codepoint + attributes: + label: "Proposed codepoint" + description: Please put here the prefix in the target encoding + placeholder: By tradition, the highest binary value in the alphabet works well and has a built-in mnemonic if it doesn't conflict with any other entries + validations: + required: true + - type: input + id: varint-value + attributes: + label: "Proposed varint value for registration in multiformats" + description: Please put here the UTF-8 value that corresponds to that target encoding, for inclusion in the multiformats table + placeholder: See the multibase spec for reserved values + validations: + required: true + - type: textarea + id: use-case + attributes: + label: "use-case" + description: Please describe the possible use-cases where this additional codec would be helpful, where this encoding is used currently in the wild, etc. + placeholder: Feel free to provide links for context and use-case descriptions + validations: + required: true + - type: textarea + id: specification + attributes: + label: "Description of relevant prior art and status quo" + description: Please describe relevant prior art and, if already specified in a static public document, the algorithms and configurations needed to deterministically encode/decode + placeholder: Links welcome + validations: + required: true + - type: textarea + id: solution_and_rationale + attributes: + label: "Proposed solution and rationale" + description: Please describe at a high level what you are exploring building and current open research questions + placeholder: Detail welcome + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..4943f9b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: Protocol Labs Vulnerability Disclosure Team + url: mailto:security@ipfs.io + about: Please do NOT open issues 
related to security of implementations or spec here without contacting the IPFS security team first. \ No newline at end of file From c3dbd700528ff8fbd20976d3f4a9a262832cfbbe Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Tue, 18 Jul 2023 15:05:38 +0200 Subject: [PATCH 13/31] refine issue templates --- .github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml | 2 +- .github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml index ded5720..afaca88 100644 --- a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml +++ b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml @@ -10,7 +10,7 @@ body: attributes: label: "Name" description: Name this library or system - placeholder: [lang-]Multibase-[usecase], for example + placeholder: {language-}Multibase{-usecase}, for example validations: required: false - type: checkboxes diff --git a/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml b/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml index 75ae90d..92ba2eb 100644 --- a/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml +++ b/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml @@ -31,16 +31,16 @@ body: id: codepoint attributes: label: "Proposed codepoint" - description: Please put here the prefix in the target encoding - placeholder: By tradition, the highest binary value in the alphabet works well and has a built-in mnemonic if it doesn't conflict with any other entries + description: Please put here the prefix in the target encoding. 
By tradition, the highest binary value in the encoding alphabet works well and has a built-in mnemonic if it doesn't conflict with any other entries + placeholder: x validations: required: true - type: input id: varint-value attributes: label: "Proposed varint value for registration in multiformats" - description: Please put here the UTF-8 value that corresponds to that target encoding, for inclusion in the multiformats table - placeholder: See the multibase spec for reserved values + description: Please put here the UTF-8 value that corresponds to that target encoding, for inclusion in the multiformats table, formatted as an [unsigned varint](https://github.com/multiformats/unsigned-varint) + placeholder: See mf/unsigned-varint validations: required: true - type: textarea From e071bc11c85d14c45eda50e9fb1a4be2c5db5af2 Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Tue, 18 Jul 2023 15:09:36 +0200 Subject: [PATCH 14/31] refine issue templates --- .github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml b/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml index 92ba2eb..d417555 100644 --- a/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml +++ b/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml @@ -6,9 +6,9 @@ labels: [ ] body: - type: input - id: name + id: encoding-name attributes: - label: "Name" + label: "Name of encoding" description: Name this library or system placeholder: acronyms and abbreviations are fine validations: From f07376cae39ad68a7b398515794d0998174877ea Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Tue, 18 Jul 2023 15:19:42 +0200 Subject: [PATCH 15/31] refine issue templates --- .github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml index 
afaca88..38a99da 100644 --- a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml +++ b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml @@ -6,9 +6,9 @@ labels: [ ] body: - type: input - id: name + id: project-name attributes: - label: "Name" + label: "Name of Project" description: Name this library or system placeholder: {language-}Multibase{-usecase}, for example validations: From f7c29cecc8be5f57e0d44b65e0948a9acb7d324f Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Tue, 18 Jul 2023 15:35:30 +0200 Subject: [PATCH 16/31] refine issue templates --- .github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml index 38a99da..c3fd2f4 100644 --- a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml +++ b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml @@ -10,7 +10,7 @@ body: attributes: label: "Name of Project" description: Name this library or system - placeholder: {language-}Multibase{-usecase}, for example + placeholder: (language-)Multibase(-usecase), for example validations: required: false - type: checkboxes From 3e8ce2beb4e23c0df9df0312ff1113d7d3f0b1f5 Mon Sep 17 00:00:00 2001 From: Bumblefudge <caballerojuan@pm.me> Date: Fri, 21 Jul 2023 13:12:11 +0200 Subject: [PATCH 17/31] multibase.csv typo --- multibase.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multibase.csv b/multibase.csv index 5bfde32..bbd64bd 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,5 +1,5 @@ code, Unicode, (UTF-8), encoding, description, status -NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), reserved, +NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), reserved 0, U+0030 0x30, base2, binary (01010101), experimental 7, U+0037 0x37 base8, octal, draft 9, U+0039 0x39 base10, decimal, draft From dafe60b653423fd76ee6a6a97dc2f4ec8a00dca5 Mon Sep 17 
00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Fri, 21 Jul 2023 13:15:31 +0200 Subject: [PATCH 18/31] csv typos --- README.md | 48 ++++++++++++++++++++++++------------------------ multibase.csv | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 1c9b35e..a6224fc 100644 --- a/README.md +++ b/README.md @@ -68,30 +68,30 @@ The current multibase table is [here](multibase.csv): ``` code, Unicode, (UTF-8), encoding, description, status -NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), reserved, -0, U+0030 0x30, base2, binary (01010101), experimental -7, U+0037 0x37 base8, octal, draft -9, U+0039 0x39 base10, decimal, draft -f, U+0066 0x66, base16, hexadecimal, final -F, U+0006 0x06 base16upper, hexadecimal, final -v, U+0076 0x76 base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -V, U+0056 0x56 base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -t, U+0074 0x74, base32hexpad,rfc4648 case-insensitive - with padding, experimental -T, U+0054 0x54 base32hexpadupper, rfc4648 case-insensitive - with padding, experimental -b, U+0062 0x62, base32, rfc4648 case-insensitive - no padding, final -B, U+0042 0x42, base32upper, rfc4648 case-insensitive - no padding, final -c, U+0063 0x63, base32pad, rfc4648 case-insensitive - with padding, draft -C, U+0043 0x43, base32padupper, rfc4648 case-insensitive - with padding, draft -h, U+0068 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft -k, U+006b 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft -K, U+004b 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a 0x7a, base58btc, base58 bitcoin, final -Z, U+005a 0x5a, base58flickr, base58 flicker, experimental -m, U+006d 0x6d, base64, rfc4648 no padding, final -M, U+004d 0x4d, base64pad, rfc4648 with padding - MIME encoding, experimental -u, 
U+0075 0x75, base64url, rfc4648 no padding, final -U, U+0055 0x55, base64urlpad, rfc4648 with padding, final -p, U+0070 0x70, proquint, [PRO-QUINT], experimental +NUL, U+0000, 0x00, 8-bit binary, (encoder and decoder keeps data unmodified), reserved +0, U+0030, 0x30, base2, binary (01010101), experimental +7, U+0037, 0x37, base8, octal, draft +9, U+0039, 0x39, base10, decimal, draft +f, U+0066, 0x66, base16, hexadecimal, final +F, U+0006, 0x06, base16upper, hexadecimal, final +v, U+0076, 0x76, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +V, U+0056, 0x56, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +t, U+0074, 0x74, base32hexpad, rfc4648 case-insensitive - with padding, experimental +T, U+0054, 0x54, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +b, U+0062, 0x62, base32, rfc4648 case-insensitive - no padding, final +B, U+0042, 0x42, base32upper, rfc4648 case-insensitive - no padding, final +c, U+0063, 0x63, base32pad, rfc4648 case-insensitive - with padding, draft +C, U+0043, 0x43, base32padupper, rfc4648 case-insensitive - with padding, draft +h, U+0068, 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft +k, U+006b, 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft +K, U+004b, 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +z, U+007a, 0x7a, base58btc, base58 bitcoin, final +Z, U+005a, 0x5a, base58flickr, base58 flicker, experimental +m, U+006d, 0x6d, base64, rfc4648 no padding, final +M, U+004d, 0x4d, base64pad, rfc4648 with padding - MIME encoding, experimental +u, U+0075, 0x75, base64url, rfc4648 no padding, final +U, U+0055, 0x55, base64urlpad, rfc4648 with padding, final +p, U+0070, 0x70, proquint, [PRO-QUINT], experimental 🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental ``` diff --git a/multibase.csv b/multibase.csv index 5bfde32..55bb4aa 100644 --- 
a/multibase.csv +++ b/multibase.csv @@ -1,26 +1,26 @@ code, Unicode, (UTF-8), encoding, description, status -NUL, U+0000 0x00, 8-bit binary (encoder and decoder keeps data unmodified), reserved, -0, U+0030 0x30, base2, binary (01010101), experimental -7, U+0037 0x37 base8, octal, draft -9, U+0039 0x39 base10, decimal, draft -f, U+0066 0x66, base16, hexadecimal, final -F, U+0006 0x06 base16upper, hexadecimal, final -v, U+0076 0x76 base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -V, U+0056 0x56 base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -t, U+0074 0x74, base32hexpad,rfc4648 case-insensitive - with padding, experimental -T, U+0054 0x54 base32hexpadupper, rfc4648 case-insensitive - with padding, experimental -b, U+0062 0x62, base32, rfc4648 case-insensitive - no padding, final -B, U+0042 0x42, base32upper, rfc4648 case-insensitive - no padding, final -c, U+0063 0x63, base32pad, rfc4648 case-insensitive - with padding, draft -C, U+0043 0x43, base32padupper, rfc4648 case-insensitive - with padding, draft -h, U+0068 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft -k, U+006b 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft -K, U+004b 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a 0x7a, base58btc, base58 bitcoin, final -Z, U+005a 0x5a, base58flickr, base58 flicker, experimental -m, U+006d 0x6d, base64, rfc4648 no padding, final -M, U+004d 0x4d, base64pad, rfc4648 with padding - MIME encoding, experimental -u, U+0075 0x75, base64url, rfc4648 no padding, final -U, U+0055 0x55, base64urlpad, rfc4648 with padding, final -p, U+0070 0x70, proquint, [PRO-QUINT], experimental +NUL, U+0000, 0x00, 8-bit binary, (encoder and decoder keeps data unmodified), reserved +0, U+0030, 0x30, base2, binary (01010101), experimental +7, U+0037, 0x37, base8, octal, draft +9, U+0039, 0x39, base10, decimal, draft +f, U+0066, 0x66, base16, hexadecimal, final +F, U+0006, 
0x06, base16upper, hexadecimal, final +v, U+0076, 0x76, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +V, U+0056, 0x56, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +t, U+0074, 0x74, base32hexpad, rfc4648 case-insensitive - with padding, experimental +T, U+0054, 0x54, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +b, U+0062, 0x62, base32, rfc4648 case-insensitive - no padding, final +B, U+0042, 0x42, base32upper, rfc4648 case-insensitive - no padding, final +c, U+0063, 0x63, base32pad, rfc4648 case-insensitive - with padding, draft +C, U+0043, 0x43, base32padupper, rfc4648 case-insensitive - with padding, draft +h, U+0068, 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft +k, U+006b, 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft +K, U+004b, 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +z, U+007a, 0x7a, base58btc, base58 bitcoin, final +Z, U+005a, 0x5a, base58flickr, base58 flicker, experimental +m, U+006d, 0x6d, base64, rfc4648 no padding, final +M, U+004d, 0x4d, base64pad, rfc4648 with padding - MIME encoding, experimental +u, U+0075, 0x75, base64url, rfc4648 no padding, final +U, U+0055, 0x55, base64urlpad, rfc4648 with padding, final +p, U+0070, 0x70, proquint, [PRO-QUINT], experimental 🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental From 52b3c06533a14d554aa140cbfc143850dd187851 Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Mon, 24 Jul 2023 16:49:17 +0200 Subject: [PATCH 19/31] undo hard-wrapping of lines per vmx request --- README.md | 94 ++++++++++++++++++++----------------------------------- 1 file changed, 34 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index b18c7cb..ec96b16 100644 --- a/README.md +++ b/README.md @@ -7,34 +7,23 @@ > Self-identifying base encodings -Multibase is a protocol for 
disambiguating the "base encoding" used to express -binary data in text formats (e.g., base32, base36, base64, base58, etc.) from the -expression alone. - -When text is encoded as bytes, we can usually use a one-size-fits-all encoding -(UTF-8) because we're always encoding to the same set of 256 bytes (+/- the NUL -byte). When that doesn't work, usually for historical or performance reasons, we -can usually infer the encoding from the context. - -However, when bytes are encoded as text (using a base encoding), the choice of -base encoding (and alphabet, and other factors) is often restricted by the -context. Worse, these restrictions can change based on where the data appears in -the text. In some cases, we can only use `[a-z0-9]`. In others, we can use a -larger set of characters but need a compact encoding. This has lead to a large -set of "base encodings", almost one for every use-case. Unlike the case of -encoding text to bytes, it is impractical to standardize widely around a single -base encoding because there is no optimal encoding for all cases. - -As data travels beyond its context, it becomes quite hard to ascertain *which* -base encoding of the many possible ones were used; that's where multibase comes -in. Where the data has been prefixed before leaving its context behind, it -answers the question: +Multibase is a protocol for disambiguating the "base encoding" used to express binary data in text formats (e.g., base32, base36, base64, base58, etc.) from the expression alone. + +When text is encoded as bytes, we can usually use a one-size-fits-all encoding (UTF-8) because we're always encoding to the same set of 256 bytes (+/- the NUL byte). +When that doesn't work, usually for historical or performance reasons, we can usually infer the encoding from the context. + +However, when bytes are encoded as text (using a base encoding), the choice of base encoding (and alphabet, and other factors) is often restricted by the context. 
+Worse, these restrictions can change based on where the data appears in the text. +In some cases, we can only use `[a-z0-9]`; in others, we can use a larger set of characters but need a compact encoding. +This has led to a large set of "base encodings", almost one for every use-case. +Unlike the case of encoding text to bytes, it is impractical to standardize widely around a single base encoding because there is no optimal encoding for all cases. + +As data travels beyond its context, it becomes quite hard to ascertain *which* base encoding of the many possible ones was used; that's where multibase comes in. +Where the data has been prefixed before leaving its context behind, it answers the question: > Given binary data `d` encoded into text `s`, what base `b` was used to encode it? -To answer this question, a single code point is prepended to `s` at time of -encoding, which signals in that new context which `b` can be used to reconstruct -`d`. +To answer this question, a single code point is prepended to `s` at time of encoding, which signals in that new context which `b` can be used to reconstruct `d`. ## Table of Contents @@ -59,8 +48,7 @@ The Format is: <base-encoding-code-point><base-encoded-data> ``` -Where `<base-encoding-code-point>` is a code representing an entry in the -multibase table. +Where `<base-encoding-code-point>` is a code representing an entry in the multibase table. ### Multibase Table @@ -116,8 +104,7 @@ Each multibase encoding has a status: * reserved - for functional reasons or to avoid collisions with other multi-* registries, this registry cannot accept registrations at this code-point and implementing one unregistered is discouraged for interoperability reasons * experimental - these encodings have been proposed but are not widely implemented and may be removed. -* draft - these encodings are mature and widely implemented but may not be - implemented by all implementations.
+* draft - these encodings are mature and widely implemented but may not be implemented by all implementations. * final - these encodings should be implemented by all implementations and are widely used. * deprecated - this entry will likely be removed and reassigned in the future and it will not likely become a `final` registration @@ -157,18 +144,15 @@ Yes. If i give you `"1214314321432165"` is that decimal? or hex? or something el > Why the strange selection of codes / characters? -The code values are selected such that they are included in the alphabets of the -base they represent. For example, `f` is the base code for `base16 (hex)`, -because `f` is in hex's 16 character alphabet. Note that most of the alphabets -used can be encoded in UTF-8, and most but not all can be encoded in ASCII. We -have yet not found a case needing something else. +The code values are selected such that they are included in the alphabets of the base they represent. +For example, `f` is the base code for `base16 (hex)`, because `f` is in hex's 16 character alphabet. +Note that most of the alphabets used can be encoded in UTF-8, and most but not all can be encoded in ASCII. +We have yet not found a case needing something else. > Don't we have to agree on a table of base encodings? -Yes, but we already have to agree on base encodings, so this is not hard. The -table even leaves some room for custom encodings and is intended to work both in -contexts where the encodings are known or agreed on and open-world or brownfield -contexts where these may vary. +Yes, but we already have to agree on base encodings, so this is not hard. +The table even leaves some room for custom encodings and is intended to work both in contexts where the encodings are known or agreed on and open-world or brownfield contexts where these may vary. ## Implementations: @@ -193,34 +177,24 @@ contexts where these may vary. 
## Disclaimers -Warning: **obviously multibase changes the first character depending on the -encoding**. Do not expect the value to be exactly the same. Remove the multibase -prefix before using the value. +Warning: **obviously multibase changes the first character depending on the encoding**. +Do not expect the value to be exactly the same. +Remove the multibase prefix before using the value. ## Contribute -Contributions welcome. Please check out [the -issues](https://github.com/multiformats/multibase/issues) and reading the -[contributing -document](https://github.com/multiformats/multiformats/blob/master/contributing.md) -for the greater multiformats project before opening your first issue, as the -workflow and the relation of multibase to the greater project both benefit from -this context. more information on how we work, and about contributing in -general. - -If you'd like to switch a project over to multibase, whether by creating a new -multibase implementation or building on one of those listed above, please file -an issue in this repository using the "Interested in implementing" issue -template. If would also like to reserve a prefix for compatibility, please file -a separate issue in this repository using the "New Registration" issue template. +Contributions welcome. +Please check out [the issues](https://github.com/multiformats/multibase/issues) and read the [contributing document](https://github.com/multiformats/multiformats/blob/master/contributing.md) for the greater multiformats project before opening your first issue, as the workflow and the relation of multibase to the greater project both benefit from this context. +That document has more information on how we work, and about contributing in general. + +If you'd like to switch a project over to multibase, whether by creating a new multibase implementation or building on one of those listed above, please file an issue in this repository using the "Interested in implementing" issue template.
+If you would also like to reserve a prefix for compatibility, please file a separate issue in this repository using the "New Registration" issue template. ## License -This repository is only for documents. All of these are licensed under the -[CC-BY-SA -3.0](https://ipfs.io/ipfs/QmVreNvKsQmQZ83T86cWSjPu2vR3yZHGPm5jnxFuunEB9u) -license © 2016 Protocol Labs Inc. Any code is under a [MIT](LICENSE) © 2016 -Protocol Labs Inc. +This repository is only for documents. +All of these are licensed under the [CC-BY-SA 3.0](https://ipfs.io/ipfs/QmVreNvKsQmQZ83T86cWSjPu2vR3yZHGPm5jnxFuunEB9u) license © 2016 Protocol Labs Inc. +Any code is under a [MIT](LICENSE) © 2016 Protocol Labs Inc. [multiaddr]: https://github.com/multiformats/multiaddr [multiformats registry group]: https://github.com/multiformats/multicodec/blob/master/table.csv From be3ab370dba8584ba31f09cdcc4133c335f3f772 Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Tue, 8 Aug 2023 23:06:11 +0200 Subject: [PATCH 20/31] embarassingly walking back all my wierd changes --- README.md | 53 +++++++++++++++++++++++++-------------------------- multibase.csv | 52 +++++++++++++++++++++++++------------------------- 2 files changed, 52 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index ec96b16..2eb0355 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,6 @@ To answer this question, a single code point is prepended to `s` at time of enco ## Table of Contents - [multibase](#multibase) - - [Table of Contents](#table-of-contents) - [Format](#format) - [Multibase Table](#multibase-table) - [Reserved](#reserved) @@ -55,32 +54,32 @@ Where `<base-encoding-code-point>` is a code representing an entry in the multib The current multibase table is [here](multibase.csv): ``` -code, Unicode, (UTF-8), encoding, description, status -NUL, U+0000, 0x00, 8-bit binary, (encoder and decoder keeps data unmodified), reserved -0, U+0030, 0x30, base2, binary (01010101), experimental -7, U+0037, 0x37,
base8, octal, draft -9, U+0039, 0x39, base10, decimal, draft -f, U+0066, 0x66, base16, hexadecimal, final -F, U+0006, 0x06, base16upper, hexadecimal, final -v, U+0076, 0x76, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -V, U+0056, 0x56, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -t, U+0074, 0x74, base32hexpad, rfc4648 case-insensitive - with padding, experimental -T, U+0054, 0x54, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental -b, U+0062, 0x62, base32, rfc4648 case-insensitive - no padding, final -B, U+0042, 0x42, base32upper, rfc4648 case-insensitive - no padding, final -c, U+0063, 0x63, base32pad, rfc4648 case-insensitive - with padding, draft -C, U+0043, 0x43, base32padupper, rfc4648 case-insensitive - with padding, draft -h, U+0068, 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft -k, U+006b, 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft -K, U+004b, 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a, 0x7a, base58btc, base58 bitcoin, final -Z, U+005a, 0x5a, base58flickr, base58 flicker, experimental -m, U+006d, 0x6d, base64, rfc4648 no padding, final -M, U+004d, 0x4d, base64pad, rfc4648 with padding - MIME encoding, experimental -u, U+0075, 0x75, base64url, rfc4648 no padding, final -U, U+0055, 0x55, base64urlpad, rfc4648 with padding, final -p, U+0070, 0x70, proquint, [PRO-QUINT], experimental -🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental +code, Unicode, encoding, description, status +NUL, U+0000, none, (no base encoding), reserved +0, U+0030, base2, binary (01010101), experimental +7, U+0037, base8, octal, draft +9, U+0039, base10, decimal, draft +f, U+0066, base16, hexadecimal, final +F, U+0006, base16upper, hexadecimal, final +v, U+0076, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +V, U+0056, 
base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +t, U+0074, base32hexpad, rfc4648 case-insensitive - with padding, experimental +T, U+0054, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +b, U+0062, base32, rfc4648 case-insensitive - no padding, final +B, U+0042, base32upper, rfc4648 case-insensitive - no padding, final +c, U+0063, base32pad, rfc4648 case-insensitive - with padding, draft +C, U+0043, base32padupper, rfc4648 case-insensitive - with padding, draft +h, U+0068, base32z, z-base-32 (used by Tahoe-LAFS), draft +k, U+006b, base36, base36 [0-9a-z] case-insensitive - no padding, draft +K, U+004b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +z, U+007a, base58btc, base58 bitcoin, final +Z, U+005a, base58flickr, base58 flicker, experimental +m, U+006d, base64, rfc4648 no padding, final +M, U+004d, base64pad, rfc4648 with padding - MIME encoding, experimental +u, U+0075, base64url, rfc4648 no padding, final +U, U+0055, base64urlpad, rfc4648 with padding, final +p, U+0070, proquint, [PRO-QUINT], experimental +🚀, U+1F680, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental ``` diff --git a/multibase.csv b/multibase.csv index 55bb4aa..b2cf901 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,26 +1,26 @@ -code, Unicode, (UTF-8), encoding, description, status -NUL, U+0000, 0x00, 8-bit binary, (encoder and decoder keeps data unmodified), reserved -0, U+0030, 0x30, base2, binary (01010101), experimental -7, U+0037, 0x37, base8, octal, draft -9, U+0039, 0x39, base10, decimal, draft -f, U+0066, 0x66, base16, hexadecimal, final -F, U+0006, 0x06, base16upper, hexadecimal, final -v, U+0076, 0x76, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -V, U+0056, 0x56, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -t, U+0074, 0x74, base32hexpad, rfc4648 case-insensitive - with padding, 
experimental -T, U+0054, 0x54, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental -b, U+0062, 0x62, base32, rfc4648 case-insensitive - no padding, final -B, U+0042, 0x42, base32upper, rfc4648 case-insensitive - no padding, final -c, U+0063, 0x63, base32pad, rfc4648 case-insensitive - with padding, draft -C, U+0043, 0x43, base32padupper, rfc4648 case-insensitive - with padding, draft -h, U+0068, 0x68, base32z, z-base-32 (used by Tahoe-LAFS), draft -k, U+006b, 0x6b, base36, base36 [0-9a-z] case-insensitive - no padding, draft -K, U+004b, 0x4b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a, 0x7a, base58btc, base58 bitcoin, final -Z, U+005a, 0x5a, base58flickr, base58 flicker, experimental -m, U+006d, 0x6d, base64, rfc4648 no padding, final -M, U+004d, 0x4d, base64pad, rfc4648 with padding - MIME encoding, experimental -u, U+0075, 0x75, base64url, rfc4648 no padding, final -U, U+0055, 0x55, base64urlpad, rfc4648 with padding, final -p, U+0070, 0x70, proquint, [PRO-QUINT], experimental -🚀, U+1F680, 0xF09F9A80, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental +code, Unicode, encoding, description, status +identity, U+0000, none, (no base encoding), reserved +0, U+0030, base2, binary (01010101), experimental +7, U+0037, base8, octal, draft +9, U+0039, base10, decimal, draft +f, U+0066, base16, hexadecimal, final +F, U+0046, base16upper, hexadecimal, final +v, U+0076, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +V, U+0056, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +t, U+0074, base32hexpad, rfc4648 case-insensitive - with padding, experimental +T, U+0054, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +b, U+0062, base32, rfc4648 case-insensitive - no padding, final +B, U+0042, base32upper, rfc4648 case-insensitive - no padding, final +c, U+0063, base32pad, rfc4648 case-insensitive -
with padding, draft +C, U+0043, base32padupper, rfc4648 case-insensitive - with padding, draft +h, U+0068, base32z, z-base-32 (used by Tahoe-LAFS), draft +k, U+006b, base36, base36 [0-9a-z] case-insensitive - no padding, draft +K, U+004b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +z, U+007a, base58btc, base58 bitcoin, final +Z, U+005a, base58flickr, base58 flicker, experimental +m, U+006d, base64, rfc4648 no padding, final +M, U+004d, base64pad, rfc4648 with padding - MIME encoding, experimental +u, U+0075, base64url, rfc4648 no padding, final +U, U+0055, base64urlpad, rfc4648 with padding, final +p, U+0070, proquint, [PRO-QUINT], experimental +🚀, U+1F680, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental From caee143139a49ee90717993d968c79caaf44ab7c Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Wed, 9 Aug 2023 13:01:02 +0200 Subject: [PATCH 21/31] good catch @vmx --- README.md | 56 +++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 2eb0355..a70fd93 100644 --- a/README.md +++ b/README.md @@ -54,37 +54,37 @@ Where `<base-encoding-code-point>` is a code representing an entry in the multib The current multibase table is [here](multibase.csv): ``` -code, Unicode, encoding, description, status -NUL, U+0000, none, (no base encoding), reserved -0, U+0030, base2, binary (01010101), experimental -7, U+0037, base8, octal, draft -9, U+0039, base10, decimal, draft -f, U+0066, base16, hexadecimal, final -F, U+0006, base16upper, hexadecimal, final -v, U+0076, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -V, U+0056, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -t, U+0074, base32hexpad, rfc4648 case-insensitive - with padding, experimental -T, U+0054, base32hexpadupper, rfc4648 case-insensitive - with padding, 
experimental -b, U+0062, base32, rfc4648 case-insensitive - no padding, final -B, U+0042, base32upper, rfc4648 case-insensitive - no padding, final -c, U+0063, base32pad, rfc4648 case-insensitive - with padding, draft -C, U+0043, base32padupper, rfc4648 case-insensitive - with padding, draft -h, U+0068, base32z, z-base-32 (used by Tahoe-LAFS), draft -k, U+006b, base36, base36 [0-9a-z] case-insensitive - no padding, draft -K, U+004b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a, base58btc, base58 bitcoin, final -Z, U+005a, base58flickr, base58 flicker, experimental -m, U+006d, base64, rfc4648 no padding, final -M, U+004d, base64pad, rfc4648 with padding - MIME encoding, experimental -u, U+0075, base64url, rfc4648 no padding, final -U, U+0055, base64urlpad, rfc4648 with padding, final -p, U+0070, proquint, [PRO-QUINT], experimental -🚀, U+1F680, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental +Unicode, Character,encoding, description, status +U+0000, NUL, none, (no base encoding), reserved +U+0030, 0, base2, binary (01010101), experimental +U+0037, 7, base8, octal, draft +U+0039, 9, base10, decimal, draft +U+0066, f, base16, hexadecimal, final +U+0006, F, base16upper, hexadecimal, final +U+0076, v, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +U+0056, V, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +U+0074, t, base32hexpad, rfc4648 case-insensitive - with padding, experimental +U+0054, T, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +U+0062, b, base32, rfc4648 case-insensitive - no padding, final +U+0042, B, base32upper, rfc4648 case-insensitive - no padding, final +U+0063, c, base32pad, rfc4648 case-insensitive - with padding, draft +U+0043, C, base32padupper, rfc4648 case-insensitive - with padding, draft +U+0068, h, base32z, z-base-32 (used by Tahoe-LAFS), draft +U+006b, k, base36, base36 
[0-9a-z] case-insensitive - no padding, draft +U+004b, K, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +U+007a, z, base58btc, base58 bitcoin, final +U+005a, Z, base58flickr, base58 flicker, experimental +U+006d, m, base64, rfc4648 no padding, final +U+004d, M, base64pad, rfc4648 with padding - MIME encoding, experimental +U+0075, u, base64url, rfc4648 no padding, final +U+0055, U, base64urlpad, rfc4648 with padding, final +U+0070, p, proquint, [PRO-QUINT], experimental +U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental ``` -**NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point], not an ASCII character or corresponding UTF-8 bytes. -Since UTF-8 is the most common context for binary data that gets prefixed as a multibase today, the UTF-8 column is provided as a reference for detecting multibase-prefixes, since most of these codes can be detected in the first byte in known-encoding contexts. +**NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point], not the raw bytes. +The each character code point displays as in that encoding is provided for convenience, since most of these codes can be detected in the first byte in known-encoding contexts. However, if the string in question came from a UTF-32 context, detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. 
From ff1fe455042a4d2ddcdc680cd46ea9084cb07d5f Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Wed, 9 Aug 2023 14:31:13 +0200 Subject: [PATCH 22/31] good catch @vmx --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a70fd93..eb62f7a 100644 --- a/README.md +++ b/README.md @@ -83,8 +83,8 @@ U+1F680, 🚀, base256emoji, base256 with custom alphabet using var ``` -**NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point], not the raw bytes. -The each character code point displays as in that encoding is provided for convenience, since most of these codes can be detected in the first byte in known-encoding contexts. +**NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point][], not the raw bytes. +The character that each code point displays as in that encoding is provided for convenience. However, if the string in question came from a UTF-32 context, detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. 
From 075fa01bbf919ce2e5190e42f4c2be0e5e8ba606 Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Mon, 14 Aug 2023 14:48:18 +0200 Subject: [PATCH 23/31] address VMX's comments and refine ### Reserved Terms section --- README.md | 97 ++++++++++++++++++++++++++------------------------- multibase.csv | 5 ++- 2 files changed, 53 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index eb62f7a..f3c29f6 100644 --- a/README.md +++ b/README.md @@ -27,17 +27,16 @@ To answer this question, a single code point is prepended to `s` at time of enco ## Table of Contents -- [multibase](#multibase) - - [Format](#format) - - [Multibase Table](#multibase-table) - - [Reserved](#reserved) - - [Status](#status) - - [Multibase By Example](#multibase-by-example) - - [FAQ](#faq) - - [Implementations:](#implementations) - - [Disclaimers](#disclaimers) - - [Contribute](#contribute) - - [License](#license) +- [Format](#format) + - [Multibase Table](#multibase-table) +- [Status](#status) + - [Reserved Terms](#reserved-terms) +- [Multibase By Example](#multibase-by-example) +- [FAQ](#faq) +- [Implementations:](#implementations) +- [Disclaimers](#disclaimers) +- [Contribute](#contribute) +- [License](#license) ## Format @@ -54,33 +53,35 @@ Where `<base-encoding-code-point>` is a code representing an entry in the multib The current multibase table is [here](multibase.csv): ``` -Unicode, Character,encoding, description, status -U+0000, NUL, none, (no base encoding), reserved -U+0030, 0, base2, binary (01010101), experimental -U+0037, 7, base8, octal, draft -U+0039, 9, base10, decimal, draft -U+0066, f, base16, hexadecimal, final -U+0006, F, base16upper, hexadecimal, final -U+0076, v, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -U+0056, V, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -U+0074, t, base32hexpad, rfc4648 case-insensitive - with padding, experimental -U+0054, T, 
base32hexpadupper, rfc4648 case-insensitive - with padding, experimental -U+0062, b, base32, rfc4648 case-insensitive - no padding, final -U+0042, B, base32upper, rfc4648 case-insensitive - no padding, final -U+0063, c, base32pad, rfc4648 case-insensitive - with padding, draft -U+0043, C, base32padupper, rfc4648 case-insensitive - with padding, draft -U+0068, h, base32z, z-base-32 (used by Tahoe-LAFS), draft -U+006b, k, base36, base36 [0-9a-z] case-insensitive - no padding, draft -U+004b, K, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -U+007a, z, base58btc, base58 bitcoin, final -U+005a, Z, base58flickr, base58 flicker, experimental -U+006d, m, base64, rfc4648 no padding, final -U+004d, M, base64pad, rfc4648 with padding - MIME encoding, experimental -U+0075, u, base64url, rfc4648 no padding, final -U+0055, U, base64urlpad, rfc4648 with padding, final -U+0070, p, proquint, [PRO-QUINT], experimental -U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental - +code, Unicode, encoding, description, status +identity, U+0000, none, (no base encoding), reserved +0, U+0030, base2, binary (01010101), experimental +1, U+0031, none, (no base encoding) reserved +7, U+0037, base8, octal, draft +9, U+0039, base10, decimal, draft +f, U+0066, base16, hexadecimal, final +F, U+0006, base16upper, hexadecimal, final +v, U+0076, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +V, U+0056, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +t, U+0074, base32hexpad, rfc4648 case-insensitive - with padding, experimental +T, U+0054, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +b, U+0062, base32, rfc4648 case-insensitive - no padding, final +B, U+0042, base32upper, rfc4648 case-insensitive - no padding, final +c, U+0063, base32pad, rfc4648 case-insensitive - with padding, draft +C, U+0043, base32padupper, rfc4648 case-insensitive - with 
padding, draft +h, U+0068, base32z, z-base-32 (used by Tahoe-LAFS), draft +k, U+006b, base36, base36 [0-9a-z] case-insensitive - no padding, draft +K, U+004b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +z, U+007a, base58btc, base58 bitcoin, final +Z, U+005a, base58flickr, base58 flicker, experimental +m, U+006d, base64, rfc4648 no padding, final +M, U+004d, base64pad, rfc4648 with padding - MIME encoding, experimental +u, U+0075, base64url, rfc4648 no padding, final +U, U+0055, base64urlpad, rfc4648 with padding, final +p, U+0070, proquint, [PRO-QUINT], experimental +Q, U+002F, none, (no base encoding) reserved +/, U+002F, none, (no base encoding) reserved +🚀, U+1F680, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental ``` **NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point][], not the raw bytes. @@ -88,15 +89,6 @@ The character that each code point displays as in that encoding is provided for However, if the string in question came from a UTF-32 context, detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. -## Reserved - -The following codes are _reserved_ and cannot be registered in the `multibase` table. Note that all three Unicode entries, expressed as the [unsigned varint] expression of that Unicode code-point in UTF-8, are reserved in the greater [multiformats registry group]; this list of reserved Unicode codepoints may grow in the future to avoid such collisions as other single-byte UTF-8 codes are reserved there. - - -* `/` (U+002F) - Separator used by [multiaddr]. -* `1` (U+0031) - Base58-encoded identity multihashes used by libp2p peer IDs. 
-* `Q` (U+0011) - Base58-encoded sha2-256 multihashes used by libp2p/ipfs for peer IDs and CIDv0. - ## Status Each multibase encoding has a status: @@ -106,7 +98,16 @@ Each multibase encoding has a status: * draft - these encodings are mature and widely implemented but may not be implemented by all implementations. * final - these encodings should be implemented by all implementations and are widely used. * deprecated - this entry will likely be removed and reassigned in the future and it will not likely become a `final` registration - + +### Reserved Terms + +The following codes are _reserved_ and cannot be registered in the `multibase` table. Note that all three of the Unicode entries, expressed as the [unsigned varint] expression of that Unicode code-point in UTF-8, correspond to widely-used entries in the [multiformats registry group] that could cause confusion for some legacy systems handling both binary and multibased structures from other multiformats. While technically the multibase registry is not part of the [multiformats registry group], these reservations minimize risk of confusion when composing multiple multiformats in one data system. + +* `NUL` (n/a) - Legacy data may be found with null-byte-prefixed binary structures mixed in among multibase-encoded ones in arrays of data, although support for this is no longer mandated by conformant implementations. +* `/` (U+002F) - Separator used by [multiaddr]. +* `1` (U+0031) - Base58-encoded identity multihashes used by libp2p peer IDs. +* `Q` (U+0051) - Base58-encoded sha2-256 multihashes used by libp2p/ipfs for peer IDs and CIDv0. 
+ ## Multibase By Example Consider the following encodings of the same binary string: diff --git a/multibase.csv b/multibase.csv index b2cf901..b2de111 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,6 +1,7 @@ code, Unicode, encoding, description, status identity, U+0000, none, (no base encoding), reserved 0, U+0030, base2, binary (01010101), experimental +1, U+0031, none, (no base encoding) reserved 7, U+0037, base8, octal, draft 9, U+0039, base10, decimal, draft f, U+0066, base16, hexadecimal, final @@ -23,4 +24,6 @@ M, U+004d, base64pad, rfc4648 with padding - MIME encoding u, U+0075, base64url, rfc4648 no padding, final U, U+0055, base64urlpad, rfc4648 with padding, final p, U+0070, proquint, [PRO-QUINT], experimental -🚀, U+1F680, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental +Q, U+002F, none, (no base encoding) reserved +/, U+002F, none, (no base encoding) reserved +🚀, U+1F680, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental \ No newline at end of file From 873440dfcddac7840c7cc4a1e6e0dcfd4f48573e Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Mon, 14 Aug 2023 15:17:59 +0200 Subject: [PATCH 24/31] rename code to char and flip column order --- README.md | 58 +++++++++++++++++++++++++-------------------------- multibase.csv | 58 +++++++++++++++++++++++++-------------------------- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index f3c29f6..14bee5a 100644 --- a/README.md +++ b/README.md @@ -53,35 +53,35 @@ Where `<base-encoding-code-point>` is a code representing an entry in the multib The current multibase table is [here](multibase.csv): ``` -code, Unicode, encoding, description, status -identity, U+0000, none, (no base encoding), reserved -0, U+0030, base2, binary (01010101), experimental -1, U+0031, none, (no base encoding) reserved -7, U+0037, base8, octal, draft -9, U+0039, base10, decimal, draft 
-f, U+0066, base16, hexadecimal, final -F, U+0006, base16upper, hexadecimal, final -v, U+0076, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -V, U+0056, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -t, U+0074, base32hexpad, rfc4648 case-insensitive - with padding, experimental -T, U+0054, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental -b, U+0062, base32, rfc4648 case-insensitive - no padding, final -B, U+0042, base32upper, rfc4648 case-insensitive - no padding, final -c, U+0063, base32pad, rfc4648 case-insensitive - with padding, draft -C, U+0043, base32padupper, rfc4648 case-insensitive - with padding, draft -h, U+0068, base32z, z-base-32 (used by Tahoe-LAFS), draft -k, U+006b, base36, base36 [0-9a-z] case-insensitive - no padding, draft -K, U+004b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a, base58btc, base58 bitcoin, final -Z, U+005a, base58flickr, base58 flicker, experimental -m, U+006d, base64, rfc4648 no padding, final -M, U+004d, base64pad, rfc4648 with padding - MIME encoding, experimental -u, U+0075, base64url, rfc4648 no padding, final -U, U+0055, base64urlpad, rfc4648 with padding, final -p, U+0070, proquint, [PRO-QUINT], experimental -Q, U+002F, none, (no base encoding) reserved -/, U+002F, none, (no base encoding) reserved -🚀, U+1F680, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental +Unicode, character, encoding, description, status +U+0000, identity, none, (no base encoding), reserved +U+0030, 0, base2, binary (01010101), experimental +U+0031, 1, none, (no base encoding) reserved +U+0037, 7, base8, octal, draft +U+0039, 9, base10, decimal, draft +U+0066, f, base16, hexadecimal, final +U+0006, F, base16upper, hexadecimal, final +U+0076, v, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +U+0056, V, base32hexupper, rfc4648 case-insensitive - no padding - 
highest char, experimental +U+0074, t, base32hexpad, rfc4648 case-insensitive - with padding, experimental +U+0054, T, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +U+0062, b, base32, rfc4648 case-insensitive - no padding, final +U+0042, B, base32upper, rfc4648 case-insensitive - no padding, final +U+0063, c, base32pad, rfc4648 case-insensitive - with padding, draft +U+0043, C, base32padupper, rfc4648 case-insensitive - with padding, draft +U+0068, h, base32z, z-base-32 (used by Tahoe-LAFS), draft +U+006b, k, base36, base36 [0-9a-z] case-insensitive - no padding, draft +U+004b, K, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +U+007a, z, base58btc, base58 bitcoin, final +U+005a, Z, base58flickr, base58 flicker, experimental +U+006d, m, base64, rfc4648 no padding, final +U+004d, M, base64pad, rfc4648 with padding - MIME encoding, experimental +U+0075, u, base64url, rfc4648 no padding, final +U+0055, U, base64urlpad, rfc4648 with padding, final +U+0070, p, proquint, [PRO-QUINT], experimental +U+002F, Q, none, (no base encoding) reserved +U+002F, /, none, (no base encoding) reserved +U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental ``` **NOTE:** Multibase-prefixes are encoding agnostic and their canonical form is a Unicode [code point][], not the raw bytes. 
diff --git a/multibase.csv b/multibase.csv index b2de111..1647933 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,29 +1,29 @@ -code, Unicode, encoding, description, status -identity, U+0000, none, (no base encoding), reserved -0, U+0030, base2, binary (01010101), experimental -1, U+0031, none, (no base encoding) reserved -7, U+0037, base8, octal, draft -9, U+0039, base10, decimal, draft -f, U+0066, base16, hexadecimal, final -F, U+0006, base16upper, hexadecimal, final -v, U+0076, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -V, U+0056, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -t, U+0074, base32hexpad, rfc4648 case-insensitive - with padding, experimental -T, U+0054, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental -b, U+0062, base32, rfc4648 case-insensitive - no padding, final -B, U+0042, base32upper, rfc4648 case-insensitive - no padding, final -c, U+0063, base32pad, rfc4648 case-insensitive - with padding, draft -C, U+0043, base32padupper, rfc4648 case-insensitive - with padding, draft -h, U+0068, base32z, z-base-32 (used by Tahoe-LAFS), draft -k, U+006b, base36, base36 [0-9a-z] case-insensitive - no padding, draft -K, U+004b, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -z, U+007a, base58btc, base58 bitcoin, final -Z, U+005a, base58flickr, base58 flicker, experimental -m, U+006d, base64, rfc4648 no padding, final -M, U+004d, base64pad, rfc4648 with padding - MIME encoding, experimental -u, U+0075, base64url, rfc4648 no padding, final -U, U+0055, base64urlpad, rfc4648 with padding, final -p, U+0070, proquint, [PRO-QUINT], experimental -Q, U+002F, none, (no base encoding) reserved -/, U+002F, none, (no base encoding) reserved -🚀, U+1F680, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental \ No newline at end of file +Unicode, character, encoding, description, status +U+0000, identity, none, (no base 
encoding), reserved +U+0030, 0, base2, binary (01010101), experimental +U+0031, 1, none, (no base encoding) reserved +U+0037, 7, base8, octal, draft +U+0039, 9, base10, decimal, draft +U+0066, f, base16, hexadecimal, final +U+0006, F, base16upper, hexadecimal, final +U+0076, v, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental +U+0056, V, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental +U+0074, t, base32hexpad, rfc4648 case-insensitive - with padding, experimental +U+0054, T, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental +U+0062, b, base32, rfc4648 case-insensitive - no padding, final +U+0042, B, base32upper, rfc4648 case-insensitive - no padding, final +U+0063, c, base32pad, rfc4648 case-insensitive - with padding, draft +U+0043, C, base32padupper, rfc4648 case-insensitive - with padding, draft +U+0068, h, base32z, z-base-32 (used by Tahoe-LAFS), draft +U+006b, k, base36, base36 [0-9a-z] case-insensitive - no padding, draft +U+004b, K, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft +U+007a, z, base58btc, base58 bitcoin, final +U+005a, Z, base58flickr, base58 flicker, experimental +U+006d, m, base64, rfc4648 no padding, final +U+004d, M, base64pad, rfc4648 with padding - MIME encoding, experimental +U+0075, u, base64url, rfc4648 no padding, final +U+0055, U, base64urlpad, rfc4648 with padding, final +U+0070, p, proquint, [PRO-QUINT], experimental +U+002F, Q, none, (no base encoding) reserved +U+002F, /, none, (no base encoding) reserved +U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental \ No newline at end of file From ccca39b18522d630aea2b20069f2880e7917f0ce Mon Sep 17 00:00:00 2001 From: bumblefudge <bumblefudge@learningproof.xyz> Date: Mon, 14 Aug 2023 15:26:36 +0200 Subject: [PATCH 25/31] fix merge conflict artefacts --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/README.md b/README.md index a2ac5bd..526744c 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ To answer this question, a single code point is prepended to `s` at time of enco - [Format](#format) - [Multibase Table](#multibase-table) +- [Specifications](#specifications) - [Status](#status) - [Reserved Terms](#reserved-terms) - [Multibase By Example](#multibase-by-example) @@ -84,7 +85,7 @@ U+002F, /, none, (no base encoding) U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental ``` -**NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). For example, in UTF-32, "z" would be `[0x7a, 0x00, 0x00, 0x00]`. Also note the difference between `0x00` (codepoint 0 or 0x00) and `0` (codepoint 48 or 0x30). +**NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). In UTF-32, for example, that same "z" would be `[0x7a, 0x00, 0x00, 0x00]` not ``[0x7a]`, so detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. Also note the difference between `0x00` (codepoint 0 or 0x00) and `0` (codepoint 48 or 0x30). 
## Specifications @@ -102,7 +103,7 @@ Below is a list of specs for the underlying base encodings: - `base58flickr` https://datatracker.ietf.org/doc/html/draft-msporny-base58-02, but using a different alphabet - `proquint` [Proquint RFC](rfcs/Proquint.md), which is the [original spec](https://arxiv.org/html/0901.4016) with an added prefix for legibility -However, if the string in question came from a UTF-32 context, detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. + ## Status From d773cd8c78d1b5d6bf7dd0c71296053fccd53a74 Mon Sep 17 00:00:00 2001 From: Bumblefudge <bumblefudge@learningproof.xyz> Date: Mon, 14 Aug 2023 17:43:22 +0200 Subject: [PATCH 26/31] fix merge conflict artefacts take 2 --- README.md | 48 ++++++++++++++++++++++++------------------------ multibase.csv | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 526744c..5e7f69c 100644 --- a/README.md +++ b/README.md @@ -55,31 +55,31 @@ The current multibase table is [here](multibase.csv): ``` Unicode, character, encoding, description, status -U+0000, identity, none, (no base encoding), reserved -U+0030, 0, base2, binary (01010101), experimental -U+0031, 1, none, (no base encoding) reserved -U+0037, 7, base8, octal, draft -U+0039, 9, base10, decimal, draft -U+0066, f, base16, hexadecimal, final -U+0006, F, base16upper, hexadecimal, final -U+0076, v, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -U+0056, V, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -U+0074, t, base32hexpad, rfc4648 case-insensitive - with padding, experimental -U+0054, T, base32hexpadupper, rfc4648 case-insensitive - with padding, 
experimental -U+0062, b, base32, rfc4648 case-insensitive - no padding, final -U+0042, B, base32upper, rfc4648 case-insensitive - no padding, final -U+0063, c, base32pad, rfc4648 case-insensitive - with padding, draft -U+0043, C, base32padupper, rfc4648 case-insensitive - with padding, draft +U+0000, NUL, none, (No base encoding), reserved +U+0030, 0, base2, Binary (01010101), experimental +U+0031, 1, none, (No base encoding), reserved +U+0037, 7, base8, Octal, draft +U+0039, 9, base10, Decimal, draft +U+0066, f, base16, Hexadecimal (lowercase), final +U+0046, F, base16upper, Hexadecimal (uppercase), final +U+0076, v, base32hex, RFC4648 case-insensitive - no padding - highest char, experimental +U+0056, V, base32hexupper, RFC4648 case-insensitive - no padding - highest char, experimental +U+0074, t, base32hexpad, RFC4648 case-insensitive - with padding, experimental +U+0054, T, base32hexpadupper, RFC4648 case-insensitive - with padding, experimental +U+0062, b, base32, RFC4648 case-insensitive - no padding, final +U+0042, B, base32upper, RFC4648 case-insensitive - no padding, final +U+0063, c, base32pad, RFC4648 case-insensitive - with padding, draft +U+0043, C, base32padupper, RFC4648 case-insensitive - with padding, draft U+0068, h, base32z, z-base-32 (used by Tahoe-LAFS), draft -U+006b, k, base36, base36 [0-9a-z] case-insensitive - no padding, draft -U+004b, K, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -U+007a, z, base58btc, base58 bitcoin, final -U+005a, Z, base58flickr, base58 flicker, experimental -U+006d, m, base64, rfc4648 no padding, final -U+004d, M, base64pad, rfc4648 with padding - MIME encoding, experimental -U+0075, u, base64url, rfc4648 no padding, final -U+0055, U, base64urlpad, rfc4648 with padding, final -U+0070, p, proquint, [PRO-QUINT], experimental +U+006b, k, base36, Base36 [0-9a-z] case-insensitive - no padding, draft +U+004b, K, base36upper, Base36 [0-9a-z] case-insensitive - no padding, draft +U+007a, z, base58btc, 
Base58 Bitcoin, final +U+005a, Z, base58flickr, Base58 Flicker, experimental +U+006d, m, base64, RFC4648 no padding, final +U+004d, M, base64pad, RFC4648 with padding - MIME encoding, experimental +U+0075, u, base64url, RFC4648 no padding, final +U+0055, U, base64urlpad, RFC4648 with padding, final +U+0070, p, proquint, Proquint (https://arxiv.org/html/0901.4016), experimental U+002F, Q, none, (no base encoding) reserved U+002F, /, none, (no base encoding) reserved U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental diff --git a/multibase.csv b/multibase.csv index 51f4d74..2472ed9 100644 --- a/multibase.csv +++ b/multibase.csv @@ -1,29 +1,29 @@ Unicode, character, encoding, description, status -U+0000, identity, none, (no base encoding), reserved -U+0030, 0, base2, binary (01010101), experimental -U+0031, 1, none, (no base encoding) reserved -U+0037, 7, base8, octal, draft -U+0039, 9, base10, decimal, draft -U+0066, f, base16, hexadecimal, final -U+0006, F, base16upper, hexadecimal, final -U+0076, v, base32hex, rfc4648 case-insensitive - no padding - highest char, experimental -U+0056, V, base32hexupper, rfc4648 case-insensitive - no padding - highest char, experimental -U+0074, t, base32hexpad, rfc4648 case-insensitive - with padding, experimental -U+0054, T, base32hexpadupper, rfc4648 case-insensitive - with padding, experimental -U+0062, b, base32, rfc4648 case-insensitive - no padding, final -U+0042, B, base32upper, rfc4648 case-insensitive - no padding, final -U+0063, c, base32pad, rfc4648 case-insensitive - with padding, draft -U+0043, C, base32padupper, rfc4648 case-insensitive - with padding, draft +U+0000, NUL, none, (No base encoding), reserved +U+0030, 0, base2, Binary (01010101), experimental +U+0031, 1, none, (No base encoding), reserved +U+0037, 7, base8, Octal, draft +U+0039, 9, base10, Decimal, draft +U+0066, f, base16, Hexadecimal (lowercase), final +U+0046, F, base16upper, Hexadecimal (uppercase), 
final +U+0076, v, base32hex, RFC4648 case-insensitive - no padding - highest char, experimental +U+0056, V, base32hexupper, RFC4648 case-insensitive - no padding - highest char, experimental +U+0074, t, base32hexpad, RFC4648 case-insensitive - with padding, experimental +U+0054, T, base32hexpadupper, RFC4648 case-insensitive - with padding, experimental +U+0062, b, base32, RFC4648 case-insensitive - no padding, final +U+0042, B, base32upper, RFC4648 case-insensitive - no padding, final +U+0063, c, base32pad, RFC4648 case-insensitive - with padding, draft +U+0043, C, base32padupper, RFC4648 case-insensitive - with padding, draft U+0068, h, base32z, z-base-32 (used by Tahoe-LAFS), draft -U+006b, k, base36, base36 [0-9a-z] case-insensitive - no padding, draft -U+004b, K, base36upper, base36 [0-9a-z] case-insensitive - no padding, draft -U+007a, z, base58btc, base58 bitcoin, final -U+005a, Z, base58flickr, base58 flicker, experimental -U+006d, m, base64, rfc4648 no padding, final -U+004d, M, base64pad, rfc4648 with padding - MIME encoding, experimental -U+0075, u, base64url, rfc4648 no padding, final -U+0055, U, base64urlpad, rfc4648 with padding, final -U+0070, p, proquint, [PRO-QUINT], experimental +U+006b, k, base36, Base36 [0-9a-z] case-insensitive - no padding, draft +U+004b, K, base36upper, Base36 [0-9a-z] case-insensitive - no padding, draft +U+007a, z, base58btc, Base58 Bitcoin, final +U+005a, Z, base58flickr, Base58 Flicker, experimental +U+006d, m, base64, RFC4648 no padding, final +U+004d, M, base64pad, RFC4648 with padding - MIME encoding, experimental +U+0075, u, base64url, RFC4648 no padding, final +U+0055, U, base64urlpad, RFC4648 with padding, final +U+0070, p, proquint, Proquint (https://arxiv.org/html/0901.4016), experimental U+002F, Q, none, (no base encoding) reserved U+002F, /, none, (no base encoding) reserved U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental From 
4c51364079bdcc16dbd77feea3409f08a895d3a2 Mon Sep 17 00:00:00 2001 From: Bumblefudge <caballerojuan@pm.me> Date: Tue, 22 Aug 2023 11:03:41 +0200 Subject: [PATCH 27/31] less required fields in .github/ISSUE_TEMPLATE/BUG-REPORT.yml Co-authored-by: Rod Vagg <rod@vagg.org> --- .github/ISSUE_TEMPLATE/BUG-REPORT.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml index 5422dc0..bfae0bf 100644 --- a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml +++ b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml @@ -28,4 +28,4 @@ body: description: Please provide additional context placeholder: "Context or external links needed to explain the possible mistake" validations: - required: true \ No newline at end of file + required: false \ No newline at end of file From 432b2707bb267ba759b6b4eb48fe4d018950b21c Mon Sep 17 00:00:00 2001 From: Bumblefudge <caballerojuan@pm.me> Date: Tue, 22 Aug 2023 11:03:50 +0200 Subject: [PATCH 28/31] less required fields in .github/ISSUE_TEMPLATE/BUG-REPORT.yml Co-authored-by: Rod Vagg <rod@vagg.org> --- .github/ISSUE_TEMPLATE/BUG-REPORT.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml index bfae0bf..7b3dffa 100644 --- a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml +++ b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml @@ -20,7 +20,7 @@ body: description: Please enter your GitHub URL to provide a reproduction of the issue placeholder: ex. 
https://github.com/multiformats/multibase/ validations: - required: true + required: false - type: textarea id: context attributes: From e0f3632bf9234eec0e2b00fd9bc806d6bb57d3fa Mon Sep 17 00:00:00 2001 From: Bumblefudge <caballerojuan@pm.me> Date: Tue, 22 Aug 2023 11:04:34 +0200 Subject: [PATCH 29/31] typo in README.md Co-authored-by: Rod Vagg <rod@vagg.org> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e7f69c..f2d9218 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ U+002F, /, none, (no base encoding) U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental ``` -**NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). In UTF-32, for example, that same "z" would be `[0x7a, 0x00, 0x00, 0x00]` not ``[0x7a]`, so detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. Also note the difference between `0x00` (codepoint 0 or 0x00) and `0` (codepoint 48 or 0x30). +**NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). In UTF-32, for example, that same "z" would be `[0x7a, 0x00, 0x00, 0x00]` not `[0x7a]`, so detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. Also note the difference between `0x00` (codepoint 0 or 0x00) and `0` (codepoint 48 or 0x30). 
## Specifications From d20580d00abac517cc412c20b95b53ecddd3195e Mon Sep 17 00:00:00 2001 From: Bumblefudge <bumblefudge@learningproof.xyz> Date: Tue, 22 Aug 2023 11:39:43 +0200 Subject: [PATCH 30/31] require registrants to volunteer as change controller and PR opener --- .github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml b/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml index d417555..a7d1524 100644 --- a/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml +++ b/.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml @@ -27,6 +27,12 @@ body: description: I have reviewed the [multiformats mega-table](https://github.com/multiformats/multicodec/blob/master/table.csv) to assess viable sub-namespace for a registry if applicable options: - label: I read it! + - type: checkboxes + attributes: + label: "Willing to open a PR" + description: Once my questions are answered and my plan is confirmed, I will open a PR myself that adds the registration and be its change controller, or close this issue myself if I cannot + options: + - label: I will own this registration - type: input id: codepoint attributes: @@ -63,7 +69,7 @@ body: id: solution_and_rationale attributes: label: "Proposed solution and rationale" - description: Please describe at a high level what you are exploring building and current open research questions + description: Please describe at a high level what you are exploring building and current open research questions. 
placeholder: Detail welcome validations: required: true From 6eba60ffbf137cc4efa290c5689f616910b5c2f7 Mon Sep 17 00:00:00 2001 From: Bumblefudge <bumblefudge@learningproof.xyz> Date: Tue, 22 Aug 2023 11:42:18 +0200 Subject: [PATCH 31/31] remove new implementation template for now --- .github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml | 55 ------------------- 1 file changed, 55 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml diff --git a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml b/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml deleted file mode 100644 index c3fd2f4..0000000 --- a/.github/ISSUE_TEMPLATE/NEW-IMPLEMENTATION.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: "Interest in Implementing - New Multibase Library or System" -description: Express interest in possible new multibase library or system -title: "📚 [NEW PROJECT] - <title>" -labels: [ - "ideation" -] -body: - - type: input - id: project-name - attributes: - label: "Name of Project" - description: Name this library or system - placeholder: (language-)Multibase(-usecase), for example - validations: - required: false - - type: checkboxes - attributes: - label: "Have read contributing" - description: I have read the [contributing](https://github.com/multiformats/multiformats/blob/master/contributing.md) document - options: - - label: I read it! 
- validations: - required: true - - type: textarea - id: problem_statement - attributes: - label: "Description of parsing, sniffing, or encoding problem solved" - description: Please describe the problem solved by a new multibase library or system depending on multibase codecs - placeholder: Feel free to provide links for context and use-case descriptions, and how this problem is not solved by existing multi-formats entries or mini-registries - validations: - required: true - - type: textarea - id: prior_art - attributes: - label: "Description of relevant prior art and status quo" - description: Please describe relevant prior art and how this is use-case functions today - placeholder: Links welcome - validations: - required: true - - type: textarea - id: solution_and_rationale - attributes: - label: "Proposed solution and rationale" - description: Please describe at a high level what you are exploring building and current open research questions - placeholder: Detail welcome - validations: - required: true - - type: textarea - id: questions - attributes: - label: "Any further questions or requests" - description: Anything else you'd like to ask the maintainers or community? - placeholder: ok to leave blank! - validations: - required: false