unicode-org · eemeli · Jun 13, 2022 · Jun 9, 2022 · Jun 9, 2022 · Jun 9, 2022
diff --git a/spec/message.ebnf b/spec/message.ebnf
@@ -33,7 +33,7 @@ PlainEnd ::= PlainChar - WhiteSpace
 /* Text */
 Text ::= (TextChar | TextEscape)+
 TextChar ::= AnyChar - ('[' | ']' | '{' | '}' | Esc)
-AnyChar ::= .
+AnyChar ::= [#x0-#x10FFFF]
 
 /* Names */
 Variable ::= '$' Name /* ws: explicit */

diff --git a/spec/syntax.md b/spec/syntax.md
@@ -406,7 +406,7 @@ and `\` (which starts an escape sequence).
 ```ebnf
 Text ::= (TextChar | TextEscape)+ /* ws: explicit */
 TextChar ::= AnyChar - ('[' | ']' | '{' | '}' | Esc)
-AnyChar ::= .
+AnyChar ::= [#x0-#x10FFFF]
 ```
 
 ### Names
@@ -446,6 +446,15 @@ Any Unicode code point is allowed in literals,
 with the exception of its delimiters `(` and `)`,
 and `\` (which starts an escape sequence).
 
+This includes line-breaking characters (such as U+000A LINE FEED and U+000D CARRIAGE RETURN),
+other control characters (such as U+0000 NULL and U+0009 TAB),
+permanently reserved noncharacters (U+FDD0 through U+FDEF and U+<i>n</i>FFFE and U+<i>n</i>FFFF where <i>n</i> is 0x0 through 0x10),
+surrogate code points (U+D800 through U+DFFF),
+private-use code points (U+E000 through U+F8FF, U+F0000 through U+FFFFD, and U+100000 through U+10FFFD),
+and unassigned code points.
+
+All code points of a literal are preserved.
+
 ```ebnf
 Literal ::= '(' (LiteralChar | LiteralEscape)* ')' /* ws: explicit */
 LiteralChar ::= AnyChar - ('(' | ')' | Esc)