From 62c0e8855440531faeafbdd1e0d1ff47102526da Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Tue, 6 Aug 2024 09:41:27 +0300 Subject: [PATCH 1/3] Allow whitespace at complex-message start --- spec/message.abnf | 4 ++-- spec/syntax.md | 13 +++++++++---- test/tests/syntax.json | 17 +++++++++++++---- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 583b680b8..1cd7a1fa5 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -1,11 +1,11 @@ message = simple-message / complex-message -simple-message = [simple-start pattern] +simple-message = [s] [simple-start pattern] simple-start = simple-start-char / escaped-char / placeholder pattern = *(text-char / escaped-char / placeholder) placeholder = expression / markup -complex-message = *(declaration [s]) complex-body [s] +complex-message = [s] *(declaration [s]) complex-body [s] declaration = input-declaration / local-declaration / reserved-statement complex-body = quoted-pattern / matcher diff --git a/spec/syntax.md b/spec/syntax.md index 6f7f7e9c6..6359e9c92 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -153,11 +153,14 @@ message = simple-message / complex-message ``` A **_simple message_** contains a single _pattern_, -with restrictions on its first character. +with restrictions on its first non-whitespace character. An empty string is a valid _simple message_. +Whitespace at the start of a _simple message_ is significant, +and a part of the _text_ of the _message_. + ```abnf -simple-message = [simple-start pattern] +simple-message = [s] [simple-start pattern] simple-start = simple-start-char / escaped-char / placeholder ``` @@ -169,8 +172,10 @@ and consists of: 1. an optional list of _declarations_, followed by 2. a _complex body_ +Whitespace at the start of a _complex message_ is ignored. 
+ ```abnf -complex-message = *(declaration [s]) complex-body [s] +complex-message = [s] *(declaration [s]) complex-body [s] ``` ### Declarations @@ -300,7 +305,7 @@ U+007B LEFT CURLY BRACKET `{`, and U+007D RIGHT CURLY BRACKET `}` MUST be escaped as `\\`, `\{`, and `\}` respectively. In the ABNF, _text_ is represented by non-empty sequences of -`simple-start-char`, `text-char`, and `escaped-char`. +`simple-start-char`, `text-char`, `escaped-char`, and `s`. The first of these is used at the start of a _simple message_, and matches `text-char` except for not allowing U+002E FULL STOP `.`. The ABNF uses `content-char` as a shared base for _text_ and _quoted literal_ characters. diff --git a/test/tests/syntax.json b/test/tests/syntax.json index 9425b538b..1a2d601a2 100644 --- a/test/tests/syntax.json +++ b/test/tests/syntax.json @@ -36,6 +36,11 @@ "src": "hello {|world|}", "exp": "hello world" }, + { + "description": "message -> simple-message -> s simple-start pattern -> s simple-start-char pattern -> ...", + "src": "\n hello\t", + "exp": "\n hello\t" + }, { "src": "hello {$place}", "params": [ @@ -134,6 +139,11 @@ "src": ".input{$x}{{}}", "exp": "" }, + { + "description": "message -> complex-message -> s *(declaration [s]) complex-body s -> s declaration complex-body s -> s input-declaration complex-body s -> s input variable-expression complex-body s", + "src": "\t.input{$x}{{}}\n", + "exp": "" + }, { "description": "message -> complex-message -> *(declaration [s]) complex-body -> declaration declaration complex-body -> input-declaration input-declaration complex-body -> input variable-expression input variable-expression complex-body", "src": ".input{$x}.input{$y}{{}}", @@ -145,8 +155,8 @@ "exp": "" }, { - "description": "message -> complex-message -> *(declaration [s]) complex-body s -> complex-body s", - "src": "{{}} ", + "description": "message -> complex-message -> s *(declaration [s]) complex-body s -> s complex-body s", + "src": " {{}} ", "exp": "" }, { @@ 
-398,8 +408,7 @@ "source": "|42|", "value": "42" } - ], - "exp": "42" + ] }, { "description": "... literal -> quoted-literal -> \"|\" \"|\" ...", From 73f579b54c22d4b899b413b4edb048fd4fd79833 Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Tue, 6 Aug 2024 18:04:11 +0300 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Addison Phillips --- spec/syntax.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 6359e9c92..36ef3eec6 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -156,7 +156,7 @@ A **_simple message_** contains a single _pattern_, with restrictions on its first non-whitespace character. An empty string is a valid _simple message_. -Whitespace at the start of a _simple message_ is significant, +Whitespace at the start or end of a _simple message_ is significant, and a part of the _text_ of the _message_. ```abnf @@ -172,7 +172,8 @@ and consists of: 1. an optional list of _declarations_, followed by 2. a _complex body_ -Whitespace at the start of a _complex message_ is ignored. +Whitespace at the start or end of a _complex message_ is not significant, +and does not affect the processing of the _message_. ```abnf complex-message = [s] *(declaration [s]) complex-body [s] @@ -306,7 +307,7 @@ MUST be escaped as `\\`, `\{`, and `\}` respectively. In the ABNF, _text_ is represented by non-empty sequences of `simple-start-char`, `text-char`, `escaped-char`, and `s`. -The first of these is used at the start of a _simple message_, +The production `simple-start-char` represents the first non-whitespace character in a _simple message_ and matches `text-char` except for not allowing U+002E FULL STOP `.`. The ABNF uses `content-char` as a shared base for _text_ and _quoted literal_ characters. 
From 613171abe6bcc1d266f38ee5514adb26f288728f Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Tue, 6 Aug 2024 18:14:30 +0300 Subject: [PATCH 3/3] Remove whitespace from simple-start-char --- spec/message.abnf | 2 +- spec/syntax.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 1cd7a1fa5..3377275da 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -86,7 +86,7 @@ name-char = name-start / DIGIT / "-" / "." / %xB7 / %x300-36F / %x203F-2040 ; Restrictions on characters in various contexts -simple-start-char = content-char / s / "@" / "|" +simple-start-char = content-char / "@" / "|" text-char = content-char / s / "." / "@" / "|" quoted-char = content-char / s / "." / "@" / "{" / "}" reserved-char = content-char / "." diff --git a/spec/syntax.md b/spec/syntax.md index 36ef3eec6..0d5ec2657 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -315,7 +315,7 @@ Whitespace in _text_, including tabs, spaces, and newlines is significant and MU be preserved during formatting. ```abnf -simple-start-char = content-char / s / "@" / "|" +simple-start-char = content-char / "@" / "|" text-char = content-char / s / "." / "@" / "|" quoted-char = content-char / s / "." / "@" / "{" / "}" reserved-char = content-char / "."