From 7f9a4dea37d06be008516f3e6e6e701c02971bcd Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Mon, 21 Oct 2024 19:45:38 +0300 Subject: [PATCH] Add u: options namespace (#846) * Move spec/registry.md -> spec/registry/default.md * Add Unicode Registry definition * Refer to BCP47, add note about only requiring normal tags * Call it a namespace * Apply suggestions from code review Co-authored-by: Addison Phillips * Fix test file reference Co-authored-by: Tim Chevalier * Apply suggestions from code review * Update spec/u-namespace.md Co-authored-by: Eemeli Aro * Apply suggestions from code review Co-authored-by: Addison Phillips * Apply suggestions from code review Co-authored-by: Addison Phillips * Add mention of functions to namespace description --------- Co-authored-by: Addison Phillips Co-authored-by: Tim Chevalier --- spec/README.md | 1 + spec/formatting.md | 23 +++++- spec/u-namespace.md | 87 +++++++++++++++++++++ test/README.md | 3 + test/schemas/v0/tests.schema.json | 3 + test/tests/u-options.json | 126 ++++++++++++++++++++++++++++++ 6 files changed, 240 insertions(+), 3 deletions(-) create mode 100644 spec/u-namespace.md create mode 100644 test/tests/u-options.json diff --git a/spec/README.md b/spec/README.md index 76cc998cf..c603282ca 100644 --- a/spec/README.md +++ b/spec/README.md @@ -17,6 +17,7 @@ 1. [Resolution Errors](errors.md#resolution-errors) 1. [Message Function Errors](errors.md#message-function-errors) 1. [Default Function Registry](registry.md) +1. [`u:` Namespace](u-namespace.md) 1. [Formatting](formatting.md) 1. [Interchange data model](data-model/README.md) diff --git a/spec/formatting.md b/spec/formatting.md index 27495fb01..f1a12cae0 100644 --- a/spec/formatting.md +++ b/spec/formatting.md @@ -260,9 +260,22 @@ the following steps are taken: 3. Perform _option resolution_. -4. Call the _function handler_ with the following arguments: +4. Determine the _function context_ for calling the _function handler_. - - The current _locale_. 
+ The **_function context_** contains the context necessary for + the _function handler_ to resolve the _expression_. This includes: + + - The current _locale_, + potentially including a fallback chain of locales. + - The base directionality of the _message_ and its _text_ tokens. + + If the resolved mapping of _options_ includes any _`u:` options_ + supported by the implementation, process them as specified. + Such `u:` options MAY be removed from the resolved mapping of _options_. + +5. Call the _function handler_ with the following arguments: + + - The _function context_. - The resolved mapping of _options_. - If the _expression_ includes an _operand_, its _resolved value_. @@ -272,7 +285,7 @@ the following steps are taken: as long as reasonable precautions are taken to keep the function interface simple and minimal, and avoid introducing potential security vulnerabilities. -5. If the call succeeds, +6. If the call succeeds, resolve the value of the _expression_ as the result of that function call. If the call fails or does not return a valid value, @@ -345,6 +358,10 @@ The _resolved value_ of _markup_ includes the following fields: - The _identifier_ of the _markup_ - The resolved _options_ values after _option resolution_. +If the resolved mapping of _options_ includes any _`u:` options_ +supported by the implementation, process them as specified. +Such `u:` options MAY be removed from the resolved mapping of _options_. + The resolution of _markup_ MUST always succeed. ### Fallback Resolution diff --git a/spec/u-namespace.md b/spec/u-namespace.md new file mode 100644 index 000000000..dabbcc70f --- /dev/null +++ b/spec/u-namespace.md @@ -0,0 +1,87 @@ +# MessageFormat 2.0 Unicode Namespace + +The `u:` _namespace_ is reserved for the definition of _options_ +which affect the _function context_ of the specific _expressions_ +in which they appear, +or for the definition of _options_ that are universally applicable +rather than function-specific. 
+It might also be used to define _functions_ in a future release. + +The CLDR Technical Committee of the Unicode Consortium +manages the specification for this namespace, hence the name `u:`. + +## Options + +This section describes common **_`u:` options_** which each implementation SHOULD support +for all _functions_ and _markup_. + +### `u:id` + +A string value that is included as an `id` or other suitable value +in the formatted parts for the _placeholder_, +or any other structured formatted results. + +Ignored when formatting a message to a string. + +The value of the `u:id` _option_ MUST be a _literal_ or a +_variable_ whose _resolved value_ is either a string +or can be resolved to a string without error. +For other values, a _Bad Option_ error is emitted +and the `u:id` option is ignored. + +### `u:locale` + +Replaces the _locale_ defined in the _function context_ for this _expression_. + +A comma-delimited list consisting of +well-formed [BCP 47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) +language tags, +or an implementation-defined list of such tags. + +If this option is set on _markup_, a _Bad Option_ error is emitted +and the value of the `u:locale` option is ignored. + +During processing, the `u:locale` option +MUST be removed from the resolved mapping of _options_ +before calling the _function handler_. + +Values matching the following ABNF are always accepted: +```abnf +u-locale-option = unicode_bcp47_locale_id *(o "," o unicode_bcp47_locale_id) +``` +using `unicode_bcp47_locale_id` as defined for +[Unicode Locale Identifier](https://unicode.org/reports/tr35/tr35.html#unicode_bcp47_locale_id). + +Implementations MAY support additional language tags, +such as private-use or grandfathered tags, +or tags using `_` instead of `-` as a separator. +When the value of `u:locale` is set by a _variable_, +implementations MAY support non-string values otherwise representing locales. 
+ +Implementations MAY emit a _Bad Option_ error +and MAY ignore the value of the `u:locale` _option_ as a whole +or any of the entries in the list of language tags. +This might be because the locale specified is not supported +or because the language tag is not well-formed, +not valid, or some other reason. + +### `u:dir` + +Replaces the base directionality defined in +the _function context_ for this _expression_. + +If this option is set on _markup_, a _Bad Option_ error is emitted +and the value of the `u:dir` option is ignored. + +During processing, the `u:dir` option +MUST be removed from the resolved mapping of _options_ +before calling the _function handler_. + +The value of the `u:dir` _option_ MUST be one of the following _literal_ values +or a _variable_ whose _resolved value_ is one of these _literals_: +- `ltr`: left-to-right directionality +- `rtl`: right-to-left directionality +- `auto`: directionality determined from _expression_ contents + +For other values, a _Bad Option_ error is emitted +and the value of the `u:dir` option is ignored. diff --git a/test/README.md b/test/README.md index 6fa50dfc5..d5cbee831 100644 --- a/test/README.md +++ b/test/README.md @@ -10,6 +10,8 @@ These test files are intended to be useful for testing multiple different messag - `data-model-errors.json` - Strings that should produce a Data Model Error when processed. Error names are defined in ["MessageFormat 2.0 Errors"](../spec/errors.md) in the spec. +- `u-options.json` — Test cases for the `u:` options, using built-in functions. + - `functions/` — Test cases that correspond to built-in functions. The behaviour of the built-in formatters is implementation-specific so the `exp` field is often omitted and assertions are made on error cases. 
@@ -21,6 +23,7 @@ Some examples of test harnesses using these tests, from the source repository: - [Formatting tests](https://github.com/messageformat/messageformat/blob/11c95dab2b25db8454e49ff4daadb817e1d5b770/packages/mf2-messageformat/src/messageformat.test.ts) A [JSON schema](./schemas/) is included for the test files in this repository. + ## Error Codes The following table relates the error names used in the [JSON schema](./schemas/) diff --git a/test/schemas/v0/tests.schema.json b/test/schemas/v0/tests.schema.json index a0dd0a56e..a37dcfa8d 100644 --- a/test/schemas/v0/tests.schema.json +++ b/test/schemas/v0/tests.schema.json @@ -269,6 +269,9 @@ "name": { "type": "string" }, + "id": { + "type": "string" + }, "options": { "type": "object" } diff --git a/test/tests/u-options.json b/test/tests/u-options.json new file mode 100644 index 000000000..3e13b30a2 --- /dev/null +++ b/test/tests/u-options.json @@ -0,0 +1,126 @@ +{ + "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "scenario": "u: Options", + "description": "Common options affecting the function context", + "defaultTestProperties": { + "locale": "en-US" + }, + "tests": [ + { + "src": "{#tag u:id=x}content{/ns:tag u:id=x}", + "exp": "content", + "expParts": [ + { + "type": "markup", + "kind": "open", + "id": "x", + "name": "tag" + }, + { + "type": "literal", + "value": "content" + }, + { + "type": "markup", + "kind": "close", + "id": "x", + "name": "ns:tag" + } + ] + }, + { + "src": "{#tag u:dir=rtl u:locale=ar}content{/ns:tag}", + "exp": "content", + "expErrors": [{ "type": "bad-option" }, { "type": "bad-option" }], + "expParts": [ + { + "type": "markup", + "kind": "open", + "name": "tag" + }, + { + "type": "literal", + "value": "content" + }, + { + "type": "markup", + "kind": "close", + "name": "ns:tag" + } + ] + }, + { + "src": "hello {4.2 :number u:locale=fr}", + "exp": "hello 4,2" + }, + { + "src": "hello {world :string u:dir=ltr 
u:id=foo}", + "exp": "hello world", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "ltr", + "id": "foo", + "value": "world" + } + ] + }, + { + "src": "hello {world :string u:dir=rtl}", + "exp": "hello \u2067world\u2069", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "rtl", + "value": "world" + } + ] + }, + { + "src": "hello {world :string u:dir=auto}", + "exp": "hello \u2068world\u2069", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "auto", + "value": "world" + } + ] + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string u:dir=rtl}", + "exp": "أهلاً \u2067بالعالم\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string u:dir=auto}", + "exp": "أهلاً \u2068بالعالم\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {world :string u:dir=ltr}", + "exp": "أهلاً \u2066world\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string}", + "exp": "أهلاً \u2067بالعالم\u2069" + } + ] +}