From 78f15948a20a708a7016a294ce51d15803b208c0 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 19 Sep 2024 15:22:14 -0700 Subject: [PATCH] ICU-22902 Remove support for Unsupported, Private & Reserved constructs Matching PR #883 in the message-format-wg repo. Also move spec tests for unsupported statements and expressions into new files to serve as syntax error tests. --- icu4c/source/common/unicode/utypes.h | 4 +- icu4c/source/common/utypes.cpp | 2 - icu4c/source/i18n/messageformat2.cpp | 30 - icu4c/source/i18n/messageformat2_checker.cpp | 22 +- .../source/i18n/messageformat2_data_model.cpp | 268 +-------- icu4c/source/i18n/messageformat2_errors.cpp | 19 - icu4c/source/i18n/messageformat2_errors.h | 3 - icu4c/source/i18n/messageformat2_parser.cpp | 341 +----------- icu4c/source/i18n/messageformat2_parser.h | 3 - .../source/i18n/messageformat2_serializer.cpp | 66 +-- icu4c/source/i18n/messageformat2_serializer.h | 2 - .../i18n/unicode/messageformat2_data_model.h | 524 +----------------- .../intltest/messageformat2test_read_json.cpp | 17 +- .../ibm/icu/dev/test/message2/CoreTest.java | 2 + testdata/message2/more-data-model-errors.json | 28 - testdata/message2/resolution-errors.json | 9 +- testdata/message2/spec/syntax.json | 271 --------- .../message2/spec/unsupported-statements.json | 18 - .../{spec => }/unsupported-expressions.json | 30 +- testdata/message2/unsupported-statements.json | 27 + 20 files changed, 109 insertions(+), 1577 deletions(-) delete mode 100644 testdata/message2/spec/unsupported-statements.json rename testdata/message2/{spec => }/unsupported-expressions.json (67%) create mode 100644 testdata/message2/unsupported-statements.json diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index 87fa5ddf7e87..0151ebd47015 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -597,15 +597,13 @@ typedef enum UErrorCode { U_MF_MISSING_SELECTOR_ANNOTATION_ERROR, /**< A selector expression evaluates to an unannotated operand. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ - U_MF_UNSUPPORTED_STATEMENT_ERROR, /**< A message includes a reserved statement. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ - U_MF_UNSUPPORTED_EXPRESSION_ERROR, /**< A message includes syntax reserved for future standardization or private implementation use. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_FMT_PARSE_ERROR_LIMIT = 0x10122, + U_FMT_PARSE_ERROR_LIMIT = 0x10120, #endif // U_HIDE_DEPRECATED_API /* diff --git a/icu4c/source/common/utypes.cpp b/icu4c/source/common/utypes.cpp index 2f449ffea033..4602314147f1 100644 --- a/icu4c/source/common/utypes.cpp +++ b/icu4c/source/common/utypes.cpp @@ -140,8 +140,6 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_MF_MISSING_SELECTOR_ANNOTATION_ERROR", "U_MF_DUPLICATE_DECLARATION_ERROR", "U_MF_OPERAND_MISMATCH_ERROR", - "U_MF_UNSUPPORTED_STATEMENT_ERROR", - "U_MF_UNSUPPORTED_EXPRESSION_ERROR", "U_MF_DUPLICATE_VARIANT_ERROR" }; diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 2febdf0ade15..73f7fa45e69f 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -241,24 +241,6 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O return FormattedPlaceholder(fallback); } -// Per https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution -static UnicodeString reservedFallback (const Expression& e) { - UErrorCode localErrorCode = U_ZERO_ERROR; - const Operator* rator = e.getOperator(localErrorCode); - U_ASSERT(U_SUCCESS(localErrorCode)); - const Reserved& r = rator->asReserved(); - - // An empty Reserved isn't representable in the syntax - U_ASSERT(r.numParts() > 0); - - const UnicodeString& contents = r.getPart(0).unquoted(); - // Parts should never be empty - U_ASSERT(contents.length() > 0); - - // Return first character of string - return UnicodeString(contents, 0, 1); -} - // Formats an expression using `globalEnv` for the values of variables [[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv, const Expression& expr, @@ -268,12 +250,6 @@ static UnicodeString reservedFallback (const Expression& e) { return {}; } - // Formatting error - if (expr.isReserved()) { - context.getErrors().setReservedError(status); - return FormattedPlaceholder(reservedFallback(expr)); - } - const Operand& rand = expr.getOperand(); // Format the operand (formatOperand handles the case of a null operand) FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status); @@ -675,12 +651,6 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, return {}; } - // A `reserved` is an error - if (expr.isReserved()) { - context.getErrors().setReservedError(status); - return ResolvedSelector(FormattedPlaceholder(reservedFallback(expr))); - } - // Function call -- resolve the operand and options if (expr.isFunctionCall()) { const Operator* rator = expr.getOperator(status); diff --git a/icu4c/source/i18n/messageformat2_checker.cpp b/icu4c/source/i18n/messageformat2_checker.cpp index 824b798410d5..bdc5c383b6e8 100644 --- a/icu4c/source/i18n/messageformat2_checker.cpp +++ b/icu4c/source/i18n/messageformat2_checker.cpp @@ -136,9 +136,7 @@ void Checker::addFreeVars(TypeEnvironment& t, const OptionMap& opts, UErrorCode& void Checker::addFreeVars(TypeEnvironment& t, const Operator& rator, UErrorCode& status) { CHECK_ERROR(status); - if (!rator.isReserved()) { - addFreeVars(t, rator.getOptionsInternal(), status); - } + addFreeVars(t, rator.getOptionsInternal(), status); } void Checker::addFreeVars(TypeEnvironment& t, const Expression& rhs, UErrorCode& status) { @@ -213,12 +211,10 @@ void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selec if (selectorExpr.isFunctionCall()) { return; // No error } - if (!selectorExpr.isReserved()) { - const Operand& rand = selectorExpr.getOperand(); - if (rand.isVariable()) { - if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) { - return; // No error - } + const Operand& rand = selectorExpr.getOperand(); + if (rand.isVariable()) { + if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) { + return; // No error } } // If this code is reached, an error was detected @@ -240,9 +236,6 @@ TypeEnvironment::Type typeOf(TypeEnvironment& t, const Expression& expr) { if (expr.isFunctionCall()) { return TypeEnvironment::Type::Annotated; } - if (expr.isReserved()) { - return TypeEnvironment::Type::Unannotated; - } const Operand& rand = expr.getOperand(); U_ASSERT(!rand.isNull()); if (rand.isLiteral()) { @@ -296,11 +289,6 @@ void Checker::checkDeclarations(TypeEnvironment& t, UErrorCode& status) { // Next, extend the type environment with a binding from lhs to its type t.extend(lhs, typeOf(t, rhs), status); } - - // Check for unsupported statements - if (dataModel.unsupportedStatementsLen > 0) { - errors.addError(StaticErrorType::UnsupportedStatementError, status); - } } void Checker::check(UErrorCode& status) { diff --git a/icu4c/source/i18n/messageformat2_data_model.cpp b/icu4c/source/i18n/messageformat2_data_model.cpp index 523ff1b39384..3fe5f65b5323 100644 --- a/icu4c/source/i18n/messageformat2_data_model.cpp +++ b/icu4c/source/i18n/messageformat2_data_model.cpp @@ -199,77 +199,6 @@ const Literal& Key::asLiteral() const { Key::~Key() {} -// ------------ Reserved - -// Copy constructor -Reserved::Reserved(const Reserved& other) : len(other.len) { - U_ASSERT(!other.bogus); - - UErrorCode localErrorCode = U_ZERO_ERROR; - if (len == 0) { - parts.adoptInstead(nullptr); - } else { - parts.adoptInstead(copyArray(other.parts.getAlias(), len, localErrorCode)); - } - if (U_FAILURE(localErrorCode)) { - bogus = true; - } -} - -Reserved& Reserved::operator=(Reserved other) noexcept { - swap(*this, other); - return *this; -} - -Reserved::Reserved(const UVector& ps, UErrorCode& status) noexcept : len(ps.size()) { - if (U_FAILURE(status)) { - return; - } - parts = LocalArray(copyVectorToArray(ps, status)); -} - -int32_t Reserved::numParts() const { - U_ASSERT(!bogus); - return len; -} - -const Literal& Reserved::getPart(int32_t i) const { - U_ASSERT(!bogus); - U_ASSERT(i < numParts()); - return parts[i]; -} - -Reserved::Builder::Builder(UErrorCode& status) { - parts = createUVector(status); -} - -Reserved Reserved::Builder::build(UErrorCode& status) const noexcept { - if (U_FAILURE(status)) { - return {}; - } - U_ASSERT(parts != nullptr); - return Reserved(*parts, status); -} - -Reserved::Builder& Reserved::Builder::add(Literal&& part, UErrorCode& status) noexcept { - U_ASSERT(parts != nullptr); - if (U_SUCCESS(status)) { - Literal* l = create(std::move(part), status); - parts->adoptElement(l, status); - } - return *this; -} - -Reserved::Builder::~Builder() { - if (parts != nullptr) { - delete parts; - } -} - -Reserved::~Reserved() { - len = 0; -} - //------------------------ Operator OptionMap::OptionMap(const UVector& opts, UErrorCode& status) : len(opts.size()) { @@ -379,14 +308,8 @@ OptionMap::Builder::~Builder() { } } -const Reserved& Operator::asReserved() const { - U_ASSERT(isReserved()); - return *(std::get_if(&contents)); -} - const OptionMap& Operator::getOptionsInternal() const { - U_ASSERT(!isReserved()); - return std::get_if(&contents)->getOptions(); + return options; } Option::Option(const Option& other): name(other.name), rand(other.rand) {} @@ -400,62 +323,28 @@ Option::~Option() {} Operator::Builder::Builder(UErrorCode& status) : options(OptionMap::Builder(status)) {} -Operator::Builder& Operator::Builder::setReserved(Reserved&& reserved) { - isReservedSequence = true; - hasFunctionName = false; - hasOptions = false; - asReserved = std::move(reserved); - return *this; -} - Operator::Builder& Operator::Builder::setFunctionName(FunctionName&& func) { - isReservedSequence = false; - hasFunctionName = true; functionName = std::move(func); return *this; } const FunctionName& Operator::getFunctionName() const { - U_ASSERT(!isReserved()); - return std::get_if(&contents)->getName(); + return name; } Operator::Builder& Operator::Builder::addOption(const UnicodeString &key, Operand&& value, UErrorCode& errorCode) noexcept { THIS_ON_ERROR(errorCode); - isReservedSequence = false; - hasOptions = true; options.add(Option(key, std::move(value)), errorCode); return *this; } Operator Operator::Builder::build(UErrorCode& errorCode) { - Operator result; - if (U_FAILURE(errorCode)) { - return result; - } - // Must be either reserved or function, not both; enforced by methods - if (isReservedSequence) { - // Methods enforce that the function name and options are unset - // if `setReserved()` is called, so if they were valid, that - // would indicate a bug. - U_ASSERT(!hasOptions && !hasFunctionName); - result = Operator(asReserved); - } else { - if (!hasFunctionName) { - // Neither function name nor reserved was set - // There is no default, so this case could occur if the - // caller creates a builder and doesn't make any calls - // before calling build(). - errorCode = U_INVALID_STATE_ERROR; - return result; - } - result = Operator(functionName, options.build(errorCode)); - } - return result; + return Operator(functionName, options.build(errorCode)); } -Operator::Operator(const Operator& other) noexcept : contents(other.contents) {} +Operator::Operator(const Operator& other) noexcept + : name(other.name), options(other.options) {} Operator& Operator::operator=(Operator other) noexcept { swap(*this, other); @@ -463,23 +352,13 @@ Operator& Operator::operator=(Operator other) noexcept { } // Function call -Operator::Operator(const FunctionName& f, const UVector& optsVector, UErrorCode& status) : contents(Callable(f, OptionMap(optsVector, status))) {} -Operator::Operator(const FunctionName& f, const OptionMap& opts) : contents(Callable(f, opts)) {} +Operator::Operator(const FunctionName& f, const OptionMap& opts) : name(f), options(opts) {} Operator::Builder::~Builder() {} Operator::~Operator() {} -Callable& Callable::operator=(Callable other) noexcept { - swap(*this, other); - return *this; -} - -Callable::Callable(const Callable& other) : name(other.name), options(other.options) {} - -Callable::~Callable() {} - // ------------ Markup Markup::Builder::Builder(UErrorCode& status) @@ -538,19 +417,14 @@ UBool Expression::isStandaloneAnnotation() const { // Returns true for function calls with operands as well as // standalone annotations. -// Reserved sequences are not function calls UBool Expression::isFunctionCall() const { - return (rator.has_value() && !rator->isReserved()); -} - -UBool Expression::isReserved() const { - return (rator.has_value() && rator->isReserved()); + return rator.has_value(); } const Operator* Expression::getOperator(UErrorCode& status) const { NULL_ON_ERROR(status); - if (!(isReserved() || isFunctionCall())) { + if (!isFunctionCall()) { status = U_INVALID_STATE_ERROR; return nullptr; } @@ -617,92 +491,6 @@ Expression::Builder::~Builder() {} Expression::~Expression() {} -// ----------- UnsupportedStatement - -UnsupportedStatement::Builder::Builder(UErrorCode& status) { - expressions = createUVector(status); -} - -UnsupportedStatement::Builder& UnsupportedStatement::Builder::setKeyword(const UnicodeString& k) { - keyword = k; - return *this; -} - -UnsupportedStatement::Builder& UnsupportedStatement::Builder::setBody(Reserved&& r) { - body.emplace(r); - return *this; -} - -UnsupportedStatement::Builder& UnsupportedStatement::Builder::addExpression(Expression&& e, UErrorCode& status) { - U_ASSERT(expressions != nullptr); - if (U_SUCCESS(status)) { - Expression* expr = create(std::move(e), status); - expressions->adoptElement(expr, status); - } - return *this; -} - -UnsupportedStatement UnsupportedStatement::Builder::build(UErrorCode& status) const { - if (U_SUCCESS(status)) { - U_ASSERT(expressions != nullptr); - if (keyword.length() <= 0) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } else if (expressions->size() < 1) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } else { - return UnsupportedStatement(keyword, body, *expressions, status); - } - } - return {}; -} - -const Reserved* UnsupportedStatement::getBody(UErrorCode& errorCode) const { - if (U_SUCCESS(errorCode)) { - if (body.has_value()) { - return &(*body); - } - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - return nullptr; -} - -UnsupportedStatement::UnsupportedStatement(const UnicodeString& k, - const std::optional& r, - const UVector& es, - UErrorCode& status) - : keyword(k), body(r), expressionsLen(es.size()) { - CHECK_ERROR(status); - - U_ASSERT(expressionsLen >= 1); - Expression* result = copyVectorToArray(es, status); - CHECK_ERROR(status); - expressions.adoptInstead(result); -} - -UnsupportedStatement::UnsupportedStatement(const UnsupportedStatement& other) { - keyword = other.keyword; - body = other.body; - expressionsLen = other.expressionsLen; - U_ASSERT(expressionsLen > 0); - UErrorCode localErrorCode = U_ZERO_ERROR; - expressions.adoptInstead(copyArray(other.expressions.getAlias(), expressionsLen, localErrorCode)); - if (U_FAILURE(localErrorCode)) { - expressionsLen = 0; - } -} - -UnsupportedStatement& UnsupportedStatement::operator=(UnsupportedStatement other) noexcept { - swap(*this, other); - return *this; -} - -UnsupportedStatement::Builder::~Builder() { - if (expressions != nullptr) { - delete expressions; - } -} - -UnsupportedStatement::~UnsupportedStatement() {} // ----------- PatternPart // PatternPart needs a copy constructor in order to make Pattern deeply copyable @@ -844,7 +632,7 @@ const Expression& Binding::getValue() const { if (hasOperator) { rator = b.getValue().getOperator(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - b.annotation = &rator->contents; + b.annotation = rator; } else { b.annotation = nullptr; } @@ -856,18 +644,17 @@ const Expression& Binding::getValue() const { const OptionMap& Binding::getOptionsInternal() const { U_ASSERT(annotation != nullptr); - U_ASSERT(std::holds_alternative(*annotation)); - return std::get_if(annotation)->getOptions(); + return annotation->getOptionsInternal(); } void Binding::updateAnnotation() { UErrorCode localErrorCode = U_ZERO_ERROR; const Operator* rator = expr.getOperator(localErrorCode); - if (U_FAILURE(localErrorCode) || rator->isReserved()) { + if (U_FAILURE(localErrorCode)) { return; } - U_ASSERT(U_SUCCESS(localErrorCode) && !rator->isReserved()); - annotation = &rator->contents; + U_ASSERT(U_SUCCESS(localErrorCode)); + annotation = rator; } Binding::Binding(const Binding& other) : var(other.var), expr(other.expr), local(other.local) { @@ -949,17 +736,8 @@ const Variant* MFDataModel::getVariantsInternal() const { return std::get_if(&body)->variants.getAlias(); } -// Returns nullptr if no unsupported statements -const UnsupportedStatement* MFDataModel::getUnsupportedStatementsInternal() const { - U_ASSERT(!bogus); - U_ASSERT(unsupportedStatementsLen == 0 || unsupportedStatements != nullptr); - return unsupportedStatements.getAlias(); -} - - MFDataModel::Builder::Builder(UErrorCode& status) { bindings = createUVector(status); - unsupportedStatements = createUVector(status); } // Invalidate pattern and create selectors/variants if necessary @@ -1008,14 +786,6 @@ MFDataModel::Builder& MFDataModel::Builder::addBinding(Binding&& b, UErrorCode& return *this; } -MFDataModel::Builder& MFDataModel::Builder::addUnsupportedStatement(UnsupportedStatement&& s, UErrorCode& status) { - if (U_SUCCESS(status)) { - U_ASSERT(unsupportedStatements != nullptr); - unsupportedStatements->adoptElement(create(std::move(s), status), status); - } - return *this; -} - /* selector must be non-null */ @@ -1077,10 +847,6 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) { if (bindingsLen > 0) { bindings.adoptInstead(copyArray(other.bindings.getAlias(), bindingsLen, localErrorCode)); } - unsupportedStatementsLen = other.unsupportedStatementsLen; - if (unsupportedStatementsLen > 0) { - unsupportedStatements.adoptInstead(copyArray(other.unsupportedStatements.getAlias(), unsupportedStatementsLen, localErrorCode)); - } if (U_FAILURE(localErrorCode)) { bogus = true; } @@ -1110,11 +876,6 @@ MFDataModel::MFDataModel(const MFDataModel::Builder& builder, UErrorCode& errorC if (bindingsLen > 0) { bindings.adoptInstead(copyVectorToArray(*builder.bindings, errorCode)); } - unsupportedStatementsLen = builder.unsupportedStatements->size(); - if (unsupportedStatementsLen > 0) { - unsupportedStatements.adoptInstead(copyVectorToArray(*builder.unsupportedStatements, - errorCode)); - } if (U_FAILURE(errorCode)) { bogus = true; } @@ -1149,9 +910,6 @@ MFDataModel::Builder::~Builder() { if (bindings != nullptr) { delete bindings; } - if (unsupportedStatements != nullptr) { - delete unsupportedStatements; - } } } // namespace message2 diff --git a/icu4c/source/i18n/messageformat2_errors.cpp b/icu4c/source/i18n/messageformat2_errors.cpp index 32990a670b11..9d1d6bab81a1 100644 --- a/icu4c/source/i18n/messageformat2_errors.cpp +++ b/icu4c/source/i18n/messageformat2_errors.cpp @@ -19,10 +19,6 @@ namespace message2 { // Errors // ----------- - void DynamicErrors::setReservedError(UErrorCode& status) { - addError(DynamicError(DynamicErrorType::ReservedError), status); - } - void DynamicErrors::setFormattingError(const FunctionName& formatterName, UErrorCode& status) { addError(DynamicError(DynamicErrorType::FormattingError, formatterName), status); } @@ -143,10 +139,6 @@ namespace message2 { status = U_MF_OPERAND_MISMATCH_ERROR; break; } - case DynamicErrorType::ReservedError: { - status = U_MF_UNSUPPORTED_EXPRESSION_ERROR; - break; - } case DynamicErrorType::SelectorError: { status = U_MF_SELECTOR_ERROR; break; @@ -196,10 +188,6 @@ namespace message2 { dataModelError = true; break; } - case StaticErrorType::UnsupportedStatementError: { - dataModelError = true; - break; - } } syntaxAndDataModelErrors->adoptElement(errorP, status); } @@ -228,10 +216,6 @@ namespace message2 { resolutionAndFormattingErrors->adoptElement(errorP, status); break; } - case DynamicErrorType::ReservedError: { - resolutionAndFormattingErrors->adoptElement(errorP, status); - break; - } case DynamicErrorType::SelectorError: { selectorError = true; resolutionAndFormattingErrors->adoptElement(errorP, status); @@ -279,9 +263,6 @@ namespace message2 { status = U_MF_SYNTAX_ERROR; break; } - case StaticErrorType::UnsupportedStatementError: { - status = U_MF_UNSUPPORTED_STATEMENT_ERROR; - } } } } diff --git a/icu4c/source/i18n/messageformat2_errors.h b/icu4c/source/i18n/messageformat2_errors.h index d1766af184d1..f84aa7362837 100644 --- a/icu4c/source/i18n/messageformat2_errors.h +++ b/icu4c/source/i18n/messageformat2_errors.h @@ -58,7 +58,6 @@ namespace message2 { MissingSelectorAnnotation, NonexhaustivePattern, SyntaxError, - UnsupportedStatementError, VariantKeyMismatchError }; @@ -66,7 +65,6 @@ namespace message2 { UnresolvedVariable, FormattingError, OperandMismatchError, - ReservedError, SelectorError, UnknownFunction, }; @@ -123,7 +121,6 @@ namespace message2 { int32_t count() const; void setSelectorError(const FunctionName&, UErrorCode&); - void setReservedError(UErrorCode&); void setUnresolvedVariable(const VariableName&, UErrorCode&); void setUnknownFunction(const FunctionName&, UErrorCode&); void setFormattingError(const FunctionName&, UErrorCode&); diff --git a/icu4c/source/i18n/messageformat2_parser.cpp b/icu4c/source/i18n/messageformat2_parser.cpp index ad11324a964d..b4768756c5ea 100644 --- a/icu4c/source/i18n/messageformat2_parser.cpp +++ b/icu4c/source/i18n/messageformat2_parser.cpp @@ -104,8 +104,6 @@ static bool inRange(UChar32 c, UChar32 first, UChar32 last) { `isContentChar()` : `content-char` `isTextChar()` : `text-char` - `isReservedStart()` : `reserved-start` - `isReservedChar()` : `reserved-char` `isAlpha()` : `ALPHA` `isDigit()` : `DIGIT` `isNameStart()` : `name-start` @@ -149,35 +147,6 @@ static bool isTextChar(UChar32 c) { || c == PIPE; } -// Note: this doesn't distinguish between private-use -// and reserved, since the data model doesn't -static bool isReservedStart(UChar32 c) { - switch (c) { - case BANG: - case PERCENT: - case ASTERISK: - case PLUS: - case LESS_THAN: - case GREATER_THAN: - case QUESTION: - case TILDE: - // Private-use - case CARET: - case AMPERSAND: - return true; - default: - return false; - } -} - -static bool isReservedChar(UChar32 c) { - return isContentChar(c) || c == PERIOD; -} - -static bool isReservedBodyStart(UChar32 c) { - return isReservedChar(c) || c == BACKSLASH || c == PIPE; -} - static bool isAlpha(UChar32 c) { return inRange(c, 0x0041, 0x005A) || inRange(c, 0x0061, 0x007A); } static bool isDigit(UChar32 c) { return inRange(c, 0x0030, 0x0039); } @@ -230,24 +199,7 @@ static bool isFunctionStart(UChar32 c) { // Returns true iff `c` can begin an `annotation` nonterminal static bool isAnnotationStart(UChar32 c) { - return isFunctionStart(c) || isReservedStart(c); -} - -// Returns true iff `c` can begin either a `reserved-char` or `reserved-escape` -// literal -static bool reservedChunkFollows(UChar32 c) { - switch(c) { - // reserved-escape - case BACKSLASH: - // literal - case PIPE: { - return true; - } - default: { - // reserved-char - return (isReservedChar(c)); - } - } + return isFunctionStart(c); } // Returns true iff `c` can begin a `literal` nonterminal @@ -1121,189 +1073,7 @@ Arbitrary lookahead is required to parse attribute lists, similarly to option li } /* - Consumes a non-empty sequence of reserved-chars, reserved-escapes, and - literals (as in 1*(reserved-char / reserved-escape / literal) in the `reserved-body` rule) - - Appends it to `str` -*/ -void Parser::parseReservedChunk(Reserved::Builder& result, UErrorCode& status) { - CHECK_ERROR(status); - - bool empty = true; - UnicodeString chunk; - while(reservedChunkFollows(peek())) { - empty = false; - // reserved-char - if (isReservedChar(peek())) { - chunk += peek(); - normalizedInput += peek(); - // consume the char - next(); - // Restore precondition - CHECK_BOUNDS(status); - continue; - } - - if (chunk.length() > 0) { - result.add(Literal(false, chunk), status); - chunk.setTo(u"", 0); - } - - if (peek() == BACKSLASH) { - // reserved-escape - result.add(Literal(false, parseEscapeSequence(status)), status); - chunk.setTo(u"", 0); - } else if (peek() == PIPE || isUnquotedStart(peek())) { - result.add(parseLiteral(status), status); - } else { - // The reserved chunk ends here - break; - } - - CHECK_ERROR(status); // Avoid looping infinitely - } - - // Add the last chunk if necessary - if (chunk.length() > 0) { - result.add(Literal(false, chunk), status); - } - - if (empty) { - ERROR(status); - } -} - -/* - Consume a `reserved-start` character followed by a possibly-empty sequence - of non-empty sequences of reserved characters, separated by whitespace. - Matches the `reserved` nonterminal in the grammar - -*/ -Reserved Parser::parseReserved(UErrorCode& status) { - Reserved::Builder builder(status); - - if (U_FAILURE(status)) { - return {}; - } - - U_ASSERT(inBounds()); - - // Require a `reservedStart` character - if (!isReservedStart(peek())) { - ERROR(status); - return Reserved(); - } - - // Add the start char as a separate text chunk - UnicodeString firstCharString(peek()); - builder.add(Literal(false, firstCharString), status); - if (U_FAILURE(status)) { - return {}; - } - // Consume reservedStart - normalizedInput += peek(); - next(); - return parseReservedBody(builder, status); -} - -Reserved Parser::parseReservedBody(Reserved::Builder& builder, UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; - } - -/* - Arbitrary lookahead is required to parse a `reserved`, for similar reasons - to why it's required for parsing function annotations. - - In the grammar: - - annotation = (function *(s option)) / reserved - expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" - reserved = reserved-start reserved-body - reserved-body = *( [s] 1*(reserved-char / reserved-escape / literal)) - - When reading a whitespace character, it's ambiguous whether it's the optional - whitespace in this rule, or the optional whitespace that precedes a '}' in an - expression. - - The ambiguity is resolved using the same grammar refactoring as shown in - the comment in `parseOptions()`. -*/ - // Consume reserved characters / literals / reserved escapes - // until a character that can't be in a `reserved-body` is seen - while (true) { - /* - First, if there is whitespace, it means either a chunk follows it, - or this is the trailing whitespace before the '}' that terminates an - expression. - - Next, if the next character can start a reserved-char, reserved-escape, - or literal, then parse a "chunk" of reserved things. - In any other case, we exit successfully, since per the refactored - grammar rule: - annotation = (function *(s option) [s]) / (reserved [s]) - it's valid to consume whitespace after a `reserved`. - (`parseExpression()` is responsible for checking that the next - character is in fact a '}'.) - */ - if (!inBounds()) { - break; - } - int32_t numWhitespaceChars = 0; - int32_t savedIndex = index; - if (isWhitespace(peek())) { - parseOptionalWhitespace(status); - numWhitespaceChars = index - savedIndex; - // Restore precondition - if (!inBounds()) { - break; - } - } - - if (reservedChunkFollows(peek())) { - parseReservedChunk(builder, status); - - // Avoid looping infinitely - if (U_FAILURE(status) || !inBounds()) { - break; - } - } else { - if (numWhitespaceChars > 0) { - if (peek() == LEFT_CURLY_BRACE) { - // Resolve even more ambiguity (space preceding another piece of - // a `reserved-body`, vs. space preceding an expression in `reserved-statement` - // "Backtrack" - index -= numWhitespaceChars; - break; - } - if (peek() == RIGHT_CURLY_BRACE) { - // Not an error: just means there's no trailing whitespace - // after this `reserved` - break; - } - if (peek() == AT) { - // Not an error, but we have to "backtrack" due to the ambiguity - // between an `s` preceding another reserved chunk - // and an `s` preceding an attribute list - index -= numWhitespaceChars; - break; - } - // Error: if there's whitespace, it must either be followed - // by a non-empty sequence or by '}' - ERROR(status); - break; - } - // If there was no whitespace, it's not an error, - // just the end of the reserved string - break; - } - } - - return builder.build(status); -} - -/* - Consume a function call or reserved string, matching the `annotation` + Consume a function call, matching the `annotation` nonterminal in the grammar Returns an `Operator` representing this (a reserved is a parse error) @@ -1323,17 +1093,9 @@ Operator Parser::parseAnnotation(UErrorCode& status) { // Consume the options (which may be empty) parseOptions(addOptions, status); } else { - // Must be reserved - // A reserved sequence is not a parse error, but might be a formatting error - Reserved rator = parseReserved(status); - ratorBuilder.setReserved(std::move(rator)); + ERROR(status); } - UErrorCode localStatus = U_ZERO_ERROR; - Operator result = ratorBuilder.build(localStatus); - // Either `setReserved` or `setFunctionName` was called, - // so there shouldn't be an error. - U_ASSERT(U_SUCCESS(localStatus)); - return result; + return ratorBuilder.build(status); } /* @@ -1613,95 +1375,6 @@ void Parser::parseInputDeclaration(UErrorCode& status) { } } -/* - Parses a `reserved-statement` per the grammar - */ -void Parser::parseUnsupportedStatement(UErrorCode& status) { - U_ASSERT(inBounds() && peek() == PERIOD); - - UnsupportedStatement::Builder builder(status); - CHECK_ERROR(status); - - // Parse the keyword - UnicodeString keyword(PERIOD); - normalizedInput += UnicodeString(PERIOD); - next(); - keyword += parseName(status); - builder.setKeyword(keyword); - - // Parse the body, which is optional - // Lookahead is required to distinguish the `s` in reserved-body - // from the `s` in `[s] expression` - // Next character may be: - // * whitespace (followed by either a reserved-body start or - // a '{') - // * a '{' - - CHECK_BOUNDS(status); - - if (peek() != LEFT_CURLY_BRACE) { - if (!isWhitespace(peek())) { - ERROR(status); - return; - } - // Expect a reserved-body start - int32_t savedIndex = index; - parseRequiredWhitespace(status); - CHECK_BOUNDS(status); - if (isReservedBodyStart(peek())) { - // There is a reserved body - Reserved::Builder r(status); - builder.setBody(parseReservedBody(r, status)); - } else { - // No body -- backtrack so we can parse 1*([s] expression) - index = savedIndex; - normalizedInput.truncate(normalizedInput.length() - 1); - } - // Otherwise, the next character must be a '{' - // to open the required expression (or optional whitespace) - if (peek() != LEFT_CURLY_BRACE && !isWhitespace(peek())) { - ERROR(status); - return; - } - } - - // Finally, parse the expressions - - // Need to look ahead to disambiguate a '{' beginning - // an expression from one beginning with a quoted pattern - int32_t expressionCount = 0; - while (peek() == LEFT_CURLY_BRACE || isWhitespace(peek())) { - parseOptionalWhitespace(status); - - bool nextIsLbrace = peek() == LEFT_CURLY_BRACE; - bool nextIsQuotedPattern = nextIsLbrace && inBounds(1) - && peek(1) == LEFT_CURLY_BRACE; - if (nextIsQuotedPattern) { - break; - } - - builder.addExpression(parseExpression(status), status); - expressionCount++; - } - if (expressionCount <= 0) { - // At least one expression is required - ERROR(status); - return; - } - dataModel.addUnsupportedStatement(builder.build(status), status); -} - -// Terrible hack to get around the ambiguity between unsupported keywords -// and supported keywords -bool Parser::nextIs(const std::u16string_view &keyword) const { - for (int32_t i = 0; i < static_cast(keyword.length()); i++) { - if (!inBounds(i) || peek(i) != keyword[i]) { - return false; - } - } - return true; -} - /* Consume a possibly-empty sequence of declarations separated by whitespace; each declaration matches the `declaration` nonterminal in the grammar @@ -1715,11 +1388,7 @@ void Parser::parseDeclarations(UErrorCode& status) { while (peek() == PERIOD) { CHECK_BOUNDS_1(status); - // Check for unsupported statements first - // Lookahead is needed to disambiguate keyword from known keywords - if (!nextIs(ID_MATCH) && !nextIs(ID_LOCAL) && !nextIs(ID_INPUT)) { - parseUnsupportedStatement(status); - } else if (peek(1) == ID_LOCAL[1]) { + if (peek(1) == ID_LOCAL[1]) { parseLocalDeclaration(status); } else if (peek(1) == ID_INPUT[1]) { parseInputDeclaration(status); diff --git a/icu4c/source/i18n/messageformat2_parser.h b/icu4c/source/i18n/messageformat2_parser.h index 4b35760cb118..b62cbe9200b9 100644 --- a/icu4c/source/i18n/messageformat2_parser.h +++ b/icu4c/source/i18n/messageformat2_parser.h @@ -127,9 +127,6 @@ namespace message2 { void parseOption(OptionAdder&, UErrorCode&); template void parseOptions(OptionAdder&, UErrorCode&); - void parseReservedChunk(Reserved::Builder&, UErrorCode&); - Reserved parseReserved(UErrorCode&); - Reserved parseReservedBody(Reserved::Builder&, UErrorCode&); Operator parseAnnotation(UErrorCode&); void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&); Markup parseMarkup(UErrorCode&); diff --git a/icu4c/source/i18n/messageformat2_serializer.cpp b/icu4c/source/i18n/messageformat2_serializer.cpp index 228cf34181cd..b2765f5acf43 100644 --- a/icu4c/source/i18n/messageformat2_serializer.cpp +++ b/icu4c/source/i18n/messageformat2_serializer.cpp @@ -134,36 +134,10 @@ void Serializer::emitAttributes(const OptionMap& attributes) { } } -void Serializer::emit(const Reserved& reserved) { - // Re-escape '\' / '{' / '|' / '}' - for (int32_t i = 0; i < reserved.numParts(); i++) { - const Literal& l = reserved.getPart(i); - if (l.isQuoted()) { - emit(l); - } else { - const UnicodeString& s = l.unquoted(); - for (int32_t j = 0; j < s.length(); j++) { - switch(s[j]) { - case LEFT_CURLY_BRACE: - case PIPE: - case RIGHT_CURLY_BRACE: - case BACKSLASH: { - emit(BACKSLASH); - break; - } - default: - break; - } - emit(s[j]); - } - } - } -} - void Serializer::emit(const Expression& expr) { emit(LEFT_CURLY_BRACE); - if (!expr.isReserved() && !expr.isFunctionCall()) { + if (!expr.isFunctionCall()) { // Literal or variable, no annotation emit(expr.getOperand()); } else { @@ -176,17 +150,12 @@ void Serializer::emit(const Reserved& reserved) { UErrorCode localStatus = U_ZERO_ERROR; const Operator* rator = expr.getOperator(localStatus); U_ASSERT(U_SUCCESS(localStatus)); - if (rator->isReserved()) { - const Reserved& reserved = rator->asReserved(); - emit(reserved); - } else { - emit(COLON); - emit(rator->getFunctionName()); - // No whitespace after function name, in case it has - // no options. (when there are options, emit(OptionMap) will - // emit the leading whitespace) - emit(rator->getOptionsInternal()); - } + emit(COLON); + emit(rator->getFunctionName()); + // No whitespace after function name, in case it has + // no options. (when there are options, emit(OptionMap) will + // emit the leading whitespace) + emit(rator->getOptionsInternal()); } emitAttributes(expr.getAttributesInternal()); emit(RIGHT_CURLY_BRACE); @@ -273,26 +242,6 @@ void Serializer::serializeDeclarations() { } } -void Serializer::serializeUnsupported() { - const UnsupportedStatement* statements = dataModel.getUnsupportedStatementsInternal(); - U_ASSERT(dataModel.unsupportedStatementsLen == 0 || statements != nullptr); - - for (int32_t i = 0; i < dataModel.unsupportedStatementsLen; i++) { - const UnsupportedStatement& s = statements[i]; - emit(s.getKeyword()); - UErrorCode localErrorCode = U_ZERO_ERROR; - const Reserved* r = s.getBody(localErrorCode); - if (U_SUCCESS(localErrorCode)) { - whitespace(); - emit(*r); - } - const Expression* e = s.getExpressionsInternal(); - for (int32_t j = 0; j < s.expressionsLen; j++) { - emit(e[j]); - } - } -} - void Serializer::serializeSelectors() { U_ASSERT(!dataModel.hasPattern()); const Expression* selectors = dataModel.getSelectorsInternal(); @@ -319,7 +268,6 @@ void Serializer::serializeVariants() { // Main (public) serializer method void Serializer::serialize() { serializeDeclarations(); - serializeUnsupported(); // Pattern message if (dataModel.hasPattern()) { emit(dataModel.getPattern()); diff --git a/icu4c/source/i18n/messageformat2_serializer.h b/icu4c/source/i18n/messageformat2_serializer.h index 38d3412665a4..1b97b3b93008 100644 --- a/icu4c/source/i18n/messageformat2_serializer.h +++ b/icu4c/source/i18n/messageformat2_serializer.h @@ -44,14 +44,12 @@ namespace message2 { void emit(const Key&); void emit(const SelectorKeys&); void emit(const Operand&); - void emit(const Reserved&); void emit(const Expression&); void emit(const PatternPart&); void emit(const Pattern&); void emit(const Variant*); void emitAttributes(const OptionMap&); void emit(const OptionMap&); - void serializeUnsupported(); void serializeDeclarations(); void serializeSelectors(); void serializeVariants(); diff --git a/icu4c/source/i18n/unicode/messageformat2_data_model.h b/icu4c/source/i18n/unicode/messageformat2_data_model.h index 65dc836caeb3..0c836af1bdfd 100644 --- a/icu4c/source/i18n/unicode/messageformat2_data_model.h +++ b/icu4c/source/i18n/unicode/messageformat2_data_model.h @@ -62,163 +62,6 @@ namespace message2 { class Literal; class Operator; - /** - * The `Reserved` class represents a `reserved` annotation, as in the `reserved` nonterminal - * in the MessageFormat 2 grammar or the `Reserved` interface - * defined in - * https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#expressions - * - * `Reserved` is immutable, copyable and movable. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API Reserved : public UMemory { - public: - /** - * A `Reserved` is a sequence of literals. - * - * @return The number of literals. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - int32_t numParts() const; - /** - * Indexes into the sequence. - * Precondition: i < numParts() - * - * @param i Index of the part being accessed. - * @return A reference to he i'th literal in the sequence - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const Literal& getPart(int32_t i) const; - - /** - * The mutable `Reserved::Builder` class allows the reserved sequence to be - * constructed one part at a time. - * - * Builder is not copyable or movable. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API Builder : public UMemory { - private: - UVector* parts; // Not a LocalPointer for the same reason as in `SelectorKeys::Builder` - - public: - /** - * Adds a single literal to the reserved sequence. - * - * @param part The literal to be added. Passed by move. - * @param status Input/output error code - * @return A reference to the builder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Builder& add(Literal&& part, UErrorCode& status) noexcept; - /** - * Constructs a new immutable `Reserved` using the list of parts - * set with previous `add()` calls. - * - * The builder object (`this`) can still be used after calling `build()`. - * - * param status Input/output error code - * @return The new Reserved object - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Reserved build(UErrorCode& status) const noexcept; - /** - * Default constructor. - * Returns a builder with an empty Reserved sequence. - * - * param status Input/output error code - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Builder(UErrorCode& status); - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~Builder(); - Builder(const Builder&) = delete; - Builder& operator=(const Builder&) = delete; - Builder(Builder&&) = delete; - Builder& operator=(Builder&&) = delete; - }; // class Reserved::Builder - /** - * Non-member swap function. - * @param r1 will get r2's contents - * @param r2 will get r1's contents - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - friend inline void swap(Reserved& r1, Reserved& r2) noexcept { - using std::swap; - - swap(r1.bogus, r2.bogus); - swap(r1.parts, r2.parts); - swap(r1.len, r2.len); - } - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Reserved(const Reserved& other); - /** - * Assignment operator - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Reserved& operator=(Reserved) noexcept; - /** - * Default constructor. - * Puts the Reserved into a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Reserved() { parts = LocalArray(); } - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~Reserved(); - private: - friend class Builder; - friend class Operator; - - // True if a copy failed; this has to be distinguished - // from a valid `Reserved` with empty parts - bool bogus = false; - - // Possibly-empty list of parts - // `literal` reserved as a quoted literal; `reserved-char` / `reserved-escape` - // strings represented as unquoted literals - /* const */ LocalArray parts; - int32_t len = 0; - - Reserved(const UVector& parts, UErrorCode& status) noexcept; - // Helper - static void initLiterals(Reserved&, const Reserved&); - }; - /** * The `Literal` class corresponds to the `literal` nonterminal in the MessageFormat 2 grammar, * https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf and the @@ -349,8 +192,6 @@ namespace message2 { virtual ~Literal(); private: - friend class Reserved::Builder; - /* const */ bool thisIsQuoted = false; /* const */ UnicodeString contents; }; @@ -986,60 +827,22 @@ namespace message2 { }; // class OptionMap #endif - // Internal use only - #ifndef U_IN_DOXYGEN - class U_I18N_API Callable : public UObject { - public: - friend inline void swap(Callable& c1, Callable& c2) noexcept { - using std::swap; - - swap(c1.name, c2.name); - swap(c1.options, c2.options); - } - const FunctionName& getName() const { return name; } - const OptionMap& getOptions() const { return options; } - Callable(const FunctionName& f, const OptionMap& opts) : name(f), options(opts) {} - Callable& operator=(Callable) noexcept; - Callable(const Callable&); - Callable() = default; - virtual ~Callable(); - private: - /* const */ FunctionName name; - /* const */ OptionMap options; - }; - #endif } // namespace data_model } // namespace message2 U_NAMESPACE_END -/// @cond DOXYGEN_IGNORE -// Export an explicit template instantiation of the std::variant that is used as a -// data member of various MFDataModel classes. -// (When building DLLs for Windows this is required.) -// (See measunit_impl.h, datefmt.h, collationiterator.h, erarules.h and others -// for similar examples.) -#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN -#if defined(U_REAL_MSVC) && defined(_MSVC_STL_VERSION) -template class U_I18N_API std::_Variant_storage_; -#endif -template class U_I18N_API std::variant; -#endif -/// @endcond - U_NAMESPACE_BEGIN namespace message2 { namespace data_model { /** - * The `Operator` class corresponds to the `FunctionRef | Reserved` type in the + * The `Operator` class corresponds to the `FunctionRef` type in the * `Expression` interface defined in * https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#patterns * - * It represents the annotation that an expression can have: either a function name paired - * with a map from option names to operands (possibly empty), - * or a reserved sequence, which has no meaning and results in an error if the formatter - * is invoked. + * It represents the annotation that an expression can have: a function name paired + * with a map from option names to operands (possibly empty). * * `Operator` is immutable, copyable and movable. * @@ -1048,18 +851,8 @@ namespace message2 { */ class U_I18N_API Operator : public UObject { public: - /** - * Determines if this operator is a reserved annotation. - * - * @return true if and only if this operator represents a reserved sequence. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - UBool isReserved() const { return std::holds_alternative(contents); } /** * Accesses the function name. - * Precondition: !isReserved() * * @return The function name of this operator. * @@ -1067,19 +860,8 @@ namespace message2 { * @deprecated This API is for technology preview only. */ const FunctionName& getFunctionName() const; - /** - * Accesses the underlying reserved sequence. - * Precondition: isReserved() - * - * @return The reserved sequence represented by this operator. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const Reserved& asReserved() const; /** * Accesses function options. - * Precondition: !isReserved() * * @return A vector of function options for this operator. * @@ -1087,11 +869,7 @@ namespace message2 { * @deprecated This API is for technology preview only. */ std::vector