Skip to content

Commit

Permalink
ICU-22294 UTS46 transitional=deprecated, change DEFAULT
Browse files Browse the repository at this point in the history
  • Loading branch information
markusicu committed Sep 6, 2024
1 parent 415a7ac commit f062f52
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 138 deletions.
3 changes: 3 additions & 0 deletions icu4c/source/common/unicode/idna.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class U_COMMON_API IDNA : public UObject {
* The worker functions use transitional processing, including deviation mappings,
* unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
* is used in which case the deviation characters are passed through without change.
* <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
*
* Disallowed characters are mapped to U+FFFD.
*
Expand All @@ -82,6 +83,8 @@ class U_COMMON_API IDNA : public UObject {
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
*
* @param options Bit set to modify the processing and error checking.
* These should include UIDNA_DEFAULT, or
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
* See option bit set values in uidna.h.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
Expand Down
22 changes: 20 additions & 2 deletions icu4c/source/common/unicode/uidna.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,19 @@
*/
enum {
/**
* Default options value: None of the other options are set.
* Default options value: UTS #46 nontransitional processing.
* For use in static worker and factory methods.
*
* Since ICU 76, this is the same as
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE,
* corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
* (These options are ignored by the IDNA2003 implementation.)
*
* Before ICU 76, this constant did not set any of the options.
*
* @stable ICU 2.6
*/
UIDNA_DEFAULT=0,
UIDNA_DEFAULT=0x30,
#ifndef U_HIDE_DEPRECATED_API
/**
* Option to allow unassigned code points in domain names and labels.
Expand Down Expand Up @@ -91,19 +99,27 @@ enum {
/**
* IDNA option for nontransitional processing in ToASCII().
* For use in static worker and factory methods.
*
* <p>Without this option, ToASCII() uses transitional processing.
* Unicode 15.1 UTS #46 deprecated transitional processing.
*
* <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
* @see UIDNA_DEFAULT
*/
UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
/**
* IDNA option for nontransitional processing in ToUnicode().
* For use in static worker and factory methods.
*
* <p>Without this option, ToUnicode() uses transitional processing.
* Unicode 15.1 UTS #46 deprecated transitional processing.
*
* <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
* @see UIDNA_DEFAULT
*/
UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
/**
Expand Down Expand Up @@ -134,6 +150,8 @@ typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
* For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
*
* @param options Bit set to modify the processing and error checking.
* These should include UIDNA_DEFAULT, or
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
* See option bit set values in uidna.h.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
Expand Down
23 changes: 23 additions & 0 deletions icu4c/source/test/intltest/uts46test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class UTS46Test : public IntlTest {
void TestNotSTD3();
void TestInvalidPunycodeDigits();
void TestACELabelEdgeCases();
void TestDefaultNontransitional();
void TestTooLong();
void TestSomeCases();
void IdnaTest();
Expand Down Expand Up @@ -88,6 +89,7 @@ void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestNotSTD3);
TESTCASE_AUTO(TestInvalidPunycodeDigits);
TESTCASE_AUTO(TestACELabelEdgeCases);
TESTCASE_AUTO(TestDefaultNontransitional);
TESTCASE_AUTO(TestTooLong);
TESTCASE_AUTO(TestSomeCases);
TESTCASE_AUTO(IdnaTest);
Expand Down Expand Up @@ -354,6 +356,27 @@ void UTS46Test::TestACELabelEdgeCases() {
}
}

void UTS46Test::TestDefaultNontransitional() {
IcuTestErrorCode errorCode(*this, "TestDefaultNontransitional()");
// Unicode 15.1 UTS #46 deprecated transitional processing.
// ICU 76 changed UIDNA_DEFAULT to set the nontransitional options.
LocalPointer<IDNA> forZero(IDNA::createUTS46Instance(0, errorCode));
LocalPointer<IDNA> forDefault(IDNA::createUTS46Instance(UIDNA_DEFAULT, errorCode));
if(errorCode.isFailure()) {
return;
}
UnicodeString result;
IDNAInfo info;
forZero->labelToUnicode(u"Fⓤßẞ", result, info, errorCode);
assertEquals("forZero.toUnicode(Fⓤßẞ)", u"fussss", result);
forZero->labelToASCII(u"Fⓤßẞ", result, info, errorCode);
assertEquals("forZero.toASCII(Fⓤßẞ)", u"fussss", result);
forDefault->labelToUnicode(u"Fⓤßẞ", result, info, errorCode);
assertEquals("forDefault.toUnicode(Fⓤßẞ)", u"fußß", result);
forDefault->labelToASCII(u"Fⓤßẞ", result, info, errorCode);
assertEquals("forDefault.toASCII(Fⓤßẞ)", u"xn--fu-hiaa", result);
}

void UTS46Test::TestTooLong() {
// ICU-13727: Limit input length for n^2 algorithm
// where well-formed strings are at most 59 characters long.
Expand Down
Loading

0 comments on commit f062f52

Please sign in to comment.