Skip to content

Commit

Permalink
Unicode 16 regenerated security files
Browse files Browse the repository at this point in the history
  • Loading branch information
markusicu committed Aug 15, 2024
1 parent 5845839 commit 7ea598e
Show file tree
Hide file tree
Showing 6 changed files with 311 additions and 322 deletions.
11 changes: 6 additions & 5 deletions unicodetools/data/security/dev/IdentifierType.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IdentifierType.txt
# Date: 2024-05-04, 21:31:06 GMT
# Date: 2024-08-14, 23:39:57 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -1406,12 +1406,14 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE
0830..083E ; Exclusion Not_XID # 5.2 [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
1680 ; Exclusion Not_XID # 3.0 OGHAM SPACE MARK
169B..169C ; Exclusion Not_XID # 3.0 [2] OGHAM FEATHER MARK..OGHAM REVERSED FEATHER MARK
16EB..16ED ; Exclusion Not_XID # 3.0 [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
1735..1736 ; Exclusion Not_XID # 3.2 [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
1800..180A ; Exclusion Not_XID # 3.0 [11] MONGOLIAN BIRGA..MONGOLIAN NIRUGU
1A1E..1A1F ; Exclusion Not_XID # 4.1 [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
2CE5..2CEA ; Exclusion Not_XID # 4.1 [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
2CF9..2CFF ; Exclusion Not_XID # 4.1 [7] COPTIC OLD NUBIAN FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
2E30 ; Exclusion Not_XID # 5.1 RING POINT
2E3C ; Exclusion Not_XID # 7.0 STENOGRAPHIC FULL STOP
A874..A877 ; Exclusion Not_XID # 5.0 [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
A95F ; Exclusion Not_XID # 5.1 REJANG SECTION MARK
10100..10102 ; Exclusion Not_XID # 4.0 [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
Expand Down Expand Up @@ -1509,7 +1511,7 @@ A95F ; Exclusion Not_XID # 5.1 REJANG SECTION MARK
1E5FF ; Exclusion Not_XID # 16.0 OL ONAL ABBREVIATION SIGN
1E8C7..1E8CF ; Exclusion Not_XID # 7.0 [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE

# Total code points: 1138
# Total code points: 1142

# Identifier_Type: Obsolete

Expand Down Expand Up @@ -1752,7 +1754,6 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH
1361..1368 ; Not_XID # 3.0 [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
1372..137C ; Not_XID # 3.0 [11] ETHIOPIC NUMBER TEN..ETHIOPIC NUMBER TEN THOUSAND
1390..1399 ; Not_XID # 4.1 [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
16EB..16ED ; Not_XID # 3.0 [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
17D4..17D6 ; Not_XID # 3.0 [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
17D9..17DB ; Not_XID # 3.0 [3] KHMER SIGN PHNAEK MUAN..KHMER CURRENCY SYMBOL RIEL
17F0..17F9 ; Not_XID # 4.0 [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
Expand Down Expand Up @@ -1923,7 +1924,7 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH
2E33..2E34 ; Not_XID # 6.1 [2] RAISED DOT..RAISED COMMA
2E36..2E38 ; Not_XID # 6.1 [3] DAGGER WITH LEFT GUARD..TURNED DAGGER
2E3A..2E3B ; Not_XID # 6.1 [2] TWO-EM DASH..THREE-EM DASH
2E3C..2E42 ; Not_XID # 7.0 [7] STENOGRAPHIC FULL STOP..DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E3D..2E42 ; Not_XID # 7.0 [6] VERTICAL SIX DOTS..DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E44 ; Not_XID # 9.0 [2] DASH WITH LEFT UPTURN..DOUBLE SUSPENSION MARK
2E45..2E49 ; Not_XID # 10.0 [5] INVERTED LOW KAVYKA..DOUBLE STACKED COMMA
2E4A..2E4E ; Not_XID # 11.0 [5] DOTTED SOLIDUS..PUNCTUS ELEVATUS MARK
Expand Down Expand Up @@ -2193,7 +2194,7 @@ FFFD ; Not_XID # 1.1 REPLACEMENT CHARACTE
1FB94..1FBCA ; Not_XID # 13.0 [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
1FBCB..1FBEF ; Not_XID # 16.0 [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE

# Total code points: 6415
# Total code points: 6411

# Identifier_Type: Not_NFKC

Expand Down
10 changes: 5 additions & 5 deletions unicodetools/data/security/dev/confusables.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# confusables.txt
# Date: 2024-05-31, 21:12:55 GMT
# Date: 2024-08-14, 23:39:57 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -593,7 +593,7 @@ FF40 ; 0027 ; MA #* ( ` → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘
02B9 ; 0027 ; MA # ( ʹ → ' ) MODIFIER LETTER PRIME → APOSTROPHE #
0374 ; 0027 ; MA # ( ʹ → ' ) GREEK NUMERAL SIGN → APOSTROPHE # →′→
02C8 ; 0027 ; MA # ( ˈ → ' ) MODIFIER LETTER VERTICAL LINE → APOSTROPHE #
02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →΄→→ʹ
02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →ʹ→→′
02CB ; 0027 ; MA # ( ˋ → ' ) MODIFIER LETTER GRAVE ACCENT → APOSTROPHE # →`→→‘→
02F4 ; 0027 ; MA #* ( ˴ → ' ) MODIFIER LETTER MIDDLE GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→
02BB ; 0027 ; MA # ( ʻ → ' ) MODIFIER LETTER TURNED COMMA → APOSTROPHE # →‘→
Expand Down Expand Up @@ -1904,8 +1904,8 @@ ABAF ; 0063 ; MA # ( ꮯ → c ) CHEROKEE SMALL LETTER TLI → LATIN SMALL LETTE
2DED ; 0368 ; MA # ( ⷭ → ͨ ) COMBINING CYRILLIC LETTER ES → COMBINING LATIN SMALL LETTER C #

1F74C ; 0043 ; MA #* ( 🝌 → C ) ALCHEMICAL SYMBOL FOR CALX → LATIN CAPITAL LETTER C #
118F2 ; 0043 ; MA #* ( 𑣲 → C ) WARANG CITI NUMBER NINETY → LATIN CAPITAL LETTER C #
118E9 ; 0043 ; MA # ( 𑣩 → C ) WARANG CITI DIGIT NINE → LATIN CAPITAL LETTER C #
118F2 ; 0043 ; MA #* ( 𑣲 → C ) WARANG CITI NUMBER NINETY → LATIN CAPITAL LETTER C #
FF23 ; 0043 ; MA # ( C → C ) FULLWIDTH LATIN CAPITAL LETTER C → LATIN CAPITAL LETTER C # →С→
216D ; 0043 ; MA # ( Ⅽ → C ) ROMAN NUMERAL ONE HUNDRED → LATIN CAPITAL LETTER C #
2102 ; 0043 ; MA # ( ℂ → C ) DOUBLE-STRUCK CAPITAL C → LATIN CAPITAL LETTER C #
Expand Down Expand Up @@ -3795,8 +3795,8 @@ A7DC ; 0245 0338 ; MA # ( Ƛ → Ʌ̸ ) LATIN CAPITAL LETTER LAMBDA WITH STROKE
1170F ; 0077 ; MA # ( 𑜏 → w ) AHOM LETTER SA → LATIN SMALL LETTER W #
AB83 ; 0077 ; MA # ( ꮃ → w ) CHEROKEE SMALL LETTER LA → LATIN SMALL LETTER W # →ᴡ→

118EF ; 0057 ; MA #* ( 𑣯 → W ) WARANG CITI NUMBER SIXTY → LATIN CAPITAL LETTER W #
118E6 ; 0057 ; MA # ( 𑣦 → W ) WARANG CITI DIGIT SIX → LATIN CAPITAL LETTER W #
118EF ; 0057 ; MA #* ( 𑣯 → W ) WARANG CITI NUMBER SIXTY → LATIN CAPITAL LETTER W #
1CCEC ; 0057 ; MA #* ( 𜳬 → W ) OUTLINED LATIN CAPITAL LETTER W → LATIN CAPITAL LETTER W #
1D416 ; 0057 ; MA # ( 𝐖 → W ) MATHEMATICAL BOLD CAPITAL W → LATIN CAPITAL LETTER W #
1D44A ; 0057 ; MA # ( 𝑊 → W ) MATHEMATICAL ITALIC CAPITAL W → LATIN CAPITAL LETTER W #
Expand Down Expand Up @@ -4002,8 +4002,8 @@ A76B ; 021D ; MA # ( ꝫ → ȝ ) LATIN SMALL LETTER ET → LATIN SMALL LETTER Y
AB93 ; 007A ; MA # ( ꮓ → z ) CHEROKEE SMALL LETTER NO → LATIN SMALL LETTER Z # →ᴢ→
118C4 ; 007A ; MA # ( 𑣄 → z ) WARANG CITI SMALL LETTER YA → LATIN SMALL LETTER Z #

102F5 ; 005A ; MA #* ( 𐋵 → Z ) COPTIC EPACT NUMBER THREE HUNDRED → LATIN CAPITAL LETTER Z #
118E5 ; 005A ; MA # ( 𑣥 → Z ) WARANG CITI DIGIT FIVE → LATIN CAPITAL LETTER Z #
102F5 ; 005A ; MA #* ( 𐋵 → Z ) COPTIC EPACT NUMBER THREE HUNDRED → LATIN CAPITAL LETTER Z #
FF3A ; 005A ; MA # ( Z → Z ) FULLWIDTH LATIN CAPITAL LETTER Z → LATIN CAPITAL LETTER Z # →Ζ→
2124 ; 005A ; MA # ( ℤ → Z ) DOUBLE-STRUCK CAPITAL Z → LATIN CAPITAL LETTER Z #
2128 ; 005A ; MA # ( ℨ → Z ) BLACK-LETTER CAPITAL Z → LATIN CAPITAL LETTER Z #
Expand Down
4 changes: 2 additions & 2 deletions unicodetools/data/security/dev/confusablesSummary.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# confusablesSummary.txt
# Date: 2024-05-31, 21:12:55 GMT
# Date: 2024-08-14, 23:39:56 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -91,7 +91,7 @@
← (‎ ʽ ‎) 02BD MODIFIER LETTER REVERSED COMMA # →‘→
← (‎ ʾ ‎) 02BE MODIFIER LETTER RIGHT HALF RING # →ʼ→→′→
← (‎ ˈ ‎) 02C8 MODIFIER LETTER VERTICAL LINE
← (‎ ˊ ‎) 02CA MODIFIER LETTER ACUTE ACCENT # →΄→→ʹ
← (‎ ˊ ‎) 02CA MODIFIER LETTER ACUTE ACCENT # →ʹ→→′
← (‎ ˋ ‎) 02CB MODIFIER LETTER GRAVE ACCENT # →`→→‘→
← (‎ ߴ ‎) 07F4 NKO HIGH TONE APOSTROPHE # →’→
← (‎ ߵ ‎) 07F5 NKO LOW TONE APOSTROPHE # →‘→
Expand Down
7 changes: 2 additions & 5 deletions unicodetools/data/security/dev/data/draft-restrictions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24194,6 +24194,7 @@ AB63 ; ; Uncommon_Use # (ꭣ) LATIN SMALL LETTER UO
0038 ; Allowed ; Recommended # (8) DIGIT EIGHT
0039 ; Allowed ; Recommended # (9) DIGIT NINE
0041..005A ; Allowed ; Recommended # [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
005F ; Allowed ; Recommended # (_) LOW LINE
0061 ; Allowed ; Recommended # (a) LATIN SMALL LETTER A
0062 ; Allowed ; Recommended # (b) LATIN SMALL LETTER B
0063 ; Allowed ; Recommended # (c) LATIN SMALL LETTER C
Expand Down Expand Up @@ -54817,11 +54818,7 @@ FA29 ; Allowed ; Recommended # (﨩) CJK COMPATIBILITY IDEOGRAPH-
323AF ; Allowed ; Recommended # (𲎯) CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; Allowed ; Recommended # [240] (U+E0100..U+E01EF) VARIATION SELECTOR-17..VARIATION SELECTOR-256

# Total code points: 113001

005F ; ~IDNA # (_) LOW LINE

# Total code points: 1
# Total code points: 113002

0000..002F ; ~Unicode Identifier # [48] (U+0000../) <control-0000>..SOLIDUS
003A..0040 ; ~Unicode Identifier # [7] (:..@) COLON..COMMERCIAL AT
Expand Down
8 changes: 5 additions & 3 deletions unicodetools/data/security/dev/data/idnchars.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# idnchars.txt
# Date: 2024-05-03, 03:51:22 GMT
# Date: 2024-08-14, 23:39:58 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand All @@ -11,8 +11,10 @@
#
# Allowed as output characters

0027 ; output # (') APOSTROPHE
002D..002E ; output # [2] (-...) HYPHEN-MINUS..FULL STOP
0030..0039 ; output # [10] (0..9) DIGIT ZERO..DIGIT NINE
0030..003A ; output # [11] (0..:) DIGIT ZERO..COLON
005F ; output # (_) LOW LINE
0061..007A ; output # [26] (a..z) LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00B7 ; output # (·) MIDDLE DOT
00DF..00F6 ; output # [24] (ß..ö) LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS
Expand Down Expand Up @@ -663,7 +665,7 @@ FA27..FA29 ; output # [3] (﨧..﨩) CJK COMPATIBILITY IDEOGRAPH-FA27..CJK
30000..3134A ; output # [4939] (𰀀..𱍊) CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; output # [4192] (𱍐..𲎯) CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 112107
# Total code points: 112110

# Not allowed at start of identifier

Expand Down
Loading

0 comments on commit 7ea598e

Please sign in to comment.