From 19e4c53e4c1664491646acb2e7ff69f36c4ec97d Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Tue, 5 May 2020 12:35:39 -0700 Subject: [PATCH 1/9] adding the rest of the emoji codepoints in to see --- src/types/CodepointWidthDetector.cpp | 175 +++++++++++++++++++-------- src/types/convert.cpp | 90 -------------- 2 files changed, 122 insertions(+), 143 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 3d9b58bcec3..d5f661167ee 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -19,9 +19,13 @@ namespace return range.upperBound < searchTerm; } - static constexpr std::array s_wideAndAmbiguousTable{ + static constexpr std::array s_wideAndAmbiguousTable{ // generated from http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt // anything not present here is presumed to be Narrow. + + // GH #900 Supplemented with ranges from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt + // Codepoints marked as "emoji_presentation" (characters that by default should + // appear with an emoji presentation and so categorized as Wide). UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, @@ -112,7 +116,9 @@ namespace UnicodeRange{ 0x2160, 0x216b, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2170, 0x2179, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2189, 0x2189, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2190, 0x2199, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2190, 0x2193, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2194, 0x2199, CodepointWidth::Wide }, + UnicodeRange{ 0x21a9, 0x21aa, CodepointWidth::Wide }, UnicodeRange{ 0x21b8, 0x21b9, CodepointWidth::Ambiguous }, UnicodeRange{ 0x21d2, 0x21d2, CodepointWidth::Ambiguous }, UnicodeRange{ 0x21d4, 0x21d4, CodepointWidth::Ambiguous }, @@ -147,78 +153,122 @@ namespace UnicodeRange{ 0x22bf, 0x22bf, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2312, 0x2312, CodepointWidth::Ambiguous }, UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide }, - UnicodeRange{ 0x2329, 0x232a, CodepointWidth::Wide }, - UnicodeRange{ 0x23e9, 0x23ec, CodepointWidth::Wide }, - UnicodeRange{ 0x23f0, 0x23f0, CodepointWidth::Wide }, - UnicodeRange{ 0x23f3, 0x23f3, CodepointWidth::Wide }, - UnicodeRange{ 0x2460, 0x24e9, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2328, 0x232a, CodepointWidth::Wide }, + UnicodeRange{ 0x23cf, 0x23cf, CodepointWidth::Wide }, + UnicodeRange{ 0x23e9, 0x23ef, CodepointWidth::Wide }, + UnicodeRange{ 0x23f0, 0x23f3, CodepointWidth::Wide }, + UnicodeRange{ 0x23f8, 0x23fa, CodepointWidth::Wide }, + UnicodeRange{ 0x2460, 0x24c1, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x24c2, 0x24c2, CodepointWidth::Wide }, + UnicodeRange{ 0x24c3, 0x24e9, CodepointWidth::Ambiguous }, UnicodeRange{ 0x24eb, 0x254b, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2550, 0x2573, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2580, 0x258f, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2592, 0x2595, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a0, 0x25a1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a3, 0x25a9, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x25aa, 0x25ab, CodepointWidth::Wide }, UnicodeRange{ 0x25b2, 0x25b3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25b6, 0x25b7, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x25b6, 0x25b6, CodepointWidth::Wide }, + UnicodeRange{ 0x25b7, 0x25b7, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25bc, 0x25bd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25c0, 0x25c1, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x25c0, 0x25c0, CodepointWidth::Wide }, + UnicodeRange{ 0x25c1, 0x25c1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25c6, 0x25c8, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25cb, 0x25cb, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25e2, 0x25e5, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25ef, 0x25ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide }, + UnicodeRange{ 0x25fb, 0x25fe, CodepointWidth::Wide }, + UnicodeRange{ 0x2600, 0x2604, CodepointWidth::Wide }, UnicodeRange{ 0x2605, 0x2606, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x260e, 0x260e, CodepointWidth::Wide }, UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2611, 0x2611, CodepointWidth::Wide }, UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, + UnicodeRange{ 0x2618, 0x2618, CodepointWidth::Wide }, UnicodeRange{ 0x261c, 0x261c, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x261d, 0x261d, CodepointWidth::Wide }, UnicodeRange{ 0x261e, 0x261e, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2620, 0x2620, CodepointWidth::Wide }, + UnicodeRange{ 0x2622, 0x2623, CodepointWidth::Wide }, + UnicodeRange{ 0x2626, 0x2626, CodepointWidth::Wide }, + UnicodeRange{ 0x262a, 0x262a, CodepointWidth::Wide }, + UnicodeRange{ 0x262e, 0x262f, CodepointWidth::Wide }, + UnicodeRange{ 0x2638, 0x263a, CodepointWidth::Wide }, UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, - UnicodeRange{ 0x2660, 0x2661, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2663, 0x2665, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2667, 0x266a, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x265f, 0x265f, CodepointWidth::Wide }, + UnicodeRange{ 0x2660, 0x2660, CodepointWidth::Wide }, + UnicodeRange{ 0x2661, 0x2661, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2663, 0x2663, CodepointWidth::Wide }, + UnicodeRange{ 0x2664, 0x2664, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2665, 0x2666, CodepointWidth::Wide }, + UnicodeRange{ 0x2667, 0x2667, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2668, 0x2668, CodepointWidth::Wide }, + UnicodeRange{ 0x2669, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x267f, 0x267f, CodepointWidth::Wide }, - UnicodeRange{ 0x2693, 0x2693, CodepointWidth::Wide }, + UnicodeRange{ 0x267b, 0x267b, CodepointWidth::Wide }, + UnicodeRange{ 0x267e, 0x267f, CodepointWidth::Wide }, + UnicodeRange{ 0x2692, 0x2697, CodepointWidth::Wide }, + UnicodeRange{ 0x2699, 0x2699, CodepointWidth::Wide }, + UnicodeRange{ 0x269b, 0x269c, CodepointWidth::Wide }, UnicodeRange{ 0x269e, 0x269f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26a1, 0x26a1, CodepointWidth::Wide }, + UnicodeRange{ 0x26a0, 0x26a1, CodepointWidth::Wide }, + UnicodeRange{ 0x26a7, 0x26a7, CodepointWidth::Wide }, UnicodeRange{ 0x26aa, 0x26ab, CodepointWidth::Wide }, + UnicodeRange{ 0x26b0, 0x26b1, CodepointWidth::Wide }, UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide }, UnicodeRange{ 0x26bf, 0x26bf, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, - UnicodeRange{ 0x26c6, 0x26cd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ce, 0x26ce, CodepointWidth::Wide }, - UnicodeRange{ 0x26cf, 0x26d3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26d4, 0x26d4, CodepointWidth::Wide }, + UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, + UnicodeRange{ 0x26c6, 0x26c7, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26c8, 0x26c8, CodepointWidth::Wide }, + UnicodeRange{ 0x26c9, 0x26cd, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26ce, 0x26cf, CodepointWidth::Wide }, + UnicodeRange{ 0x26d0, 0x26d0, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26d1, 0x26d1, CodepointWidth::Wide }, + UnicodeRange{ 0x26d2, 0x26d2, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26d3, 0x26d4, CodepointWidth::Wide }, UnicodeRange{ 0x26d5, 0x26e1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26e3, 0x26e3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26e8, 0x26e9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ea, 0x26ea, CodepointWidth::Wide }, - UnicodeRange{ 0x26eb, 0x26f1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f2, 0x26f3, CodepointWidth::Wide }, - UnicodeRange{ 0x26f4, 0x26f4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f5, 0x26f5, CodepointWidth::Wide }, - UnicodeRange{ 0x26f6, 0x26f9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26fa, 0x26fa, CodepointWidth::Wide }, + UnicodeRange{ 0x26e8, 0x26e8, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26e9, 0x26ea, CodepointWidth::Wide }, + UnicodeRange{ 0x26eb, 0x26ef, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26f0, 0x26f5, CodepointWidth::Wide }, + UnicodeRange{ 0x26f6, 0x26f6, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x26f7, 0x26fa, CodepointWidth::Wide }, UnicodeRange{ 0x26fb, 0x26fc, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide }, UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2702, 0x2702, CodepointWidth::Wide }, UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, - UnicodeRange{ 0x270a, 0x270b, CodepointWidth::Wide }, + UnicodeRange{ 0x2708, 0x270f, CodepointWidth::Wide }, + UnicodeRange{ 0x2712, 0x2712, CodepointWidth::Wide }, + UnicodeRange{ 0x2714, 0x2714, CodepointWidth::Wide }, + UnicodeRange{ 0x2716, 0x2716, CodepointWidth::Wide }, + UnicodeRange{ 0x271d, 0x271d, CodepointWidth::Wide }, + UnicodeRange{ 0x2721, 0x2721, CodepointWidth::Wide }, UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide }, + UnicodeRange{ 0x2733, 0x2734, CodepointWidth::Wide }, UnicodeRange{ 0x273d, 0x273d, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2744, 0x2744, CodepointWidth::Wide }, + UnicodeRange{ 0x2747, 0x2747, CodepointWidth::Wide }, UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide }, UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, + UnicodeRange{ 0x2763, 0x2764, CodepointWidth::Wide }, UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, + UnicodeRange{ 0x27a1, 0x27a1, CodepointWidth::Wide }, UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, + UnicodeRange{ 0x2934, 0x2935, CodepointWidth::Wide }, + UnicodeRange{ 0x2b05, 0x2b07, CodepointWidth::Wide }, UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, UnicodeRange{ 0x2b50, 0x2b50, CodepointWidth::Wide }, UnicodeRange{ 0x2b55, 0x2b55, CodepointWidth::Wide }, @@ -268,40 +318,59 @@ namespace UnicodeRange{ 0x1f18f, 0x1f190, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide }, UnicodeRange{ 0x1f19b, 0x1f1ac, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x1f1e6, 0x1f1ff, CodepointWidth::Wide }, UnicodeRange{ 0x1f200, 0x1f202, CodepointWidth::Wide }, UnicodeRange{ 0x1f210, 0x1f23b, CodepointWidth::Wide }, UnicodeRange{ 0x1f240, 0x1f248, CodepointWidth::Wide }, UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide }, UnicodeRange{ 0x1f260, 0x1f265, CodepointWidth::Wide }, - UnicodeRange{ 0x1f300, 0x1f320, CodepointWidth::Wide }, - UnicodeRange{ 0x1f32d, 0x1f335, CodepointWidth::Wide }, - UnicodeRange{ 0x1f337, 0x1f37c, CodepointWidth::Wide }, - UnicodeRange{ 0x1f37e, 0x1f393, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3a0, 0x1f3ca, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3cf, 0x1f3d3, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3e0, 0x1f3f0, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3f4, 0x1f3f4, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3f8, 0x1f43e, CodepointWidth::Wide }, - UnicodeRange{ 0x1f440, 0x1f440, CodepointWidth::Wide }, - UnicodeRange{ 0x1f442, 0x1f4fc, CodepointWidth::Wide }, + UnicodeRange{ 0x1f300, 0x1f321, CodepointWidth::Wide }, + UnicodeRange{ 0x1f324, 0x1f393, CodepointWidth::Wide }, + UnicodeRange{ 0x1f396, 0x1f397, CodepointWidth::Wide }, + UnicodeRange{ 0x1f399, 0x1f39b, CodepointWidth::Wide }, + UnicodeRange{ 0x1f39e, 0x1f39f, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3a0, 0x1f3f0, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3f3, 0x1f3f5, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3f7, 0x1f4fd, CodepointWidth::Wide }, UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide }, - UnicodeRange{ 0x1f54b, 0x1f54e, CodepointWidth::Wide }, + UnicodeRange{ 0x1f549, 0x1f54e, CodepointWidth::Wide }, UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide }, - UnicodeRange{ 0x1f57a, 0x1f57a, CodepointWidth::Wide }, + UnicodeRange{ 0x1f56f, 0x1f570, CodepointWidth::Wide }, + UnicodeRange{ 0x1f573, 0x1f57a, CodepointWidth::Wide }, + UnicodeRange{ 0x1f587, 0x1f587, CodepointWidth::Wide }, + UnicodeRange{ 0x1f58a, 0x1f58d, CodepointWidth::Wide }, + UnicodeRange{ 0x1f590, 0x1f590, CodepointWidth::Wide }, UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5a4, 0x1f5a4, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5fb, 0x1f64f, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5a4, 0x1f5a5, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5a8, 0x1f5a8, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5b1, 0x1f5b2, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5bc, 0x1f5bc, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5c2, 0x1f5c4, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5d1, 0x1f5d2, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5dc, 0x1f5de, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5e1, 0x1f5e1, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5e3, 0x1f5e3, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5e8, 0x1f5e8, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5ef, 0x1f5ef, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5f3, 0x1f5f3, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5fa, 0x1f64f, CodepointWidth::Wide }, UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6cc, 0x1f6cc, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6d0, 0x1f6d2, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6cb, 0x1f6d2, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6d5, 0x1f6d7, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6e0, 0x1f6e5, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6e9, 0x1f6e9, CodepointWidth::Wide }, UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6f4, 0x1f6f8, CodepointWidth::Wide }, - UnicodeRange{ 0x1f910, 0x1f93e, CodepointWidth::Wide }, - UnicodeRange{ 0x1f940, 0x1f94c, CodepointWidth::Wide }, - UnicodeRange{ 0x1f950, 0x1f96b, CodepointWidth::Wide }, - UnicodeRange{ 0x1f980, 0x1f997, CodepointWidth::Wide }, - UnicodeRange{ 0x1f9c0, 0x1f9c0, CodepointWidth::Wide }, - UnicodeRange{ 0x1f9d0, 0x1f9e6, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6f0, 0x1f6f0, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6f3, 0x1f6fc, CodepointWidth::Wide }, + UnicodeRange{ 0x1f7e0, 0x1f7eb, CodepointWidth::Wide }, + UnicodeRange{ 0x1f90c, 0x1f9ff, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa70, 0x1fa74, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa78, 0x1fa7A, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa80, 0x1fa86, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa90, 0x1faa8, CodepointWidth::Wide }, + UnicodeRange{ 0x1fab0, 0x1fab6, CodepointWidth::Wide }, + UnicodeRange{ 0x1fac0, 0x1fac2, CodepointWidth::Wide }, + UnicodeRange{ 0x1fad0, 0x1fad6, CodepointWidth::Wide }, UnicodeRange{ 0x20000, 0x2fffd, CodepointWidth::Wide }, UnicodeRange{ 0x30000, 0x3fffd, CodepointWidth::Wide }, UnicodeRange{ 0xe0100, 0xe01ef, CodepointWidth::Ambiguous }, diff --git a/src/types/convert.cpp b/src/types/convert.cpp index f4608a2e35a..f33ab36201d 100644 --- a/src/types/convert.cpp +++ b/src/types/convert.cpp @@ -384,96 +384,6 @@ CodepointWidth GetQuickCharWidth(const wchar_t wch) noexcept { return CodepointWidth::Narrow; } - // 0x2010 - 0x2B59 varies between narrow, ambiguous, and wide by character and font (Unicode 9.0) - // However, there are a bunch of retroactive-emoji in this range. Things that weren't emoji and then they became - // "emoji" later. As a result, they jumped from a fixed narrow definition to a now ambiguous definition. - // There are others in this range already defined as wide or ambiguous, but we're just going to - // implicitly say they're all ambiguous here to force a font lookup. - // I picked the ones that looked like color double-wide emoji in my browser that weren't already - // covered easily by the half-width/full-width table (see CodepointWidthDetector.cpp) - // See https://unicode.org/Public/emoji/12.0/emoji-data.txt - else if ((0x2194 <= wch && wch <= 0x2199) || - (0x21A9 <= wch && wch <= 0x21AA) || - (0x231A <= wch && wch <= 0x231B) || - 0x2328 == wch || - 0x23CF == wch || - (0x23E9 <= wch && wch <= 0x23F3) || - (0x23F8 <= wch && wch <= 0x23FA) || - 0x24C2 == wch || - (0x25AA <= wch && wch <= 0x25AB) || - 0x25B6 == wch || - 0x25C0 == wch || - (0x25FB <= wch && wch <= 0x25FE) || - (0x2600 <= wch && wch <= 0x2604) || - 0x260E == wch || - 0x2611 == wch || - (0x2614 <= wch && wch <= 0x2615) || - 0x2618 == wch || - 0x261D == wch || - 0x2620 == wch || - (0x2622 <= wch && wch <= 0x2623) || - 0x2626 == wch || - 0x262A == wch || - (0x262E <= wch && wch <= 0x262F) || - (0x2638 <= wch && wch <= 0x263A) || - 0x2640 == wch || - 0x2642 == wch || - (0x2648 <= wch && wch <= 0x2653) || - (0x265F <= wch && wch <= 0x2660) || - 0x2663 == wch || - (0x2665 <= wch && wch <= 0x2666) || - 0x2668 == wch || - 0x267B == wch || - (0x267E <= wch && wch <= 0x267F) || - (0x2692 <= wch && wch <= 0x2697) || - 0x2699 == wch || - (0x269B <= wch && wch <= 0x269C) || - (0x26A0 <= wch && wch <= 0x26A1) || - (0x26AA <= wch && wch <= 0x26AB) || - (0x26B0 <= wch && wch <= 0x26B1) || - (0x26BD <= wch && wch <= 0x26BE) || - (0x26C4 <= wch && wch <= 0x26C5) || - 0x26C8 == wch || - 0x26CE == wch || - 0x26CF == wch || - 0x26D1 == wch || - (0x26D3 <= wch && wch <= 0x26D4) || - (0x26E9 <= wch && wch <= 0x26EA) || - (0x26F0 <= wch && wch <= 0x26F5) || - (0x26F7 <= wch && wch <= 0x26FA) || - 0x26FD == wch || - 0x2702 == wch || - 0x2705 == wch || - (0x2708 <= wch && wch <= 0x2709) || - (0x270A <= wch && wch <= 0x270B) || - (0x270C <= wch && wch <= 0x270D) || - 0x270F == wch || - 0x2712 == wch || - 0x2714 == wch || - 0x2716 == wch || - 0x271D == wch || - 0x2721 == wch || - 0x2728 == wch || - (0x2733 <= wch && wch <= 0x2734) || - 0x2744 == wch || - 0x2747 == wch || - 0x274C == wch || - 0x274E == wch || - (0x2753 <= wch && wch <= 0x2755) || - 0x2757 == wch || - (0x2763 <= wch && wch <= 0x2764) || - (0x2795 <= wch && wch <= 0x2797) || - 0x27A1 == wch || - 0x27B0 == wch || - 0x27BF == wch || - (0x2934 <= wch && wch <= 0x2935) || - (0x2B05 <= wch && wch <= 0x2B07) || - (0x2B1B <= wch && wch <= 0x2B1C) || - 0x2B50 == wch || - 0x2B55 == wch) - { - return CodepointWidth::Ambiguous; - } else if (0x2B5A <= wch && wch <= 0x2E44) { // From Unicode 9.0, this range is narrow (assorted languages) From e0eac63e4f01c257beb3b1ce2aae4e23a5a27092 Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Tue, 5 May 2020 14:50:38 -0700 Subject: [PATCH 2/9] comment --- src/types/CodepointWidthDetector.cpp | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index d5f661167ee..6f296638a54 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -22,10 +22,7 @@ namespace static constexpr std::array s_wideAndAmbiguousTable{ // generated from http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt // anything not present here is presumed to be Narrow. - - // GH #900 Supplemented with ranges from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt - // Codepoints marked as "emoji_presentation" (characters that by default should - // appear with an emoji presentation and so categorized as Wide). + // GH #900 Supplemented with emoji ranges from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, @@ -116,9 +113,7 @@ namespace UnicodeRange{ 0x2160, 0x216b, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2170, 0x2179, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2189, 0x2189, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2190, 0x2193, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2194, 0x2199, CodepointWidth::Wide }, - UnicodeRange{ 0x21a9, 0x21aa, CodepointWidth::Wide }, + UnicodeRange{ 0x2190, 0x2199, CodepointWidth::Ambiguous }, UnicodeRange{ 0x21b8, 0x21b9, CodepointWidth::Ambiguous }, UnicodeRange{ 0x21d2, 0x21d2, CodepointWidth::Ambiguous }, UnicodeRange{ 0x21d4, 0x21d4, CodepointWidth::Ambiguous }, @@ -169,11 +164,9 @@ namespace UnicodeRange{ 0x25a3, 0x25a9, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25aa, 0x25ab, CodepointWidth::Wide }, UnicodeRange{ 0x25b2, 0x25b3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25b6, 0x25b6, CodepointWidth::Wide }, - UnicodeRange{ 0x25b7, 0x25b7, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x25b6, 0x25b7, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25bc, 0x25bd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25c0, 0x25c0, CodepointWidth::Wide }, - UnicodeRange{ 0x25c1, 0x25c1, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x25c0, 0x25c1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25c6, 0x25c8, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25cb, 0x25cb, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous }, @@ -267,8 +260,6 @@ namespace UnicodeRange{ 0x27a1, 0x27a1, CodepointWidth::Wide }, UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, - UnicodeRange{ 0x2934, 0x2935, CodepointWidth::Wide }, - UnicodeRange{ 0x2b05, 0x2b07, CodepointWidth::Wide }, UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, UnicodeRange{ 0x2b50, 0x2b50, CodepointWidth::Wide }, UnicodeRange{ 0x2b55, 0x2b55, CodepointWidth::Wide }, @@ -313,7 +304,10 @@ namespace UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f130, 0x1f169, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f170, 0x1f18d, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x1f170, 0x1f171, CodepointWidth::Wide }, + UnicodeRange{ 0x1f172, 0x1f17d, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x1f17e, 0x1f17f, CodepointWidth::Wide }, + UnicodeRange{ 0x1f180, 0x1f18d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide }, UnicodeRange{ 0x1f18f, 0x1f190, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide }, From e8e2beb8be59d39b2ce2ca4a25a241d81b6ebc92 Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Thu, 7 May 2020 11:14:49 -0700 Subject: [PATCH 3/9] comments and microsoft emoji additions --- src/types/CodepointWidthDetector.cpp | 30 +++++++++++++++------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 6f296638a54..610ae9bf2ab 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -22,7 +22,15 @@ namespace static constexpr std::array s_wideAndAmbiguousTable{ // generated from http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt // anything not present here is presumed to be Narrow. - // GH #900 Supplemented with emoji ranges from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt + // + // GH #900 - Supplemented with emoji codepoints from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt + // Emojis in 0x2010 - 0x2B59 used to be marked as Ambiguous in GetQuickCharWidth() in order to force a font lookup, + // but since we default all Ambiguous width to Narrow, those emojis always came out looking squished/tiny. They've been + // moved into this table and marked as Wide. + // There are also some codepoints that Microsoft has given an emoji presentation where it isn't specified + // as emoji in the unicode standard or are specified as Narrow/Ambiguous in the EastAsianWidth.txt. + // Some of them are included in here, but there is no easy way to get a full list + // of Microsoft specific emoji, so others may be missing. UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, @@ -178,12 +186,10 @@ namespace UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, UnicodeRange{ 0x260e, 0x260e, CodepointWidth::Wide }, UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2611, 0x2611, CodepointWidth::Wide }, + UnicodeRange{ 0x2611, 0x2612, CodepointWidth::Wide }, // MS addition UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, UnicodeRange{ 0x2618, 0x2618, CodepointWidth::Wide }, - UnicodeRange{ 0x261c, 0x261c, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x261d, 0x261d, CodepointWidth::Wide }, - UnicodeRange{ 0x261e, 0x261e, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x261a, 0x261f, CodepointWidth::Wide }, // 0x261A-0x261C, 0x261E-0x261F MS addition UnicodeRange{ 0x2620, 0x2620, CodepointWidth::Wide }, UnicodeRange{ 0x2622, 0x2623, CodepointWidth::Wide }, UnicodeRange{ 0x2626, 0x2626, CodepointWidth::Wide }, @@ -195,11 +201,7 @@ namespace UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, UnicodeRange{ 0x265f, 0x265f, CodepointWidth::Wide }, UnicodeRange{ 0x2660, 0x2660, CodepointWidth::Wide }, - UnicodeRange{ 0x2661, 0x2661, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2663, 0x2663, CodepointWidth::Wide }, - UnicodeRange{ 0x2664, 0x2664, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2665, 0x2666, CodepointWidth::Wide }, - UnicodeRange{ 0x2667, 0x2667, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2661, 0x2667, CodepointWidth::Wide }, // 0x2661, 0x2662, 0x2664, 0x2666 MS addition UnicodeRange{ 0x2668, 0x2668, CodepointWidth::Wide }, UnicodeRange{ 0x2669, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, @@ -239,7 +241,7 @@ namespace UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2702, 0x2702, CodepointWidth::Wide }, UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, - UnicodeRange{ 0x2708, 0x270f, CodepointWidth::Wide }, + UnicodeRange{ 0x2708, 0x2710, CodepointWidth::Wide }, // 0x2710, 0x270E MS addition UnicodeRange{ 0x2712, 0x2712, CodepointWidth::Wide }, UnicodeRange{ 0x2714, 0x2714, CodepointWidth::Wide }, UnicodeRange{ 0x2716, 0x2716, CodepointWidth::Wide }, @@ -254,7 +256,7 @@ namespace UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, - UnicodeRange{ 0x2763, 0x2764, CodepointWidth::Wide }, + UnicodeRange{ 0x2763, 0x2765, CodepointWidth::Wide }, // 0x2765 MS addition UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, UnicodeRange{ 0x27a1, 0x27a1, CodepointWidth::Wide }, @@ -299,7 +301,7 @@ namespace UnicodeRange{ 0x18800, 0x18af2, CodepointWidth::Wide }, UnicodeRange{ 0x1b000, 0x1b11e, CodepointWidth::Wide }, UnicodeRange{ 0x1b170, 0x1b2fb, CodepointWidth::Wide }, - UnicodeRange{ 0x1f004, 0x1f004, CodepointWidth::Wide }, + UnicodeRange{ 0x1f000, 0x1f02b, CodepointWidth::Wide }, // All mahjong tiles except 0x1f004 are MS addition UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide }, UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous }, @@ -334,7 +336,7 @@ namespace UnicodeRange{ 0x1f587, 0x1f587, CodepointWidth::Wide }, UnicodeRange{ 0x1f58a, 0x1f58d, CodepointWidth::Wide }, UnicodeRange{ 0x1f590, 0x1f590, CodepointWidth::Wide }, - UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, + UnicodeRange{ 0x1f594, 0x1f596, CodepointWidth::Wide }, // 0x1f594 MS addition UnicodeRange{ 0x1f5a4, 0x1f5a5, CodepointWidth::Wide }, UnicodeRange{ 0x1f5a8, 0x1f5a8, CodepointWidth::Wide }, UnicodeRange{ 0x1f5b1, 0x1f5b2, CodepointWidth::Wide }, From 7ae5d203c9d04936904a05e97769c955aff2775f Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Fri, 8 May 2020 10:49:05 -0700 Subject: [PATCH 4/9] marking the overridden ones --- src/types/CodepointWidthDetector.cpp | 191 +++++++++++++-------------- 1 file changed, 93 insertions(+), 98 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 610ae9bf2ab..24049f2ddab 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -19,18 +19,17 @@ namespace return range.upperBound < searchTerm; } - static constexpr std::array s_wideAndAmbiguousTable{ + static constexpr std::array s_wideAndAmbiguousTable{ // generated from http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt // anything not present here is presumed to be Narrow. // // GH #900 - Supplemented with emoji codepoints from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt - // Emojis in 0x2010 - 0x2B59 used to be marked as Ambiguous in GetQuickCharWidth() in order to force a font lookup, - // but since we default all Ambiguous width to Narrow, those emojis always came out looking squished/tiny. They've been - // moved into this table and marked as Wide. - // There are also some codepoints that Microsoft has given an emoji presentation where it isn't specified - // as emoji in the unicode standard or are specified as Narrow/Ambiguous in the EastAsianWidth.txt. - // Some of them are included in here, but there is no easy way to get a full list - // of Microsoft specific emoji, so others may be missing. + // Emojis in 0x2010 - 0x2B59 used to be marked as Ambiguous in GetQuickCharWidth() in order to + // force a font lookup, but since we default all Ambiguous width to Narrow, those emojis always + // came out looking squished/tiny. They've been moved into this table and marked as Wide. + // ** Codepoint ranges marked with "OVR" have their given width from EastAsianWidth.txt overridden. + // ** Codepoint ranges marked with "MS" are given an emoji presentation by Microsoft but are not + // necessarily specified as an emoji in the unicode standard. UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, @@ -156,13 +155,13 @@ namespace UnicodeRange{ 0x22bf, 0x22bf, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2312, 0x2312, CodepointWidth::Ambiguous }, UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide }, - UnicodeRange{ 0x2328, 0x232a, CodepointWidth::Wide }, - UnicodeRange{ 0x23cf, 0x23cf, CodepointWidth::Wide }, - UnicodeRange{ 0x23e9, 0x23ef, CodepointWidth::Wide }, - UnicodeRange{ 0x23f0, 0x23f3, CodepointWidth::Wide }, - UnicodeRange{ 0x23f8, 0x23fa, CodepointWidth::Wide }, + UnicodeRange{ 0x2328, 0x232a, CodepointWidth::Wide }, // OVR 328 + UnicodeRange{ 0x23cf, 0x23cf, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x23e9, 0x23ef, CodepointWidth::Wide }, // OVR 3ed-3ef + UnicodeRange{ 0x23f0, 0x23f3, CodepointWidth::Wide }, // OVR 3f1-3f2 + UnicodeRange{ 0x23f8, 0x23fa, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2460, 0x24c1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x24c2, 0x24c2, CodepointWidth::Wide }, + UnicodeRange{ 0x24c2, 0x24c2, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x24c3, 0x24e9, CodepointWidth::Ambiguous }, UnicodeRange{ 0x24eb, 0x254b, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2550, 0x2573, CodepointWidth::Ambiguous }, @@ -170,7 +169,7 @@ namespace UnicodeRange{ 0x2592, 0x2595, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a0, 0x25a1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a3, 0x25a9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25aa, 0x25ab, CodepointWidth::Wide }, + UnicodeRange{ 0x25aa, 0x25ab, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x25b2, 0x25b3, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25b6, 0x25b7, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25bc, 0x25bd, CodepointWidth::Ambiguous }, @@ -180,86 +179,82 @@ namespace UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25e2, 0x25e5, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25ef, 0x25ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25fb, 0x25fe, CodepointWidth::Wide }, - UnicodeRange{ 0x2600, 0x2604, CodepointWidth::Wide }, + UnicodeRange{ 0x25fb, 0x25fe, CodepointWidth::Wide }, // OVR 5fb-5fc + UnicodeRange{ 0x2600, 0x2604, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2605, 0x2606, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x260e, 0x260e, CodepointWidth::Wide }, + UnicodeRange{ 0x260e, 0x260e, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2611, 0x2612, CodepointWidth::Wide }, // MS addition + UnicodeRange{ 0x2611, 0x2612, CodepointWidth::Wide }, // OVR all; MS all UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, - UnicodeRange{ 0x2618, 0x2618, CodepointWidth::Wide }, - UnicodeRange{ 0x261a, 0x261f, CodepointWidth::Wide }, // 0x261A-0x261C, 0x261E-0x261F MS addition - UnicodeRange{ 0x2620, 0x2620, CodepointWidth::Wide }, - UnicodeRange{ 0x2622, 0x2623, CodepointWidth::Wide }, - UnicodeRange{ 0x2626, 0x2626, CodepointWidth::Wide }, - UnicodeRange{ 0x262a, 0x262a, CodepointWidth::Wide }, - UnicodeRange{ 0x262e, 0x262f, CodepointWidth::Wide }, - UnicodeRange{ 0x2638, 0x263a, CodepointWidth::Wide }, + UnicodeRange{ 0x2618, 0x2618, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x261a, 0x261f, CodepointWidth::Wide }, // OVR all; MS 61a-61c, 61e-61f + UnicodeRange{ 0x2620, 0x2620, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2622, 0x2623, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2626, 0x2626, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x262a, 0x262a, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x262e, 0x262f, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2638, 0x263a, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, - UnicodeRange{ 0x265f, 0x265f, CodepointWidth::Wide }, - UnicodeRange{ 0x2660, 0x2660, CodepointWidth::Wide }, - UnicodeRange{ 0x2661, 0x2667, CodepointWidth::Wide }, // 0x2661, 0x2662, 0x2664, 0x2666 MS addition - UnicodeRange{ 0x2668, 0x2668, CodepointWidth::Wide }, + UnicodeRange{ 0x265f, 0x2668, CodepointWidth::Wide }, // OVR all; MS 661, 662, 664, 666 UnicodeRange{ 0x2669, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x267b, 0x267b, CodepointWidth::Wide }, - UnicodeRange{ 0x267e, 0x267f, CodepointWidth::Wide }, - UnicodeRange{ 0x2692, 0x2697, CodepointWidth::Wide }, - UnicodeRange{ 0x2699, 0x2699, CodepointWidth::Wide }, - UnicodeRange{ 0x269b, 0x269c, CodepointWidth::Wide }, + UnicodeRange{ 0x267b, 0x267b, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x267e, 0x267f, CodepointWidth::Wide }, // OVR 67e + UnicodeRange{ 0x2692, 0x2697, CodepointWidth::Wide }, // OVR 692, 694-697 + UnicodeRange{ 0x2699, 0x2699, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x269b, 0x269c, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x269e, 0x269f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26a0, 0x26a1, CodepointWidth::Wide }, - UnicodeRange{ 0x26a7, 0x26a7, CodepointWidth::Wide }, + UnicodeRange{ 0x26a0, 0x26a1, CodepointWidth::Wide }, // OVR 6a0 + UnicodeRange{ 0x26a7, 0x26a7, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x26aa, 0x26ab, CodepointWidth::Wide }, - UnicodeRange{ 0x26b0, 0x26b1, CodepointWidth::Wide }, + UnicodeRange{ 0x26b0, 0x26b1, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide }, UnicodeRange{ 0x26bf, 0x26bf, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, - UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, UnicodeRange{ 0x26c6, 0x26c7, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26c8, 0x26c8, CodepointWidth::Wide }, + UnicodeRange{ 0x26c8, 0x26c8, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x26c9, 0x26cd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ce, 0x26cf, CodepointWidth::Wide }, + UnicodeRange{ 0x26ce, 0x26cf, CodepointWidth::Wide }, // OVR 6CF UnicodeRange{ 0x26d0, 0x26d0, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26d1, 0x26d1, CodepointWidth::Wide }, + UnicodeRange{ 0x26d1, 0x26d1, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x26d2, 0x26d2, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26d3, 0x26d4, CodepointWidth::Wide }, + UnicodeRange{ 0x26d3, 0x26d4, CodepointWidth::Wide }, // OVR 6d3 UnicodeRange{ 0x26d5, 0x26e1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26e3, 0x26e3, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26e8, 0x26e8, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26e9, 0x26ea, CodepointWidth::Wide }, + UnicodeRange{ 0x26e9, 0x26ea, CodepointWidth::Wide }, // OVR 6e9 UnicodeRange{ 0x26eb, 0x26ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f0, 0x26f5, CodepointWidth::Wide }, + UnicodeRange{ 0x26f0, 0x26f5, CodepointWidth::Wide }, // OVR 6f0-6f1, 6f4 UnicodeRange{ 0x26f6, 0x26f6, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f7, 0x26fa, CodepointWidth::Wide }, + UnicodeRange{ 0x26f7, 0x26fa, CodepointWidth::Wide }, // OVR 6f8-6f9 UnicodeRange{ 0x26fb, 0x26fc, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide }, UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2702, 0x2702, CodepointWidth::Wide }, + UnicodeRange{ 0x2702, 0x2702, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, - UnicodeRange{ 0x2708, 0x2710, CodepointWidth::Wide }, // 0x2710, 0x270E MS addition - UnicodeRange{ 0x2712, 0x2712, CodepointWidth::Wide }, - UnicodeRange{ 0x2714, 0x2714, CodepointWidth::Wide }, - UnicodeRange{ 0x2716, 0x2716, CodepointWidth::Wide }, - UnicodeRange{ 0x271d, 0x271d, CodepointWidth::Wide }, - UnicodeRange{ 0x2721, 0x2721, CodepointWidth::Wide }, + UnicodeRange{ 0x2708, 0x2710, CodepointWidth::Wide }, // OVR 708-709, 70c-710; MS 710, 70e + UnicodeRange{ 0x2712, 0x2712, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2714, 0x2714, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2716, 0x2716, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x271d, 0x271d, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2721, 0x2721, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide }, - UnicodeRange{ 0x2733, 0x2734, CodepointWidth::Wide }, + UnicodeRange{ 0x2733, 0x2734, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x273d, 0x273d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2744, 0x2744, CodepointWidth::Wide }, - UnicodeRange{ 0x2747, 0x2747, CodepointWidth::Wide }, + UnicodeRange{ 0x2744, 0x2744, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2747, 0x2747, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide }, UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, - UnicodeRange{ 0x2763, 0x2765, CodepointWidth::Wide }, // 0x2765 MS addition + UnicodeRange{ 0x2763, 0x2765, CodepointWidth::Wide }, // OVR all, MS 65 UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, - UnicodeRange{ 0x27a1, 0x27a1, CodepointWidth::Wide }, + UnicodeRange{ 0x27a1, 0x27a1, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, @@ -301,67 +296,67 @@ namespace UnicodeRange{ 0x18800, 0x18af2, CodepointWidth::Wide }, UnicodeRange{ 0x1b000, 0x1b11e, CodepointWidth::Wide }, UnicodeRange{ 0x1b170, 0x1b2fb, CodepointWidth::Wide }, - UnicodeRange{ 0x1f000, 0x1f02b, CodepointWidth::Wide }, // All mahjong tiles except 0x1f004 are MS addition + UnicodeRange{ 0x1f000, 0x1f02b, CodepointWidth::Wide }, // OVR and MS all except 004 UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide }, UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f130, 0x1f169, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f170, 0x1f171, CodepointWidth::Wide }, + UnicodeRange{ 0x1f170, 0x1f171, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f172, 0x1f17d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f17e, 0x1f17f, CodepointWidth::Wide }, + UnicodeRange{ 0x1f17e, 0x1f17f, CodepointWidth::Wide }, // OVR 17f UnicodeRange{ 0x1f180, 0x1f18d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide }, UnicodeRange{ 0x1f18f, 0x1f190, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide }, UnicodeRange{ 0x1f19b, 0x1f1ac, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f1e6, 0x1f1ff, CodepointWidth::Wide }, + UnicodeRange{ 0x1f1e6, 0x1f1ff, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f200, 0x1f202, CodepointWidth::Wide }, UnicodeRange{ 0x1f210, 0x1f23b, CodepointWidth::Wide }, UnicodeRange{ 0x1f240, 0x1f248, CodepointWidth::Wide }, UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide }, UnicodeRange{ 0x1f260, 0x1f265, CodepointWidth::Wide }, - UnicodeRange{ 0x1f300, 0x1f321, CodepointWidth::Wide }, - UnicodeRange{ 0x1f324, 0x1f393, CodepointWidth::Wide }, - UnicodeRange{ 0x1f396, 0x1f397, CodepointWidth::Wide }, - UnicodeRange{ 0x1f399, 0x1f39b, CodepointWidth::Wide }, - UnicodeRange{ 0x1f39e, 0x1f39f, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3a0, 0x1f3f0, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3f3, 0x1f3f5, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3f7, 0x1f4fd, CodepointWidth::Wide }, + UnicodeRange{ 0x1f300, 0x1f321, CodepointWidth::Wide }, // OVR 321 + UnicodeRange{ 0x1f324, 0x1f393, CodepointWidth::Wide }, // OVR 324-32c, 336, 37d + UnicodeRange{ 0x1f396, 0x1f397, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f399, 0x1f39b, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f39e, 0x1f39f, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f3a0, 0x1f3f0, CodepointWidth::Wide }, // OVR 3cb-3ce, 3d4-3df + UnicodeRange{ 0x1f3f3, 0x1f3f5, CodepointWidth::Wide }, // OVR 3f3, 3f5 + UnicodeRange{ 0x1f3f7, 0x1f4fd, CodepointWidth::Wide }, // OVR 3f7, 43f, 4fd UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide }, - UnicodeRange{ 0x1f549, 0x1f54e, CodepointWidth::Wide }, + UnicodeRange{ 0x1f549, 0x1f54e, CodepointWidth::Wide }, // OVR 549-54a UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide }, - UnicodeRange{ 0x1f56f, 0x1f570, CodepointWidth::Wide }, - UnicodeRange{ 0x1f573, 0x1f57a, CodepointWidth::Wide }, - UnicodeRange{ 0x1f587, 0x1f587, CodepointWidth::Wide }, - UnicodeRange{ 0x1f58a, 0x1f58d, CodepointWidth::Wide }, - UnicodeRange{ 0x1f590, 0x1f590, CodepointWidth::Wide }, - UnicodeRange{ 0x1f594, 0x1f596, CodepointWidth::Wide }, // 0x1f594 MS addition - UnicodeRange{ 0x1f5a4, 0x1f5a5, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5a8, 0x1f5a8, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5b1, 0x1f5b2, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5bc, 0x1f5bc, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5c2, 0x1f5c4, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5d1, 0x1f5d2, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5dc, 0x1f5de, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5e1, 0x1f5e1, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5e3, 0x1f5e3, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5e8, 0x1f5e8, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5ef, 0x1f5ef, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5f3, 0x1f5f3, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5fa, 0x1f64f, CodepointWidth::Wide }, + UnicodeRange{ 0x1f56f, 0x1f570, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f573, 0x1f57a, CodepointWidth::Wide }, // OVR 573-579 + UnicodeRange{ 0x1f587, 0x1f587, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f58a, 0x1f58d, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f590, 0x1f590, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f594, 0x1f596, CodepointWidth::Wide }, // OVR 594, MS 594 + UnicodeRange{ 0x1f5a4, 0x1f5a5, CodepointWidth::Wide }, // OVR 5a5 + UnicodeRange{ 0x1f5a8, 0x1f5a8, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5b1, 0x1f5b2, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5bc, 0x1f5bc, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5c2, 0x1f5c4, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5d1, 0x1f5d2, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5dc, 0x1f5de, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5e1, 0x1f5e1, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5e3, 0x1f5e3, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5e8, 0x1f5e8, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5ef, 0x1f5ef, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5f3, 0x1f5f3, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5fa, 0x1f64f, CodepointWidth::Wide }, // OVR 5fa UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6cb, 0x1f6d2, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6cb, 0x1f6d2, CodepointWidth::Wide }, // OVR 6cb, 6cd-6cf UnicodeRange{ 0x1f6d5, 0x1f6d7, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6e0, 0x1f6e5, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6e9, 0x1f6e9, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6e0, 0x1f6e5, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f6e9, 0x1f6e9, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6f0, 0x1f6f0, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6f3, 0x1f6fc, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6f0, 0x1f6f0, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f6f3, 0x1f6fc, CodepointWidth::Wide }, // OVR 6f3 UnicodeRange{ 0x1f7e0, 0x1f7eb, CodepointWidth::Wide }, - UnicodeRange{ 0x1f90c, 0x1f9ff, CodepointWidth::Wide }, + UnicodeRange{ 0x1f90c, 0x1f9ff, CodepointWidth::Wide }, // OVR 93b, 946 UnicodeRange{ 0x1fa70, 0x1fa74, CodepointWidth::Wide }, - UnicodeRange{ 0x1fa78, 0x1fa7A, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa78, 0x1fa7a, CodepointWidth::Wide }, UnicodeRange{ 0x1fa80, 0x1fa86, CodepointWidth::Wide }, UnicodeRange{ 0x1fa90, 0x1faa8, CodepointWidth::Wide }, UnicodeRange{ 0x1fab0, 0x1fab6, CodepointWidth::Wide }, From ee2b59f92cfc98a71c4107d4a38b497b65923efe Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Fri, 8 May 2020 12:29:24 -0700 Subject: [PATCH 5/9] removed MS emoji codepoints --- src/types/CodepointWidthDetector.cpp | 32 +++++++++++++++++++--------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 24049f2ddab..3be466865bb 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -27,9 +27,18 @@ namespace // Emojis in 0x2010 - 0x2B59 used to be marked as Ambiguous in GetQuickCharWidth() in order to // force a font lookup, but since we default all Ambiguous width to Narrow, those emojis always // came out looking squished/tiny. They've been moved into this table and marked as Wide. - // ** Codepoint ranges marked with "OVR" have their given width from EastAsianWidth.txt overridden. - // ** Codepoint ranges marked with "MS" are given an emoji presentation by Microsoft but are not - // necessarily specified as an emoji in the unicode standard. + // + // There are a couple of codepoints that Microsoft specifically gave an emoji representation + // even if it's not specified as an emoji in the standard. I'll list the ones I'm aware of in this comment in case + // we decide to use them in the future: + // 0x261A-0x261C, 0x261E-0x261F + // 0x2661, + // 0x2662, + // 0x2664, + // 0x2666 0x2710, + // 0x270E 0x2765 0x1f000 - 0x1f02b except 0x1f004 0x1f594 + // + // *** Codepoint ranges marked with "OVR" have their given width from EastAsianWidth.txt overridden. UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, @@ -185,10 +194,10 @@ namespace UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, UnicodeRange{ 0x260e, 0x260e, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2611, 0x2612, CodepointWidth::Wide }, // OVR all; MS all + UnicodeRange{ 0x2611, 0x2611, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, UnicodeRange{ 0x2618, 0x2618, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x261a, 0x261f, CodepointWidth::Wide }, // OVR all; MS 61a-61c, 61e-61f + UnicodeRange{ 0x261d, 0x261d, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2620, 0x2620, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2622, 0x2623, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2626, 0x2626, CodepointWidth::Wide }, // OVR @@ -198,7 +207,9 @@ namespace UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, - UnicodeRange{ 0x265f, 0x2668, CodepointWidth::Wide }, // OVR all; MS 661, 662, 664, 666 + UnicodeRange{ 0x265f, 0x2660, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2663, 0x2663, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2665, 0x2666, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2669, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous }, @@ -236,7 +247,8 @@ namespace UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2702, 0x2702, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, - UnicodeRange{ 0x2708, 0x2710, CodepointWidth::Wide }, // OVR 708-709, 70c-710; MS 710, 70e + UnicodeRange{ 0x2708, 0x270d, CodepointWidth::Wide }, // OVR 708-709, 70c-70d + UnicodeRange{ 0x270f, 0x270f, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2712, 0x2712, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2714, 0x2714, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2716, 0x2716, CodepointWidth::Wide }, // OVR @@ -251,7 +263,7 @@ namespace UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, - UnicodeRange{ 0x2763, 0x2765, CodepointWidth::Wide }, // OVR all, MS 65 + UnicodeRange{ 0x2763, 0x2764, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, UnicodeRange{ 0x27a1, 0x27a1, CodepointWidth::Wide }, // OVR @@ -296,7 +308,7 @@ namespace UnicodeRange{ 0x18800, 0x18af2, CodepointWidth::Wide }, UnicodeRange{ 0x1b000, 0x1b11e, CodepointWidth::Wide }, UnicodeRange{ 0x1b170, 0x1b2fb, CodepointWidth::Wide }, - UnicodeRange{ 0x1f000, 0x1f02b, CodepointWidth::Wide }, // OVR and MS all except 004 + UnicodeRange{ 0x1f004, 0x1f004, CodepointWidth::Wide }, UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide }, UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous }, @@ -331,7 +343,7 @@ namespace UnicodeRange{ 0x1f587, 0x1f587, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f58a, 0x1f58d, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f590, 0x1f590, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f594, 0x1f596, CodepointWidth::Wide }, // OVR 594, MS 594 + UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, UnicodeRange{ 0x1f5a4, 0x1f5a5, CodepointWidth::Wide }, // OVR 5a5 UnicodeRange{ 0x1f5a8, 0x1f5a8, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f5b1, 0x1f5b2, CodepointWidth::Wide }, // OVR From 224f88077259680e77b4b3599082a518ea513ad9 Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Fri, 8 May 2020 12:52:10 -0700 Subject: [PATCH 6/9] added a few missing --- src/types/CodepointWidthDetector.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 3be466865bb..ce488fd078d 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -30,7 +30,7 @@ namespace // // There are a couple of codepoints that Microsoft specifically gave an emoji representation // even if it's not specified as an emoji in the standard. I'll list the ones I'm aware of in this comment in case - // we decide to use them in the future: + // we decide to emojify them in the future: // 0x261A-0x261C, 0x261E-0x261F // 0x2661, // 0x2662, @@ -124,6 +124,7 @@ namespace UnicodeRange{ 0x2121, 0x2122, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2126, 0x2126, CodepointWidth::Ambiguous }, UnicodeRange{ 0x212b, 0x212b, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2139, 0x2139, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2153, 0x2154, CodepointWidth::Ambiguous }, UnicodeRange{ 0x215b, 0x215e, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2160, 0x216b, CodepointWidth::Ambiguous }, @@ -205,7 +206,7 @@ namespace UnicodeRange{ 0x262e, 0x262f, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2638, 0x263a, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, UnicodeRange{ 0x265f, 0x2660, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2663, 0x2663, CodepointWidth::Wide }, // OVR @@ -349,7 +350,7 @@ namespace UnicodeRange{ 0x1f5b1, 0x1f5b2, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f5bc, 0x1f5bc, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f5c2, 0x1f5c4, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x1f5d1, 0x1f5d2, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x1f5d1, 0x1f5d3, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f5dc, 0x1f5de, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f5e1, 0x1f5e1, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x1f5e3, 0x1f5e3, CodepointWidth::Wide }, // OVR From 56000d45497fe6ddedae699191662a10f67545de Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Fri, 8 May 2020 12:58:01 -0700 Subject: [PATCH 7/9] hot springs, female sign --- src/types/CodepointWidthDetector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index ce488fd078d..d152a3dfa9e 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -205,12 +205,13 @@ namespace UnicodeRange{ 0x262a, 0x262a, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x262e, 0x262f, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2638, 0x263a, CodepointWidth::Wide }, // OVR - UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, UnicodeRange{ 0x265f, 0x2660, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2663, 0x2663, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2665, 0x2666, CodepointWidth::Wide }, // OVR + UnicodeRange{ 0x2668, 0x2668, CodepointWidth::Wide }, // OVR UnicodeRange{ 0x2669, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous }, From fc0fe0375febebefcbad013a8e2e59f0de033bc1 Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Fri, 8 May 2020 13:05:11 -0700 Subject: [PATCH 8/9] spell check --- .github/actions/spell-check/whitelist/whitelist.txt | 1 + src/types/CodepointWidthDetector.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/spell-check/whitelist/whitelist.txt b/.github/actions/spell-check/whitelist/whitelist.txt index 58da42f6e46..330b8b24b93 100644 --- a/.github/actions/spell-check/whitelist/whitelist.txt +++ b/.github/actions/spell-check/whitelist/whitelist.txt @@ -1671,6 +1671,7 @@ OUTPATHROOT Outptr Ov OVERLAPPEDWINDOW +OVR OWNDC OWNERDRAWFIXED packageuwp diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index d152a3dfa9e..69b815b14a4 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -30,7 +30,7 @@ namespace // // There are a couple of codepoints that Microsoft specifically gave an emoji representation // even if it's not specified as an emoji in the standard. I'll list the ones I'm aware of in this comment in case - // we decide to emojify them in the future: + // we decide to add them in the future: // 0x261A-0x261C, 0x261E-0x261F // 0x2661, // 0x2662, From 9e2ee25ce8a7932a44c6dc4c3a02e3252e45e485 Mon Sep 17 00:00:00 2001 From: Leon Liang Date: Fri, 8 May 2020 13:07:53 -0700 Subject: [PATCH 9/9] wow i can't do math --- src/types/CodepointWidthDetector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 69b815b14a4..ffb30b57412 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -19,7 +19,7 @@ namespace return range.upperBound < searchTerm; } - static constexpr std::array s_wideAndAmbiguousTable{ + static constexpr std::array s_wideAndAmbiguousTable{ // generated from http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt // anything not present here is presumed to be Narrow. //