From 62b2cdbb5257142e0f3cad2393f8f824c7ccee08 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 21 Mar 2024 18:02:11 +0100 Subject: [PATCH 01/14] Implement grapheme clusters --- .github/actions/spelling/expect/expect.txt | 18 + doc/cascadia/profiles.schema.json | 5 + src/buffer/out/Row.cpp | 62 +- src/buffer/out/Row.hpp | 2 +- src/buffer/out/textBuffer.cpp | 69 +- src/cascadia/TerminalCore/ICoreSettings.idl | 1 + src/cascadia/TerminalCore/Terminal.cpp | 3 + .../GlobalAppSettings.idl | 1 + .../TerminalSettingsModel/MTSMSettings.h | 1 + .../TerminalSettings.cpp | 1 + .../TerminalSettingsModel/TerminalSettings.h | 1 + src/cascadia/inc/ControlProperties.h | 1 + src/features.xml | 10 + src/host/screenInfo.cpp | 17 +- src/host/srvinit.cpp | 28 +- src/host/stream.cpp | 7 - .../ut_host/CodepointWidthDetectorTests.cpp | 89 -- src/host/ut_host/Host.UnitTests.vcxproj | 1 - .../ut_host/Host.UnitTests.vcxproj.filters | 3 - src/host/ut_host/sources | 1 - src/types/CodepointWidthDetector.cpp | 848 ++++++++++- src/types/CodepointWidthDetector_gen.go | 566 +++++++ src/types/GlyphWidth.cpp | 34 +- src/types/inc/CodepointWidthDetector.hpp | 42 +- src/types/inc/GlyphWidth.hpp | 7 - src/types/lib/types.vcxproj.filters | 9 +- .../ut_types/CodepointWidthDetectorTests.cpp | 1331 +++++++++++++++++ .../CodepointWidthDetectorTests_gen.go | 139 ++ src/types/ut_types/Types.Unit.Tests.vcxproj | 1 + src/types/ut_types/sources | 1 + 30 files changed, 2964 insertions(+), 335 deletions(-) delete mode 100644 src/host/ut_host/CodepointWidthDetectorTests.cpp create mode 100644 src/types/CodepointWidthDetector_gen.go create mode 100644 src/types/ut_types/CodepointWidthDetectorTests.cpp create mode 100644 src/types/ut_types/CodepointWidthDetectorTests_gen.go diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 32dd37878d7..684bfdbd692 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ 
-144,6 +144,7 @@ bytebuffer cac cacafire CALLCONV +CANDRABINDU capslock CARETBLINKINGENABLED CARRIAGERETURN @@ -155,6 +156,7 @@ cbiex CBN CBoolean cbt +Ccc CCCBB cch CCHAR @@ -180,6 +182,7 @@ chaof charinfo CHARSETINFO chh +chonker chshdng CHT Cic @@ -598,7 +601,9 @@ FEEF fesb FFAF FFDE +FFFD FFFDb +FFrom fgbg FGCOLOR FGHIJ @@ -617,6 +622,7 @@ FINDDOWN FINDSTRINGEXACT FINDUP FIter +FITZPATRICK FIXEDCONVERTED FIXEDFILEINFO Flg @@ -888,11 +894,13 @@ jconcpp JLO JOBOBJECT JOBOBJECTINFOCLASS +JONGSEONG JPN jsoncpp Jsons jsprovider jumplist +JUNGSEONG KAttrs kawa Kazu @@ -911,6 +919,7 @@ keyups KILLACTIVE KILLFOCUS kinda +KIYEOK KLF KLMNO KLMNOPQRST @@ -1020,6 +1029,7 @@ luma lval LVB LVERTICAL +LVT LWA LWIN lwkmvj @@ -1049,6 +1059,7 @@ mdmerge MDs MEASUREITEM megamix +Meh memallocator meme MENUCHAR @@ -1164,6 +1175,7 @@ NOMINMAX NOMOVE NONALERT nonbreaking +noncharacter nonclient NONINFRINGEMENT NONPREROTATED @@ -1212,6 +1224,7 @@ ntuser NTVDM ntverp nugetversions +NUKTA nullness nullonfailure nullopts @@ -1489,6 +1502,7 @@ renderengine rendersize reparented reparenting +REPH replatformed Replymessage repositorypath @@ -1517,6 +1531,7 @@ rgw RIGHTALIGN RIGHTBUTTON riid +ris RIS roadmap robomac @@ -1883,6 +1898,7 @@ UPDATEDISPLAY UPDOWN UPKEY upss +UPSS uregex URegular usebackq @@ -1925,6 +1941,7 @@ vga vgaoem viewkind viewports +VIRAMA Virt VIRTTERM vkey @@ -2165,6 +2182,7 @@ Zabcdefghijklmn Zabcdefghijklmnopqrstuvwxyz ZCmd ZCtrl +ZWJs zxcvbnm ZYXWVU ZYXWVUTd diff --git a/doc/cascadia/profiles.schema.json b/doc/cascadia/profiles.schema.json index 7225059372a..5f8cf7fbd1d 100644 --- a/doc/cascadia/profiles.schema.json +++ b/doc/cascadia/profiles.schema.json @@ -2344,6 +2344,11 @@ "description": "Force the terminal to use the legacy input encoding. 
Certain keys in some applications may stop working when enabling this setting.", "type": "boolean" }, + "experimental.graphemes": { + "default": true, + "description": "When set to true, the terminal will use grapheme cluster boundaries for cursor movement. Otherwise, the terminal will use codepoint boundaries.", + "type": "boolean" + }, "experimental.useBackgroundImageForWindow": { "default": false, "description": "When set to true, the background image for the currently focused profile is expanded to encompass the entire window, beneath other panes.", diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 4722dc0dfc4..62bc50df499 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -5,10 +5,8 @@ #include "Row.hpp" #include -#include -#include "textBuffer.hpp" -#include "../../types/inc/GlyphWidth.hpp" +#include "../../types/inc/CodepointWidthDetector.hpp" // It would be nice to add checked array access in the future, but it's a little annoying to do so without impacting // performance (including Debug performance). Other languages are a little bit more ergonomic there than C++. @@ -646,60 +644,45 @@ catch (...) // // We can infer the "end" from the amount of columns we're given (colLimit - colBeg), // because ASCII is always 1 column wide per character. 
- auto it = chars.begin(); - const auto end = it + std::min(chars.size(), colLimit - colBeg); + const auto len = std::min(chars.size(), colLimit - colBeg); size_t ch = chBeg; - while (it != end) + for (size_t off = 0; off < len; ++off) { - if (*it >= 0x80) [[unlikely]] + if (chars[off] >= 0x80) [[unlikely]] { - _replaceTextUnicode(ch, it); + _replaceTextUnicode(ch, off); return; } til::at(row._charOffsets, colEnd) = gsl::narrow_cast(ch); ++colEnd; ++ch; - ++it; } colEndDirty = colEnd; charsConsumed = ch - chBeg; } -[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept +[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off) { - const auto end = chars.end(); + auto& cwd = CodepointWidthDetector::Singleton(); + const auto len = chars.size(); - while (it != end) + // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â". + // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character + // and let MeasureNext() find the next proper grapheme boundary. + if (off != 0) { - unsigned int width = 1; - auto ptr = &*it; - const auto wch = *ptr; - size_t advance = 1; - - ++it; - - // Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII. - // It also allows us to skip the surrogate pair decoding at the same time. - if (wch >= 0x80) - { - if (til::is_surrogate(wch)) - { - if (it != end && til::is_leading_surrogate(wch) && til::is_trailing_surrogate(*it)) - { - advance = 2; - ++it; - } - else - { - ptr = &UNICODE_REPLACEMENT; - } - } + --colEnd; + --ch; + --off; + } - width = IsGlyphFullWidth({ ptr, advance }) + 1u; - } + while (off < len) + { + int width; + const auto end = cwd.GraphemeNext(chars, off, &width); const auto colEndNew = gsl::narrow_cast(colEnd + width); if (colEndNew > colLimit) @@ -719,7 +702,8 @@ catch (...) 
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(ch | CharOffsetsTrailer); } - ch += advance; + ch += end - off; + off = end; } colEndDirty = colEnd; @@ -1062,7 +1046,7 @@ std::wstring_view ROW::GetText() const noexcept std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept { - const til::CoordType columns = _columnCount; + const auto columns = GetReadableColumnCount(); const auto colBeg = clamp(columnBegin, 0, columns); const auto colEnd = clamp(columnEnd, colBeg, columns); const size_t chBeg = _uncheckedCharOffset(gsl::narrow_cast(colBeg)); diff --git a/src/buffer/out/Row.hpp b/src/buffer/out/Row.hpp index 197343df6d8..bd39c8461e0 100644 --- a/src/buffer/out/Row.hpp +++ b/src/buffer/out/Row.hpp @@ -181,7 +181,7 @@ class ROW final bool IsValid() const noexcept; void ReplaceCharacters(til::CoordType width) noexcept; void ReplaceText() noexcept; - void _replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept; + void _replaceTextUnicode(size_t ch, size_t off); void CopyTextFrom(const std::span& charOffsets) noexcept; static void _copyOffsets(uint16_t* dst, const uint16_t* src, uint16_t size, uint16_t offset) noexcept; void Finish(); diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 5c71dd1d398..f19d031771d 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -2,16 +2,13 @@ // Licensed under the MIT license. 
#include "precomp.h" - #include "textBuffer.hpp" #include -#include #include "UTextAdapter.h" -#include "../../types/inc/GlyphWidth.hpp" +#include "../../types/inc/CodepointWidthDetector.hpp" #include "../renderer/base/renderer.hpp" -#include "../types/inc/convert.hpp" #include "../types/inc/utils.hpp" using namespace Microsoft::Console; @@ -408,17 +405,17 @@ void TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute // Given the character offset `position` in the `chars` string, this function returns the starting position of the next grapheme. // For instance, given a `chars` of L"x\uD83D\uDE42y" and a `position` of 1 it'll return 3. // GraphemePrev would do the exact inverse of this operation. -// In the future, these functions are expected to also deliver information about how many columns a grapheme occupies. -// (I know that mere UTF-16 code point iteration doesn't handle graphemes, but that's what we're working towards.) size_t TextBuffer::GraphemeNext(const std::wstring_view& chars, size_t position) noexcept { - return til::utf16_iterate_next(chars, position); + auto& cwd = CodepointWidthDetector::Singleton(); + return cwd.GraphemeNext(chars, position, nullptr); } // It's the counterpart to GraphemeNext. See GraphemeNext. size_t TextBuffer::GraphemePrev(const std::wstring_view& chars, size_t position) noexcept { - return til::utf16_iterate_prev(chars, position); + auto& cwd = CodepointWidthDetector::Singleton(); + return cwd.GraphemePrev(chars, position, nullptr); } // Ever wondered how much space a piece of text needs before inserting it? This function will tell you! 
@@ -445,7 +442,7 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord { } - const auto dist = gsl::narrow_cast(it - beg); + auto dist = gsl::narrow_cast(it - beg); auto col = gsl::narrow_cast(dist); if (it == asciiEnd) [[likely]] @@ -455,33 +452,23 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord } // Unicode slow-path where we need to count text and columns separately. - for (;;) - { - auto ptr = &*it; - const auto wch = *ptr; - size_t len = 1; - - col++; + auto& cwd = CodepointWidthDetector::Singleton(); + const auto len = chars.size(); - // Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII. - // It also allows us to skip the surrogate pair decoding at the same time. - if (wch >= 0x80) - { - if (til::is_surrogate(wch)) - { - const auto it2 = it + 1; - if (til::is_leading_surrogate(wch) && it2 != end && til::is_trailing_surrogate(*it2)) - { - len = 2; - } - else - { - ptr = &UNICODE_REPLACEMENT; - } - } + // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â". + // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character + // and let GraphemeNext() find the next proper grapheme boundary. + if (dist != 0) + { + dist--; + col--; + } - col += IsGlyphFullWidth({ ptr, len }); - } + while (dist < len) + { + int width; + dist = cwd.GraphemeNext(chars, dist, &width); + col += width; // If we ran out of columns, we need to always return `columnLimit` and not `cols`, // because if we tried inserting a wide glyph into just 1 remaining column it will @@ -490,17 +477,13 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord if (col > columnLimit) { columns = columnLimit; - return gsl::narrow_cast(it - beg); - } - - // But if we simply ran out of text we just need to return the actual number of columns. 
- it += len; - if (it == end) - { - columns = col; - return chars.size(); + return dist; } } + + // But if we simply ran out of text we just need to return the actual number of columns. + columns = col; + return chars.size(); } // Pretend as if `position` is a regular cursor in the TextBuffer. diff --git a/src/cascadia/TerminalCore/ICoreSettings.idl b/src/cascadia/TerminalCore/ICoreSettings.idl index 1f3c78aa09b..6b6b4b4c3f5 100644 --- a/src/cascadia/TerminalCore/ICoreSettings.idl +++ b/src/cascadia/TerminalCore/ICoreSettings.idl @@ -20,6 +20,7 @@ namespace Microsoft.Terminal.Core String WordDelimiters; Boolean ForceVTInput; + Boolean Graphemes; Boolean TrimBlockSelection; Boolean DetectURLs; Boolean VtPassthrough; diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index 6eabcdab072..b13acc3ff11 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -14,6 +14,8 @@ #include #include +#include "../../types/inc/CodepointWidthDetector.hpp" + using namespace winrt::Microsoft::Terminal::Core; using namespace Microsoft::Terminal::Core; using namespace Microsoft::Console; @@ -97,6 +99,7 @@ void Terminal::UpdateSettings(ICoreSettings settings) _autoMarkPrompts = settings.AutoMarkPrompts(); _getTerminalInput().ForceDisableWin32InputMode(settings.ForceVTInput()); + CodepointWidthDetector::Singleton().SetEnableGraphemes(settings.Graphemes()); if (settings.TabColor() == nullptr) { diff --git a/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl b/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl index 612262d4b11..038f1afec53 100644 --- a/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl +++ b/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl @@ -80,6 +80,7 @@ namespace Microsoft.Terminal.Settings.Model INHERITABLE_SETTING(Boolean, SoftwareRendering); INHERITABLE_SETTING(Boolean, UseBackgroundImageForWindow); INHERITABLE_SETTING(Boolean, ForceVTInput); + 
INHERITABLE_SETTING(Boolean, Graphemes); INHERITABLE_SETTING(Boolean, DebugFeaturesEnabled); INHERITABLE_SETTING(Boolean, StartOnUserLogin); INHERITABLE_SETTING(Boolean, AlwaysOnTop); diff --git a/src/cascadia/TerminalSettingsModel/MTSMSettings.h b/src/cascadia/TerminalSettingsModel/MTSMSettings.h index cb8eb8bcdff..16124774453 100644 --- a/src/cascadia/TerminalSettingsModel/MTSMSettings.h +++ b/src/cascadia/TerminalSettingsModel/MTSMSettings.h @@ -28,6 +28,7 @@ Author(s): X(bool, SoftwareRendering, "experimental.rendering.software", false) \ X(bool, UseBackgroundImageForWindow, "experimental.useBackgroundImageForWindow", false) \ X(bool, ForceVTInput, "experimental.input.forceVT", false) \ + X(bool, Graphemes, "experimental.graphemes", Feature_Graphemes::IsEnabled()) \ X(bool, TrimBlockSelection, "trimBlockSelection", true) \ X(bool, DetectURLs, "experimental.detectURLs", true) \ X(bool, AlwaysShowTabs, "alwaysShowTabs", true) \ diff --git a/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp b/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp index e5c07a71103..c46a11f9cf6 100644 --- a/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp +++ b/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp @@ -364,6 +364,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation _SoftwareRendering = globalSettings.SoftwareRendering(); _UseBackgroundImageForWindow = globalSettings.UseBackgroundImageForWindow(); _ForceVTInput = globalSettings.ForceVTInput(); + _Graphemes = globalSettings.Graphemes(); _TrimBlockSelection = globalSettings.TrimBlockSelection(); _DetectURLs = globalSettings.DetectURLs(); _EnableUnfocusedAcrylic = globalSettings.EnableUnfocusedAcrylic(); diff --git a/src/cascadia/TerminalSettingsModel/TerminalSettings.h b/src/cascadia/TerminalSettingsModel/TerminalSettings.h index 184e05ac8f9..70c751e563d 100644 --- a/src/cascadia/TerminalSettingsModel/TerminalSettings.h +++ b/src/cascadia/TerminalSettingsModel/TerminalSettings.h @@ 
-159,6 +159,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation INHERITABLE_SETTING(Model::TerminalSettings, bool, SoftwareRendering, false); INHERITABLE_SETTING(Model::TerminalSettings, bool, UseBackgroundImageForWindow, false); INHERITABLE_SETTING(Model::TerminalSettings, bool, ForceVTInput, false); + INHERITABLE_SETTING(Model::TerminalSettings, bool, Graphemes, false); INHERITABLE_SETTING(Model::TerminalSettings, hstring, PixelShaderPath); INHERITABLE_SETTING(Model::TerminalSettings, hstring, PixelShaderImagePath); diff --git a/src/cascadia/inc/ControlProperties.h b/src/cascadia/inc/ControlProperties.h index a7700b04958..81f34dd384f 100644 --- a/src/cascadia/inc/ControlProperties.h +++ b/src/cascadia/inc/ControlProperties.h @@ -46,6 +46,7 @@ X(bool, TrimBlockSelection, true) \ X(bool, SuppressApplicationTitle) \ X(bool, ForceVTInput, false) \ + X(bool, Graphemes, false) \ X(winrt::hstring, StartingTitle) \ X(bool, DetectURLs, true) \ X(bool, VtPassthrough, false) \ diff --git a/src/features.xml b/src/features.xml index e8fd44179d3..62e2c067e29 100644 --- a/src/features.xml +++ b/src/features.xml @@ -169,4 +169,14 @@ + + Feature_Graphemes + Enables support for grapheme clusters + AlwaysDisabled + + Dev + Canary + + + diff --git a/src/host/screenInfo.cpp b/src/host/screenInfo.cpp index 9b0ff2ca4e4..f96c728b09e 100644 --- a/src/host/screenInfo.cpp +++ b/src/host/screenInfo.cpp @@ -2,24 +2,13 @@ // Licensed under the MIT license. 
#include "precomp.h" - #include "screenInfo.hpp" -#include "dbcs.h" -#include "output.h" -#include "_output.h" -#include "misc.h" -#include "handle.h" -#include +#include "output.h" #include "../interactivity/inc/ServiceLocator.hpp" -#include "../types/inc/Viewport.hpp" -#include "../types/inc/GlyphWidth.hpp" -#include "../terminal/parser/OutputStateMachineEngine.hpp" - +#include "../types/inc/CodepointWidthDetector.hpp" #include "../types/inc/convert.hpp" -#pragma hdrstop - using namespace Microsoft::Console; using namespace Microsoft::Console::Types; using namespace Microsoft::Console::Render; @@ -533,7 +522,7 @@ void SCREEN_INFORMATION::RefreshFontWithRenderer() GetDesiredFont(), GetCurrentFont()); - NotifyGlyphWidthFontChanged(); + CodepointWidthDetector::Singleton().ClearFallbackCache(); } } } diff --git a/src/host/srvinit.cpp b/src/host/srvinit.cpp index 3fda2556ca8..a29c31ad6fb 100644 --- a/src/host/srvinit.cpp +++ b/src/host/srvinit.cpp @@ -2,33 +2,18 @@ // Licensed under the MIT license. 
#include "precomp.h" - #include "srvinit.h" #include "dbcs.h" #include "handle.h" #include "registry.hpp" #include "renderFontDefaults.hpp" - -#include "ApiRoutines.h" - -#include "../types/inc/GlyphWidth.hpp" - -#include "../server/DeviceHandle.h" -#include "../server/Entrypoints.h" -#include "../server/IoSorter.h" - -#include "../interactivity/inc/ISystemConfigurationProvider.hpp" -#include "../interactivity/inc/ServiceLocator.hpp" #include "../interactivity/base/ApiDetector.hpp" #include "../interactivity/base/RemoteConsoleControl.hpp" - -#include "renderData.hpp" -#include "../renderer/base/renderer.hpp" - -#include "../inc/conint.h" - -#include "tracing.hpp" +#include "../interactivity/inc/ServiceLocator.hpp" +#include "../server/DeviceHandle.h" +#include "../server/IoSorter.h" +#include "../types/inc/CodepointWidthDetector.hpp" #if TIL_FEATURE_RECEIVEINCOMINGHANDOFF_ENABLED #include "ITerminalHandoff.h" @@ -882,8 +867,9 @@ PWSTR TranslateConsoleTitle(_In_ PCWSTR pwszConsoleTitle, const BOOL fUnexpand, // Set up the renderer to be used to calculate the width of a glyph, // should we be unable to figure out its width another way. - auto pfn = std::bind(&Renderer::IsGlyphWideByFont, static_cast(g.pRender), std::placeholders::_1); - SetGlyphWidthFallback(pfn); + CodepointWidthDetector::Singleton().SetFallbackMethod([](const std::wstring_view& glyph) { + return ServiceLocator::LocateGlobals().pRender->IsGlyphWideByFont(glyph); + }); } catch (...) { diff --git a/src/host/stream.cpp b/src/host/stream.cpp index 11a7f6a4115..62a099f4784 100644 --- a/src/host/stream.cpp +++ b/src/host/stream.cpp @@ -2,18 +2,11 @@ // Licensed under the MIT license. 
#include "precomp.h" - -#include "_stream.h" #include "stream.h" #include "handle.h" #include "misc.h" #include "readDataRaw.hpp" - -#include "ApiRoutines.h" - -#include "../types/inc/GlyphWidth.hpp" - #include "../interactivity/inc/ServiceLocator.hpp" using Microsoft::Console::Interactivity::ServiceLocator; diff --git a/src/host/ut_host/CodepointWidthDetectorTests.cpp b/src/host/ut_host/CodepointWidthDetectorTests.cpp deleted file mode 100644 index 9f433eb7c36..00000000000 --- a/src/host/ut_host/CodepointWidthDetectorTests.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include "precomp.h" -#include "WexTestClass.h" -#include "../../inc/consoletaeftemplates.hpp" -#include "CommonState.hpp" - -#include "../types/inc/CodepointWidthDetector.hpp" - -using namespace WEX::Logging; - -static constexpr std::wstring_view emoji = L"\xD83E\xDD22"; // U+1F922 nauseated face - -static constexpr std::wstring_view ambiguous = L"\x414"; // U+0414 cyrillic capital de - -// codepoint and utf16 encoded string -static const std::vector> testData = { - { 0x7, L"\a", CodepointWidth::Narrow }, // BEL - { 0x20, L" ", CodepointWidth::Narrow }, - { 0x39, L"9", CodepointWidth::Narrow }, - { 0x414, L"\x414", CodepointWidth::Narrow }, // U+0414 cyrillic capital de - { 0x1104, L"\x1104", CodepointWidth::Wide }, // U+1104 hangul choseong ssangtikeut - { 0x306A, L"\x306A", CodepointWidth::Wide }, // U+306A hiragana na - { 0x30CA, L"\x30CA", CodepointWidth::Wide }, // U+30CA katakana na - { 0x72D7, L"\x72D7", CodepointWidth::Wide }, // U+72D7 - { 0x1F47E, L"\xD83D\xDC7E", CodepointWidth::Wide }, // U+1F47E alien monster - { 0x1F51C, L"\xD83D\xDD1C", CodepointWidth::Wide } // U+1F51C SOON -}; - -class CodepointWidthDetectorTests -{ - TEST_CLASS(CodepointWidthDetectorTests); - - TEST_METHOD(CanLookUpEmoji) - { - CodepointWidthDetector widthDetector; - VERIFY_IS_TRUE(widthDetector.IsWide(emoji)); - } - - TEST_METHOD(CanGetWidths) - 
{ - CodepointWidthDetector widthDetector; - for (const auto& data : testData) - { - const auto& expected = std::get<2>(data); - const auto& wstr = std::get<1>(data); - const auto result = widthDetector.GetWidth({ wstr.c_str(), wstr.size() }); - VERIFY_ARE_EQUAL(result, expected); - } - } - - static bool FallbackMethod(const std::wstring_view glyph) - { - if (glyph.size() < 1) - { - return false; - } - else - { - return (glyph.at(0) % 2) == 1; - } - } - - TEST_METHOD(AmbiguousCache) - { - // Set up a detector with fallback. - CodepointWidthDetector widthDetector; - widthDetector.SetFallbackMethod(std::bind(&FallbackMethod, std::placeholders::_1)); - - // Ensure fallback cache is empty. - VERIFY_ARE_EQUAL(0u, widthDetector._fallbackCache.size()); - - // Lookup ambiguous width character. - widthDetector.IsWide(ambiguous); - - // Cache should hold it. - VERIFY_ARE_EQUAL(1u, widthDetector._fallbackCache.size()); - - // Cached item should match what we expect - const auto it = widthDetector._fallbackCache.begin(); - VERIFY_ARE_EQUAL(ambiguous[0], it->first); - VERIFY_ARE_EQUAL(FallbackMethod(ambiguous) ? 2u : 1u, it->second); - - // Cache should empty when font changes. 
- widthDetector.NotifyFontChanged(); - VERIFY_ARE_EQUAL(0u, widthDetector._fallbackCache.size()); - } -}; diff --git a/src/host/ut_host/Host.UnitTests.vcxproj b/src/host/ut_host/Host.UnitTests.vcxproj index d464570c788..89d7e2a65e0 100644 --- a/src/host/ut_host/Host.UnitTests.vcxproj +++ b/src/host/ut_host/Host.UnitTests.vcxproj @@ -15,7 +15,6 @@ - diff --git a/src/host/ut_host/Host.UnitTests.vcxproj.filters b/src/host/ut_host/Host.UnitTests.vcxproj.filters index 9e61fb41067..9c946aa560f 100644 --- a/src/host/ut_host/Host.UnitTests.vcxproj.filters +++ b/src/host/ut_host/Host.UnitTests.vcxproj.filters @@ -69,9 +69,6 @@ Source Files - - Source Files - Source Files diff --git a/src/host/ut_host/sources b/src/host/ut_host/sources index 1312b3e63fa..799bb24a949 100644 --- a/src/host/ut_host/sources +++ b/src/host/ut_host/sources @@ -20,7 +20,6 @@ SOURCES = \ HistoryTests.cpp \ UtilsTests.cpp \ ConsoleArgumentsTests.cpp \ - CodepointWidthDetectorTests.cpp \ DbcsTests.cpp \ ScreenBufferTests.cpp \ TextBufferIteratorTests.cpp \ diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 6848aa5b616..15218f04c5c 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -4,6 +4,13 @@ #include "precomp.h" #include "inc/CodepointWidthDetector.hpp" +// We know that ucdToClusterBreak() can never return anything >=CB_COUNT, but the compiler doesn't. +#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). +// ICU doesn't play well with MSVC's linter. +#pragma warning(disable : 26476) // Expression/symbol '...' uses a naked union '...' with multiple type pointers: Use variant instead (type.7). +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). 
+ namespace { // used to store range data in CodepointWidthDetector's internal map @@ -20,11 +27,11 @@ namespace } // Generated by Generate-CodepointWidthsFromUCD.ps1 -Pack:True -Full: -NoOverrides:False - // on 2022-11-15 19:54:23Z from Unicode 15.0.0. - // 321149 (0x4E67D) codepoints covered. + // on 2024-03-20 12:57:23Z from Unicode 15.1.0. + // 321154 (0x4E682) codepoints covered. // 240 (0xF0) codepoints overridden. // Override path: .\src\types\unicode_width_overrides.xml - static constexpr std::array s_wideAndAmbiguousTable{ + static constexpr std::array s_wideAndAmbiguousTable{ UnicodeRange{ 0xa1, 0xa1, 1 }, UnicodeRange{ 0xa4, 0xa4, 1 }, UnicodeRange{ 0xa7, 0xa8, 1 }, @@ -226,14 +233,13 @@ namespace UnicodeRange{ 0x2e80, 0x2e99, 0 }, UnicodeRange{ 0x2e9b, 0x2ef3, 0 }, UnicodeRange{ 0x2f00, 0x2fd5, 0 }, - UnicodeRange{ 0x2ff0, 0x2ffb, 0 }, - UnicodeRange{ 0x3000, 0x303e, 0 }, + UnicodeRange{ 0x2ff0, 0x303e, 0 }, UnicodeRange{ 0x3041, 0x3096, 0 }, UnicodeRange{ 0x3099, 0x30ff, 0 }, UnicodeRange{ 0x3105, 0x312f, 0 }, UnicodeRange{ 0x3131, 0x318e, 0 }, UnicodeRange{ 0x3190, 0x31e3, 0 }, - UnicodeRange{ 0x31f0, 0x321e, 0 }, + UnicodeRange{ 0x31ef, 0x321e, 0 }, UnicodeRange{ 0x3220, 0x3247, 0 }, UnicodeRange{ 0x3248, 0x324f, 1 }, UnicodeRange{ 0x3250, 0x4dbf, 0 }, @@ -325,68 +331,793 @@ namespace }; } -// Routine Description: -// - returns the width type of codepoint as fast as we can by using quick lookup table and fallback cache. -// Arguments: -// - glyph - the utf16 encoded codepoint to search for -// Return Value: -// - the width type of the codepoint -CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view& glyph) noexcept +// s_stage1/2/3/4 represents a multi-stage table, aka trie. +// The highest bits of the codepoint are an index into s_stage1, which selects a row in s_stage2. +// The next couple bits of the codepoint then select the column in that row. +// This continues until the last stage which contains the final value. 
+// +// Fundamentally, the trie is generated by taking all 1114112 codepoints and their assigned values and deduplicating +// chunks of e.g. 16 values each. Each deduplicated chunk is assigned its offset in the list of all deduplicated chunks. +// This results in two lists: 1114112/16=69632 IDs and however many deduplicated chunks you have accumulated. +// This is often called a two-stage table. +// +// If you want to look up the value now, you'll first find the deduplicated chunk offset via `offsets[codepoint / 16]`. +// This gives you the location of your chunk. Now you just look up the value with `values[offset + (codepoint & 15)]`. +// +// Since the 69632 offsets take up a lot more space than the deduplicated values (at least in case of the Unicode database), +// this process can be repeated by compressing the offset array the exact same way the values got compressed and so on. + +// s_joinRules represents the UAX #29 extended grapheme cluster rules, however slightly modified to fit our needs. +// Specifically, UAX #29 states: +// > Note: Testing two adjacent characters is insufficient for determining a boundary. +// +// I completely agree, but I really hate it. So this code trades off correctness for simplicity +// by using a simple lookup table anyway. Under most circumstances users won't notice, +// because as far as I can see this only behaves differently for degenerate ("invalid") Unicode. +// It reduces our code complexity significantly and is way *way* faster. 
+// +// This is a great reference for the s_joinRules table: +// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html + +// Generated by CodepointWidthDetector_gen.go +// on 2024-03-20T21:19:52Z, from Unicode 15.1.0, 8414 bytes +// clang-format off +static constexpr uint16_t s_stage1[] = { + 0x0000, 0x0020, 0x0040, 0x0060, 0x0080, 0x009f, 0x00bf, 0x00ca, 0x00ca, 0x00d3, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, + 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00eb, 0x010b, 0x011d, 0x0121, 0x011e, 0x011b, 0x0126, 0x0146, 0x0166, 0x0166, 0x0166, 0x0182, + 0x01a2, 0x01ba, 0x01da, 0x01fa, 0x0146, 0x0146, 0x0218, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x022d, 0x00ca, 0x00ca, + 0x024d, 0x026d, 0x0146, 0x0146, 0x0146, 0x0282, 0x02a2, 0x02b0, 0x0146, 0x02c3, 0x02e1, 0x02f9, 0x0319, 0x0336, 0x0356, 0x0376, + 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, + 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x0396, + 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, + 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x0396, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 
0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x03b6, 0x03be, 0x0146, 0x0146, 0x0146, 
0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, + 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x03de, + 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, + 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x03de, +}; +static constexpr uint16_t s_stage2[] = { + 0x0000, 0x0004, 0x000c, 0x0014, 0x001c, 0x0024, 0x002a, 0x0031, 0x002a, 0x0037, 0x002a, 0x003f, 0x0047, 0x0049, 0x004f, 0x0057, 0x005f, 0x0065, 0x006d, 0x002a, 0x002a, 0x002a, 0x0073, 0x007b, 0x0083, 0x008a, 0x002a, 0x0091, 0x0098, 0x009f, 0x00a3, 0x00aa, + 0x00b2, 0x00b8, 0x00be, 0x00c5, 0x00cd, 0x00d5, 0x00dd, 0x00e5, 0x00dd, 0x00ed, 0x00dd, 0x00f5, 0x00dd, 0x00fd, 0x0105, 0x010d, 0x0115, 0x011d, 0x00dd, 0x0125, 0x012d, 0x0135, 0x013d, 0x0144, 0x014b, 0x0153, 0x0155, 0x015d, 0x0162, 0x006f, 0x016a, 0x0172, + 0x0175, 0x017d, 0x0185, 0x002a, 0x018d, 0x0191, 0x0195, 0x019a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x01a2, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x01a8, 0x01af, 0x01b6, 0x01bd, + 0x01c4, 0x002a, 0x01cc, 0x002a, 0x01d2, 0x002a, 0x002a, 0x002a, 0x01da, 0x01e0, 0x01e8, 0x01ef, 0x01f7, 0x01ff, 0x0207, 0x020d, 0x0214, 0x002a, 0x002a, 0x021b, 0x002a, 0x002a, 0x002a, 0x0047, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x0223, 0x022b, 0x0233, 0x0239, 0x0241, 0x0249, 0x0251, 0x0259, 0x0261, 0x0269, 0x0271, 0x002a, 0x0279, 0x002a, 0x0280, 0x0287, 0x002a, 0x028f, 0x0293, 0x029b, 0x002a, 0x002a, 
0x02a3, 0x02ab, 0x02b3, 0x02bb, 0x02c3, 0x02cb, 0x02d3, 0x02db, 0x02e3, 0x002a, + 0x002a, 0x002a, 0x002a, 0x02eb, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x02f3, 0x02f9, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x02fd, 0x002a, 0x0304, 0x002a, 0x0043, 0x002a, 0x002a, 0x030c, 0x0310, 0x0318, 0x0318, 0x0318, 0x031e, 0x0324, + 0x032c, 0x0332, 0x0318, 0x033a, 0x0318, 0x0341, 0x0345, 0x034b, 0x0352, 0x0358, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, + 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x002a, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x035f, 0x0367, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x036a, 0x0372, 0x015f, 0x002a, 0x002a, 0x002a, 0x002a, 0x037a, 0x002a, 0x0382, 0x038a, 0x0392, 0x039a, 0x03a2, 0x03aa, 0x03af, 0x03b7, 0x03bf, 0x03c7, 0x002a, 0x002a, 0x002a, 0x03ce, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, + 0x03db, 0x03dc, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03d6, 0x03d7, + 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03e3, 0x03eb, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, + 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 
0x03f3, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x03fb, 0x0401, 0x002a, 0x0407, 0x032c, 0x040f, + 0x0414, 0x0418, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0420, 0x002a, 0x002a, 0x002a, 0x0428, 0x002a, 0x042d, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x0435, 0x002a, 0x002a, 0x01c8, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x043d, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0442, 0x0448, 0x002a, 0x00a7, 0x0450, 0x002a, 0x0458, 0x0460, 0x0468, 0x0470, 0x0478, 0x0480, + 0x0488, 0x0490, 0x0493, 0x049b, 0x002a, 0x04a0, 0x012d, 0x04a8, 0x002a, 0x002a, 0x04af, 0x04b7, 0x01e8, 0x04bf, 0x002a, 0x002a, 0x04c2, 0x04ca, 0x01e8, 0x04d2, 0x04d5, 0x002a, 0x04dc, 0x002a, 0x002a, 0x002a, 0x04e2, 0x002a, 0x002a, 0x002a, 0x04ea, 0x04f2, + 0x002a, 0x04f8, 0x0500, 0x0508, 0x0510, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0514, 0x002a, 0x051c, 0x002a, 0x0523, 0x052b, 0x0532, 0x002a, 0x002a, 0x002a, 0x002a, 0x0535, 0x053d, 0x0545, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0547, 0x054f, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x01f1, 0x0552, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0559, 0x0560, 0x0564, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, + 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x056c, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, + 0x0574, 0x057c, 0x002a, 0x002a, 0x002a, 0x002a, 
0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x057e, 0x0318, 0x0318, 0x0318, 0x0318, 0x0586, 0x058d, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0593, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x059b, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05a3, + 0x05ab, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05af, 0x05b7, 0x002a, 0x002a, 0x05bf, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x05c7, 0x05cf, 0x05d7, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05df, 0x002a, 0x05e6, 0x002a, 0x0552, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x05e9, 0x05ef, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05ef, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05f5, 0x002a, 0x05fd, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0605, 0x0606, 0x0606, 0x060d, 0x0615, 0x061b, 0x0623, 0x0629, 0x0631, 0x0639, + 0x0606, 0x0606, 0x0641, 0x0648, 0x0650, 0x0657, 0x065f, 0x0667, 0x0668, 0x0669, 0x0671, 0x0679, 0x0681, 0x0686, 0x0668, 0x068e, 0x0668, 0x0696, 0x002a, 0x069e, 0x002a, 0x06a6, 0x06ae, 0x06b5, 0x06bc, 0x0606, 0x06c4, 0x06cc, 0x0668, 0x0668, 
0x0606, 0x06d4, + 0x06dc, 0x06e4, 0x002a, 0x002a, 0x002a, 0x002a, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x06ec, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, + 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0319, 0x06f4, 0x0047, 0x06fc, 0x06fc, 0x0047, 0x0047, 0x0047, 0x0704, 0x06fc, 0x06fc, + 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x0293, 0x0293, + 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x070c, +}; +static constexpr uint16_t s_stage3[] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0011, 0x0018, 0x0020, 0x0022, 0x002a, 0x0008, 0x0029, 0x0030, + 0x0036, 0x003e, 0x0040, 0x0047, 0x004c, 0x0008, 0x004a, 0x002d, + 0x004e, 0x002d, 0x0053, 0x0029, 0x005b, 0x0063, 0x0034, 0x0008, + 0x004e, 0x002d, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x002a, 0x006b, 0x0049, 0x0008, 0x0008, 0x0008, + 0x0008, 0x004c, 0x0008, 0x004c, 0x0008, 0x0008, 0x0008, 0x0072, + 0x005a, 0x0079, 0x0081, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, + 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0008, + 0x0008, 0x0091, 0x0092, 0x0098, 0x0028, 0x0091, 0x0092, 0x0098, + 0x0028, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x004c, + 0x0008, 0x0092, 0x0092, 0x0092, 0x0092, 0x0092, 0x0092, 0x004c, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x00a6, 
0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ad, 0x0089, 0x0089, + 0x0089, 0x0089, 0x00af, 0x00b5, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00bd, 0x0008, 0x0089, 0x00c5, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00a0, 0x0089, 0x0089, 0x0008, 0x0008, + 0x00cd, 0x0008, 0x0008, 0x00a8, 0x00d5, 0x00dc, 0x00e3, 0x0008, + 0x0008, 0x00e9, 0x00cc, 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, + 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a8, + 0x0089, 0x00cd, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, + 0x00a4, 0x00f1, 0x0008, 0x0008, 0x00a8, 0x00f9, 0x00fd, 0x00a2, + 0x0008, 0x0008, 0x0008, 0x0101, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0109, 0x0089, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ac, 0x0089, + 0x0089, 0x0111, 0x0089, 0x0089, 0x0089, 0x00a4, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0119, 0x0089, 0x011f, 0x00ad, + 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x0101, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x012b, 0x00dc, 0x0132, 0x00ce, + 0x0008, 0x0127, 0x0008, 0x0008, 0x0138, 0x0140, 0x0147, 0x00cc, + 0x0008, 0x0008, 0x0008, 0x014b, 0x0008, 0x00af, 0x0153, 0x0008, + 0x0008, 0x0127, 0x0008, 0x0008, 0x00ac, 0x00dc, 0x0132, 0x00a9, + 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x00cb, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00a8, 0x015b, 0x00e3, 0x00ce, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a3, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x012b, 0x00b0, 0x0162, 0x0168, + 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x00b0, 0x00e3, 0x0168, + 0x0008, 0x0127, 0x0008, 0x00ca, 0x0008, 0x00a4, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x015e, 0x00b0, 0x016e, 0x00ce, + 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x0101, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0175, 0x017c, 0x0089, + 0x0008, 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0181, 0x00a5, 0x00ce, 0x008a, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0181, 0x00a3, 0x0008, 0x008a, 0x0008, + 0x0008, 
0x0008, 0x0008, 0x0008, 0x0008, 0x00a6, 0x0008, 0x0008, + 0x0139, 0x0176, 0x00b0, 0x00a9, 0x0089, 0x00ad, 0x0089, 0x0089, + 0x0089, 0x00a3, 0x0138, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0189, 0x0089, 0x018d, 0x0008, 0x0008, 0x00a8, + 0x0192, 0x0199, 0x01a0, 0x00e4, 0x0008, 0x01a6, 0x01ad, 0x0008, + 0x01b5, 0x0008, 0x0008, 0x0008, 0x0008, 0x01bd, 0x01bd, 0x01bd, + 0x01bd, 0x01bd, 0x01bd, 0x01bd, 0x01bd, 0x01c5, 0x01c5, 0x01c5, + 0x01c5, 0x01c5, 0x01cd, 0x01cd, 0x01cd, 0x01cd, 0x01cd, 0x01cd, + 0x01cd, 0x01cd, 0x0008, 0x0008, 0x0008, 0x00a9, 0x0008, 0x0008, + 0x0008, 0x0008, 0x01d5, 0x0008, 0x0008, 0x0008, 0x0148, 0x0008, + 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x0127, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x0089, 0x0089, 0x00a4, + 0x00f1, 0x0008, 0x0008, 0x0008, 0x0008, 0x01db, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0168, 0x0008, 0x0008, 0x0008, + 0x0008, 0x00cc, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, 0x00a4, + 0x0089, 0x00a4, 0x0008, 0x0008, 0x00ce, 0x00a4, 0x0008, 0x0008, + 0x0008, 0x0008, 0x00a9, 0x008a, 0x01e2, 0x0089, 0x0089, 0x00dc, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, + 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a3, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x0089, 0x00a3, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x00a4, 0x0008, 0x00a5, + 0x0008, 0x0008, 0x0008, 0x00ad, 0x018e, 0x0008, 0x0008, 0x0008, + 0x0008, 0x00a8, 0x0089, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00aa, 0x0089, 0x0089, 0x0008, 0x0008, 0x011b, 0x0089, 0x0089, + 0x0142, 0x00f2, 0x00a6, 0x0008, 0x01ea, 0x01f2, 0x01f7, 0x0022, + 0x01ff, 0x0207, 0x020d, 0x0008, 0x0214, 0x0008, 0x0008, 0x021c, + 0x0222, 0x002c, 0x007a, 0x0025, 0x0008, 0x0008, 0x0008, 0x0008, + 0x002c, 0x0008, 0x0008, 0x0089, 0x0089, 0x0089, 0x0089, 0x00cd, + 0x0008, 0x022a, 0x004c, 0x0073, 0x0008, 0x0231, 0x002d, 0x0008, + 0x0214, 0x0008, 0x0008, 0x0033, 0x0060, 0x0092, 0x0026, 0x0092, + 0x0028, 0x0008, 0x004c, 
0x0239, 0x023f, 0x0008, 0x0246, 0x0008, + 0x0028, 0x0008, 0x0008, 0x024c, 0x0008, 0x007a, 0x0008, 0x0008, + 0x0008, 0x0040, 0x0254, 0x024f, 0x0259, 0x0068, 0x025e, 0x007d, + 0x0032, 0x0008, 0x0264, 0x002e, 0x0008, 0x026c, 0x026a, 0x0008, + 0x0008, 0x026a, 0x0008, 0x002b, 0x004c, 0x002b, 0x0008, 0x0008, + 0x007a, 0x0008, 0x0008, 0x002e, 0x0274, 0x0008, 0x027c, 0x0008, + 0x0008, 0x0284, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0285, 0x0008, 0x0008, 0x0008, 0x028d, 0x0295, 0x029d, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0092, 0x0092, 0x0092, 0x0092, 0x0092, + 0x0092, 0x0092, 0x0092, 0x02a5, 0x0092, 0x0092, 0x0092, 0x0092, + 0x0098, 0x0092, 0x0092, 0x0008, 0x0008, 0x0008, 0x0008, 0x0098, + 0x02ab, 0x02b1, 0x0032, 0x02b7, 0x02be, 0x0028, 0x0008, 0x01f3, + 0x007a, 0x0008, 0x02c6, 0x02ce, 0x02d5, 0x02dd, 0x02e3, 0x02ea, + 0x02ea, 0x02ea, 0x02ea, 0x02e7, 0x02f2, 0x02f6, 0x02ea, 0x02fe, + 0x0301, 0x02ea, 0x02eb, 0x0309, 0x0008, 0x0311, 0x0315, 0x0313, + 0x031d, 0x02ea, 0x0321, 0x0322, 0x0328, 0x032a, 0x032f, 0x0305, + 0x032c, 0x0335, 0x033b, 0x0343, 0x031d, 0x034b, 0x027f, 0x0214, + 0x0353, 0x0244, 0x002b, 0x0357, 0x035f, 0x0366, 0x0008, 0x036e, + 0x0008, 0x004e, 0x0092, 0x0008, 0x0008, 0x0376, 0x0008, 0x0214, + 0x0008, 0x0353, 0x037e, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0243, 0x0008, 0x0386, 0x0008, 0x0008, 0x038e, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0392, 0x0028, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x00ce, 0x00a6, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00ce, 0x039a, 0x039a, 0x039a, 0x03a0, + 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x03a4, 0x0008, + 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, + 0x03ac, 0x0008, 0x0008, 0x0008, 0x039a, 0x039a, 0x039a, 0x039a, + 0x039a, 0x03b4, 0x03bc, 0x03bf, 0x03c6, 0x039a, 0x039a, 0x039a, + 0x039a, 0x039a, 0x039a, 0x039a, 0x039b, 0x03ce, 0x039a, 0x039a, + 0x039a, 0x039a, 0x03d6, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, + 0x03c6, 0x039a, 0x039b, 0x039a, 0x039a, 
0x039a, 0x039a, 0x039a, + 0x039a, 0x03a4, 0x03de, 0x039a, 0x039a, 0x039a, 0x039b, 0x039a, + 0x039a, 0x039a, 0x039a, 0x0092, 0x039a, 0x039a, 0x039a, 0x039a, + 0x039a, 0x039a, 0x03bd, 0x03e5, 0x039a, 0x039a, 0x039a, 0x039a, + 0x03a3, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039b, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ce, + 0x011b, 0x018e, 0x0008, 0x0008, 0x0008, 0x00a8, 0x0008, 0x0008, + 0x0008, 0x0008, 0x03ed, 0x00ca, 0x0008, 0x0008, 0x00a0, 0x03f4, + 0x0008, 0x0008, 0x00a6, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00aa, 0x0089, 0x018e, 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, + 0x00a6, 0x00ce, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a8, 0x018e, + 0x0008, 0x0008, 0x00ce, 0x0089, 0x00a4, 0x0008, 0x01bd, 0x01bd, + 0x01bd, 0x03fc, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00a0, 0x0089, 0x00cd, 0x0008, 0x0008, 0x0008, 0x00f1, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00ad, 0x008a, 0x0008, 0x00ca, + 0x0401, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0407, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x040f, 0x0416, 0x00cc, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x0168, 0x0008, 0x0008, + 0x0008, 0x0008, 0x00a0, 0x041c, 0x0008, 0x0008, 0x0424, 0x0425, + 0x0425, 0x0429, 0x0425, 0x0425, 0x0425, 0x0424, 0x0425, 0x0425, + 0x0429, 0x0425, 0x0425, 0x0425, 0x0424, 0x0425, 0x0425, 0x042e, + 0x0008, 0x01c5, 0x01c5, 0x0436, 0x043d, 0x01cd, 0x01cd, 0x01cd, + 0x01cd, 0x01cd, 0x0441, 0x0008, 0x0008, 0x0008, 0x0138, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0089, 0x0449, 0x039a, 0x03dc, 0x0089, + 0x0089, 0x039a, 0x039a, 0x039f, 0x039a, 0x039b, 0x03a4, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0451, 0x039a, + 0x039a, 0x039a, 0x039a, 0x03dd, 0x0008, 0x0008, 0x0008, 0x0459, + 0x0008, 0x0008, 0x0008, 0x0008, 0x039b, 0x0008, 0x0000, 0x0461, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00f1, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00cd, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00a8, 0x00a5, 0x041b, 0x00aa, 
0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0140, 0x0008, 0x0008, 0x0008, + 0x0008, 0x00aa, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0402, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a9, + 0x01d5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, + 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0126, 0x00ce, + 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, 0x0469, + 0x00cb, 0x00eb, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00a5, 0x0008, 0x0008, 0x0008, 0x00ce, 0x0089, 0x00a3, 0x0008, + 0x0168, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ca, 0x0008, + 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x0089, + 0x0471, 0x0478, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00aa, 0x0089, 0x0138, 0x00cc, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00ce, 0x0089, 0x00a5, 0x0008, 0x0008, + 0x00dc, 0x0147, 0x00ce, 0x0008, 0x0480, 0x00a3, 0x00a3, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a9, 0x0089, 0x008a, + 0x0008, 0x0008, 0x0138, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a4, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ce, + 0x018e, 0x0089, 0x00cd, 0x0008, 0x0008, 0x0401, 0x0008, 0x0008, + 0x0008, 0x0008, 0x00cd, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x00a0, 0x0089, 0x0008, 0x0008, 0x0008, 0x00a9, + 0x018c, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, + 0x0089, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00af, 0x0483, 0x0489, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x00ad, 0x00ac, 0x03ef, 0x0008, 0x0008, 0x0008, + 0x00ad, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x0491, + 0x00ce, 0x0008, 0x00ad, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0498, 0x049e, 0x0089, 0x00a6, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x00ce, 0x008a, 0x0089, 0x0008, 0x0008, 0x00ac, 0x0089, + 0x0089, 0x00ad, 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 
0x04a6, 0x04ad, 0x04b4, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00a1, 0x00f9, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0476, 0x0008, 0x04b8, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00aa, 0x015b, 0x00a5, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0222, 0x0222, 0x00a7, + 0x0089, 0x018e, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x008a, 0x0008, 0x00ce, 0x00ad, 0x0089, 0x0089, 0x0089, 0x0089, + 0x0089, 0x00ce, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x04c0, 0x0008, 0x00a6, 0x0008, 0x039a, 0x039a, 0x039a, 0x039a, + 0x039a, 0x039a, 0x039a, 0x0008, 0x039a, 0x039a, 0x03ac, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x039a, 0x03dd, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x039e, 0x04c8, 0x039a, 0x039a, + 0x039a, 0x039a, 0x03d3, 0x0008, 0x04cf, 0x0008, 0x0008, 0x04d7, + 0x0008, 0x03a8, 0x0008, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, + 0x039a, 0x039a, 0x03a4, 0x0008, 0x0008, 0x0008, 0x0168, 0x04df, + 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, + 0x018e, 0x0089, 0x0089, 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00a9, 0x01d9, 0x04e7, 0x04ec, 0x0116, + 0x00a4, 0x0008, 0x0008, 0x0008, 0x01d5, 0x0008, 0x0008, 0x0148, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, + 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x008a, 0x00a0, 0x0089, + 0x0089, 0x0089, 0x0089, 0x0089, 0x00a3, 0x00f1, 0x0008, 0x03f4, + 0x0008, 0x0008, 0x00a0, 0x00ad, 0x0089, 0x0008, 0x0008, 0x008a, + 0x0089, 0x0089, 0x04f3, 0x00b3, 0x00a5, 0x0008, 0x0008, 0x00ce, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0138, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x0008, 0x0008, 0x008a, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x00a5, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x04fb, 0x02ea, 0x02ea, + 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02eb, 0x02ea, + 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x0092, 0x0503, 0x0092, + 0x0092, 0x0092, 0x050b, 
0x0092, 0x0092, 0x0092, 0x0092, 0x0092, + 0x0513, 0x051b, 0x051d, 0x0092, 0x0525, 0x052c, 0x0531, 0x0092, + 0x0534, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x0539, 0x053f, 0x053f, + 0x053f, 0x0547, 0x02ea, 0x039a, 0x054f, 0x039a, 0x03bd, 0x0555, + 0x055a, 0x039a, 0x055d, 0x0565, 0x02ea, 0x02f4, 0x02ea, 0x02ea, + 0x02ea, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x0566, 0x02ed, 0x056e, + 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x056f, + 0x02f2, 0x02f2, 0x02f6, 0x02ea, 0x02f2, 0x02f2, 0x02f2, 0x02f2, + 0x0575, 0x02f6, 0x02ea, 0x02f2, 0x02f2, 0x057c, 0x0584, 0x02f2, + 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f3, 0x058c, + 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, + 0x058f, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, + 0x0596, 0x0241, 0x059e, 0x02f2, 0x02f2, 0x02f2, 0x02ea, 0x02ea, + 0x0312, 0x02ea, 0x02ea, 0x0560, 0x02ea, 0x04fb, 0x02ea, 0x02ea, + 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ef, 0x02f2, 0x02f2, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x02f4, 0x04fb, + 0x0591, 0x02ee, 0x02ea, 0x0562, 0x02ee, 0x02f5, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x05a6, 0x02ea, 0x0008, 0x0008, + 0x0386, 0x02ea, 0x02f2, 0x02f6, 0x0566, 0x02ea, 0x0008, 0x05a6, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x02ea, 0x0008, + 0x05a8, 0x0008, 0x0008, 0x0008, 0x0008, 0x02ea, 0x0008, 0x0008, + 0x0008, 0x0241, 0x02ea, 0x02ea, 0x0008, 0x05b0, 0x02f2, 0x02f2, + 0x02f2, 0x02f2, 0x02f2, 0x05b5, 0x05b9, 0x02f2, 0x02f2, 0x02f2, + 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02ea, 0x02ea, 0x02ea, 0x02ea, + 0x02ea, 0x02ea, 0x02f2, 0x02f5, 0x02f2, 0x0566, 0x02f2, 0x02f2, + 0x02f2, 0x02f2, 0x02f2, 0x056e, 0x02f4, 0x02ec, 0x02f2, 0x02f6, + 0x02f2, 0x0566, 0x02f2, 0x0566, 0x02ea, 0x02ea, 0x02ea, 0x02ea, + 0x02ea, 0x02ea, 0x02ea, 0x0309, 0x05c1, 0x0000, 0x0000, 0x0000, + 0x0089, 0x0089, 0x0089, 0x0089, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0089, 0x0089, 0x0089, 0x0089, + 0x0089, 0x0089, 0x0000, 0x0000, 0x0092, 
0x0092, 0x0092, 0x0092, + 0x0092, 0x0092, 0x0092, 0x05c9, +}; +static constexpr uint8_t s_stage4[] = { + 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x41, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0x40, 0x40, + 0xc0, 0x4c, 0xc0, 0x40, 0x40, 0x01, 0xcc, 0x40, + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0xc0, + 0xc0, 0xc0, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0xc0, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, + 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, 0xc0, 0xc0, + 0xc0, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0x40, 0xc0, + 0xc0, 0xc0, 0x40, 0xc0, 0x40, 0xc0, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0x40, + 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, + 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, + 0xc0, 0x40, 0x40, 0xc0, 0x40, 0xc0, 0x40, 0xc0, + 0x40, 0xc0, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, + 0x40, 0xc0, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0x40, + 0xc0, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0xc0, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, 0x02, 0x02, + 0x40, 0x02, 0x02, 0x40, 0x02, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x40, 0x40, 0x02, 0x02, 0x02, + 0x40, 0x01, 0x40, 0x40, 0x40, 0x02, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x03, 0x40, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x40, 0x02, 0x40, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x03, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x40, + 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, + 0x40, 0x03, 0x03, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x0b, 0x02, 0x02, 0x40, + 0x40, 0x02, 0x02, 0x40, 0x40, 0x40, 0x40, 
0x02, + 0x40, 0x02, 0x02, 0x40, 0x40, 0x02, 0x02, 0x0b, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x40, + 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, 0x40, 0x02, + 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, + 0x02, 0x40, 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, + 0x0b, 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, + 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, 0x02, 0x0b, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x40, + 0x02, 0x02, 0x02, 0x0b, 0x43, 0x40, 0x40, 0x02, + 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x02, 0x40, 0x42, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x40, 0x40, 0x00, 0x00, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, 0x40, + 0x02, 0x02, 0x40, 0x00, 0x00, 0x00, 0x40, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, + 0x02, 0x00, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x02, 0x40, 0x00, 0x40, 0x40, 0x00, + 0x00, 0x00, 0x02, 0x40, 0x40, 0x86, 0x86, 0x86, + 0x86, 0x86, 0x86, 0x86, 0x86, 0x47, 0x47, 0x47, + 0x47, 0x47, 0x47, 0x47, 0x47, 0x48, 0x48, 0x48, + 0x48, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, 0x02, + 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, 0x02, 0x02, + 0x02, 0x01, 0x02, 0x00, 0x02, 0x00, 0x00, 0x02, + 0x02, 0x02, 0x40, 0x40, 0x40, 0x01, 0x02, 0x04, + 0x01, 0x01, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0xc0, + 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0x41, + 0x41, 0x01, 0x01, 0x01, 0x01, 0x01, 0x40, 0xc0, + 0x40, 0xc0, 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, + 0xc0, 0x4c, 0x40, 0xc0, 0x40, 0x4c, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x41, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x40, 0x40, 0x40, 0xc0, 0x40, 0xc0, + 0x40, 0x40, 0xc0, 0xcc, 0x40, 0x40, 0x40, 0xc0, + 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xcc, 0xcc, 0xcc, + 0xcc, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x4c, + 0x4c, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, + 0xc0, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, + 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, + 0xc0, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 
0x40, + 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0x40, 0x8c, 0x8c, + 0x40, 0x40, 0x40, 0x40, 0x4c, 0x80, 0x80, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x4c, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x4c, 0x40, 0x8c, 0x8c, + 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, + 0x8c, 0x40, 0x40, 0x40, 0x40, 0x4c, 0x4c, 0x4c, + 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xcc, + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x4c, 0x4c, 0x40, + 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0xcc, + 0xc0, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, + 0x40, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, + 0x40, 0x4c, 0x4c, 0x8c, 0x8c, 0x40, 0x4c, 0x4c, + 0x4c, 0x4c, 0x4c, 0xcc, 0xc0, 0x4c, 0xcc, 0x4c, + 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, 0x4c, 0x4c, 0x4c, + 0x40, 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0xcc, + 0x4c, 0xcc, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, + 0x4c, 0x4c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, + 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, + 0x4c, 0xcc, 0xcc, 0xcc, 0x4c, 0xcc, 0xcc, 0x4c, + 0xcc, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x40, + 0x40, 0x4c, 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, 0x4c, + 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, 0x4c, 0x4c, 0x8c, + 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x8c, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x8c, + 0x8c, 0xcc, 0x8c, 0xcc, 0xcc, 0x8c, 0xcc, 0xcc, + 0x8c, 0xcc, 0xcc, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, + 0x8c, 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0x40, 0x4c, + 0x40, 0x4c, 0x40, 0x8c, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x4c, 0x40, 0x40, 0x4c, 0x40, + 0x40, 0x40, 0x40, 0x8c, 0x40, 0x8c, 0x40, 0x40, + 0x40, 0x8c, 0x8c, 0x8c, 0x40, 0x8c, 0x40, 0x40, + 0x40, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x8c, 0x8c, 0x8c, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x8c, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0x40, 0x40, + 0x40, 0x8c, 0x8c, 0x40, 0x40, 0x40, 0x40, 0x8c, + 0xc0, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x40, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x40, 0x40, 0x40, 0x40, 0x80, 0x80, 0x80, 
0x80, + 0x80, 0x80, 0x40, 0x40, 0x80, 0x80, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x8c, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x8c, 0x80, 0x40, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x02, + 0x02, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x80, 0x80, 0x80, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x80, 0x8c, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, 0x02, + 0x40, 0x40, 0x40, 0x02, 0x40, 0x40, 0x40, 0x40, + 0x02, 0x40, 0x40, 0x40, 0x86, 0x86, 0x86, 0x86, + 0x86, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x40, + 0x40, 0x40, 0x00, 0x02, 0x00, 0x40, 0x40, 0x02, + 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x02, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, + 0x02, 0x02, 0x40, 0x40, 0x89, 0x8a, 0x8a, 0x8a, + 0x8a, 0x8a, 0x8a, 0x8a, 0x8a, 0x89, 0x8a, 0x8a, + 0x8a, 0x8a, 0x40, 0x40, 0x40, 0x40, 0x47, 0x47, + 0x47, 0x47, 0x47, 0x47, 0x47, 0x40, 0x40, 0x40, + 0x48, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, 0x40, + 0x40, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x82, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x01, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x42, + 0x42, 0x41, 0x01, 0x01, 0x01, 0x40, 0xc0, 0x40, + 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x03, 0x40, + 0x40, 0x02, 0x40, 0x43, 0x43, 0x40, 0x40, 0x40, + 0x40, 0x02, 0x02, 0x02, 0x02, 0x40, 0x02, 0x02, + 0x40, 0x40, 0x02, 0x02, 0x40, 0x40, 0x02, 0x02, + 0x02, 0x02, 0x43, 0x02, 0x02, 0x40, 0x40, 0x40, + 0x40, 0x02, 0x02, 0x43, 0x02, 0x02, 0x02, 0x02, + 0x40, 0x40, 0x40, 0x40, 0x43, 0x43, 0x43, 0x43, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, 0x40, 0x02, + 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x43, 0x02, 0x40, 0x40, 0x40, 0x40, + 0x80, 0x80, 0x80, 0x80, 0x02, 0x40, 0x40, 0x40, + 0x80, 0x80, 0x80, 0x80, 0x40, 0x80, 0x80, 0x40, + 0x40, 0x80, 0x40, 0x40, 0x40, 0x40, 0x40, 0x80, + 0x80, 0x80, 0x40, 0x40, 0x80, 0x40, 0x40, 0x01, + 0x01, 0x01, 0x01, 0x40, 0x40, 0x40, 0x40, 0x02, + 0x02, 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, 
0x02, + 0x02, 0x02, 0x02, 0x02, 0x40, 0x40, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, + 0x4c, 0x4c, 0x4c, 0xc0, 0xc0, 0xc0, 0x40, 0x40, + 0x4c, 0x4c, 0x4c, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0x40, 0x4c, 0xc0, 0xc0, 0x40, 0x40, 0x4c, + 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, 0xc0, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0xcc, 0xcc, 0xc0, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0x8c, 0xc0, 0x8c, 0x8c, 0x8c, + 0x8c, 0x8c, 0x8c, 0x8c, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x45, + 0x45, 0x45, 0x45, 0x45, 0x45, 0x45, 0x45, 0x80, + 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x80, + 0x80, 0x8c, 0x80, 0x80, 0x80, 0x80, 0x80, 0x8c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x80, 0x4c, 0x4c, + 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, 0x4c, + 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, 0x8c, 0x8c, 0x8c, + 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, 0x4c, + 0x8c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, 0x8c, 0x82, + 0x82, 0x82, 0x82, 0x82, 0x8c, 0x4c, 0x8c, 0x8c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, 0x4c, 0x8c, 0x8c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x40, 0x40, 0x4c, 0x4c, + 0x4c, 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, 0x40, 0x40, + 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, + 0x40, 0x40, 0x40, 0x40, 0x8c, 0x8c, 0x8c, 0x8c, + 0x40, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x40, + 0x8c, 0x41, 0x01, 0x41, 0x41, 0x41, 0x41, 0x41, + 0x41, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, + 0x40, +}; +static constexpr uint8_t s_joinRules[13][13] = { + 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, + 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, + 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, + 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, + 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, + 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 
0, +}; +[[msvc::forceinline]] constexpr uint8_t ucdLookup(const char32_t cp) noexcept +{ + const auto s1 = s_stage1[cp >> 11]; + const auto s2 = s_stage2[s1 + ((cp >> 6) & 31)]; + const auto s3 = s_stage3[s2 + ((cp >> 3) & 7)]; + const auto s4 = s_stage4[s3 + ((cp >> 0) & 7)]; + return s4; +} +[[msvc::forceinline]] constexpr uint8_t ucdGraphemeJoins(const uint8_t lead, const uint8_t trail) noexcept { - char32_t codepoint = 0; + const auto l = lead & 15; + const auto t = trail & 15; + return s_joinRules[l][t]; +} +[[msvc::forceinline]] constexpr int ucdToCharacterWidth(const uint8_t val) noexcept +{ + return val >> 6; +} +// clang-format on + +[[msvc::forceinline]] constexpr std::wstring_view::iterator utf16NextOrFFFD(std::wstring_view::iterator it, const std::wstring_view::iterator& end, char32_t& out) +{ + char32_t c = *it++; - switch (glyph.size()) + // Is any surrogate? + if ((c & 0xF800) == 0xD800) { - case 1: - codepoint = til::at(glyph, 0); - break; - case 2: - codepoint = (til::at(glyph, 0) & 0x3FF) << 10; - codepoint |= til::at(glyph, 1) & 0x3FF; - codepoint += 0x10000; - break; - default: - codepoint = 0; - break; + const char32_t c1 = c; + c = 0xfffd; + + // Is leading surrogate and not at end? + if ((c1 & 0x400) == 0 && it != end) + { + const char32_t c2 = *it; + // Is also trailing surrogate! + if ((c2 & 0xFC00) == 0xDC00) + { + c = (c1 << 10) - 0x35FDC00 + c2; + ++it; + } + } } - if (codepoint < 0x80) + out = c; + return it; +} + +[[msvc::forceinline]] constexpr std::wstring_view::iterator utf16PrevOrFFFD(std::wstring_view::iterator it, const std::wstring_view::iterator& beg, char32_t& out) +{ + char32_t c = *--it; + + // Is any surrogate? + if ((c & 0xF800) == 0xD800) { - return CodepointWidth::Narrow; + const char32_t c2 = c; + c = 0xfffd; + + // Is trailing surrogate and not at begin? + if ((c2 & 0x400) != 0 && it != beg) + { + const char32_t c1 = it[-1]; + // Is also leading surrogate! 
+ if ((c1 & 0xFC00) == 0xD800) + { + c = (c1 << 10) - 0x35FDC00 + c2; + --it; + } + } } - // The return value of _lookupGlyphWidth coincides with the enum value of CodepointWidth - // on purpose to allow for this easy conversion to happen. Optimally, we should probably - // remove CodepointWidth altogether to allow for zero-width joiners and other characters. - static_assert(WI_EnumValue(CodepointWidth::Narrow) == 1); - static_assert(WI_EnumValue(CodepointWidth::Wide) == 2); - return static_cast(_lookupGlyphWidth(codepoint, glyph)); + out = c; + return it; } -// Routine Description: -// - checks if codepoint is wide. will attempt to fallback as much possible until an answer is determined -// Arguments: -// - glyph - the utf16 encoded codepoint to check width of -// Return Value: -// - true if codepoint is wide -bool CodepointWidthDetector::IsWide(const std::wstring_view& glyph) noexcept +static CodepointWidthDetector s_codepointWidthDetector; + +CodepointWidthDetector& CodepointWidthDetector::Singleton() noexcept +{ + return s_codepointWidthDetector; +} + +CodepointWidthDetector::CodepointWidthDetector() : + _enableGraphemes{ Feature_Graphemes::IsEnabled() } +{ +} + +size_t CodepointWidthDetector::GraphemeNext(const std::wstring_view& str, size_t offset, int* width) noexcept +{ + if (!_enableGraphemes.load(std::memory_order_relaxed)) + { + return _graphemeNextOld(str, offset, width); + } + + const auto beg = str.begin(); + const auto end = str.end(); + auto it = beg + std::min(offset, str.size()); + + if (it == end) + { + return 0; + } + + char32_t cp; + it = utf16NextOrFFFD(it, end, cp); + + auto lead = ucdLookup(cp); + int totalWidth = 0; + + for (;;) + { + auto w = ucdToCharacterWidth(lead); + if (w == 3) + { + w = _checkFallbackViaCache(cp); + } + + totalWidth += w; + + if (it == end) + { + break; + } + + const auto it2 = utf16NextOrFFFD(it, end, cp); + const auto trail = ucdLookup(cp); + + if (!ucdGraphemeJoins(lead, trail)) + { + break; + } + + it = it2; + 
lead = trail; + } + + if (width) + { + *width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); + } + return it - beg; +} + +// This code is identical to GraphemeNext() but with the order of operations reversed since we're iterating backwards. +size_t CodepointWidthDetector::GraphemePrev(const std::wstring_view& str, size_t offset, int* width) noexcept { - return GetWidth(glyph) == CodepointWidth::Wide; + if (!_enableGraphemes.load(std::memory_order_relaxed)) + { + return _graphemePrevOld(str, offset, width); + } + + const auto beg = str.begin(); + auto it = beg + std::min(offset, str.size()); + + if (it == beg) + { + return 0; + } + + char32_t cp; + it = utf16PrevOrFFFD(it, beg, cp); + + auto trail = ucdLookup(cp); + int totalWidth = 0; + + for (;;) + { + auto w = ucdToCharacterWidth(trail); + if (w == 3) + { + w = _checkFallbackViaCache(cp); + } + + totalWidth += w; + + if (it == beg) + { + break; + } + + const auto it2 = utf16PrevOrFFFD(it, beg, cp); + const auto lead = ucdLookup(cp); + + if (!ucdGraphemeJoins(lead, trail)) + { + break; + } + + it = it2; + trail = lead; + } + + if (width) + { + *width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); + } + return it - beg; } -// GetWidth's slow-path for non-ASCII characters. Returns the number of columns the codepoint takes up in the terminal. 
-uint8_t CodepointWidthDetector::_lookupGlyphWidth(const char32_t codepoint, const std::wstring_view& glyph) noexcept +__declspec(noinline) size_t CodepointWidthDetector::_graphemeNextOld(const std::wstring_view& str, size_t offset, int* width) noexcept +{ + const auto beg = str.begin(); + const auto end = str.end(); + auto it = beg + std::min(offset, str.size()); + + if (it == end) + { + return 0; + } + + char32_t cp; + it = utf16NextOrFFFD(it, end, cp); + + if (width) + { + *width = _getWidthOld(cp); + } + + return it - beg; +} + +__declspec(noinline) size_t CodepointWidthDetector::_graphemePrevOld(const std::wstring_view& str, size_t offset, int* width) noexcept +{ + const auto beg = str.begin(); + auto it = beg + std::min(offset, str.size()); + + if (it == beg) + { + return 0; + } + + char32_t cp; + it = utf16PrevOrFFFD(it, beg, cp); + + if (width) + { + *width = _getWidthOld(cp); + } + + return it - beg; +} + +int CodepointWidthDetector::_getWidthOld(const char32_t codepoint) noexcept { #pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'lower_bound<...>()' which may throw exceptions (f.6). const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint); - uint8_t width = 1; + int width = 1; if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound) { width = 2; if (it->isAmbiguous) { - width = _checkFallbackViaCache(codepoint, glyph); + width = _checkFallbackViaCache(codepoint); } } @@ -395,7 +1126,7 @@ uint8_t CodepointWidthDetector::_lookupGlyphWidth(const char32_t codepoint, cons // Call the function specified via SetFallbackMethod() to turn CodepointWidth::Ambiguous into Narrow/Wide. // Caches the results in _fallbackCache. This is _lookupGlyphWidth's even-slower-path. 
-uint8_t CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint, const std::wstring_view& glyph) noexcept +__declspec(noinline) int CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint) noexcept try { // Ambiguous glyphs are considered narrow by default. See microsoft/terminal#2066 for more info. @@ -409,7 +1140,21 @@ try return it->second; } - const uint8_t width = _pfnFallbackMethod(glyph) ? 2 : 1; + wchar_t buf[2]; + size_t len = 0; + if (codepoint <= 0xffff) + { + buf[0] = static_cast(codepoint); + len = 1; + } + else + { + buf[0] = static_cast((codepoint >> 10) + 0xD7C0); + buf[1] = static_cast((codepoint & 0x3ff) | 0xDC00); + len = 2; + } + + const uint8_t width = _pfnFallbackMethod({ &buf[0], len }) ? 2 : 1; _fallbackCache.insert_or_assign(codepoint, width); return width; } @@ -419,6 +1164,11 @@ catch (...) return 1; } +void CodepointWidthDetector::SetEnableGraphemes(const bool enable) noexcept +{ + _enableGraphemes.store(enable, std::memory_order_relaxed); +} + // Method Description: // - Sets a function that should be used as the fallback mechanism for // determining a particular glyph's width, should the glyph be an ambiguous @@ -442,7 +1192,7 @@ void CodepointWidthDetector::SetFallbackMethod(std::function // Return Value: // - -void CodepointWidthDetector::NotifyFontChanged() noexcept +void CodepointWidthDetector::ClearFallbackCache() noexcept { #pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'clear()' which may throw exceptions (f.6). 
_fallbackCache.clear(); diff --git a/src/types/CodepointWidthDetector_gen.go b/src/types/CodepointWidthDetector_gen.go new file mode 100644 index 00000000000..15741f972c5 --- /dev/null +++ b/src/types/CodepointWidthDetector_gen.go @@ -0,0 +1,566 @@ +package main + +import ( + "bytes" + "encoding/xml" + "fmt" + "math" + "os" + "slices" + "strconv" + "strings" + "time" + "unsafe" +) + +type CharacterWidth int + +const ( + cwZeroWidth CharacterWidth = iota + cwNarrow + cwWide + cwAmbiguous +) + +type ClusterBreak int + +const ( + cbOther ClusterBreak = iota + cbControl + cbExtend + cbPrepend + cbZeroWidthJoiner + cbRegionalIndicator + cbHangulL + cbHangulV + cbHangulT + cbHangulLV + cbHangulLVT + cbConjunctLinker + cbExtendedPictographic + + cbCount +) + +type HexInt int + +func (h *HexInt) UnmarshalXMLAttr(attr xml.Attr) error { + v, err := strconv.ParseUint(attr.Value, 16, 32) + if err != nil { + return err + } + *h = HexInt(v) + return nil +} + +type UCD struct { + Description string `xml:"description"` + Repertoire struct { + Group []struct { + GeneralCategory string `xml:"gc,attr"` + GraphemeClusterBreak string `xml:"GCB,attr"` + IndicConjunctBreak string `xml:"InCB,attr"` + ExtendedPictographic string `xml:"ExtPict,attr"` + EastAsian string `xml:"ea,attr"` + + // This maps the following tags: + // , , , + Char []struct { + Codepoint HexInt `xml:"cp,attr"` + FirstCodepoint HexInt `xml:"first-cp,attr"` + LastCodepoint HexInt `xml:"last-cp,attr"` + + GeneralCategory string `xml:"gc,attr"` + GraphemeClusterBreak string `xml:"GCB,attr"` + IndicConjunctBreak string `xml:"InCB,attr"` + ExtendedPictographic string `xml:"ExtPict,attr"` + EastAsian string `xml:"ea,attr"` + } `xml:",any"` + } `xml:"group"` + } `xml:"repertoire"` +} + +func main() { + if err := run(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +func run() error { + if len(os.Args) <= 1 { + fmt.Println(`Usage: + go run CodepointWidthDetector_gen.go + +You can download the latest 
ucd.nounihan.grouped.xml from: + https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip`) + os.Exit(1) + } + + data, err := os.ReadFile(os.Args[1]) + if err != nil { + return fmt.Errorf("failed to read XML: %w", err) + } + + ucd := &UCD{} + err = xml.Unmarshal(data, ucd) + if err != nil { + return fmt.Errorf("failed to parse XML: %w", err) + } + + values, err := extractValuesFromUCD(ucd) + if err != nil { + return err + } + + // More stages = Less size. The trajectory roughly follows a+b*c^stages, where c < 1. + // 4 still gives ~30% savings over 3 stages and going beyond 5 gives diminishing returns (<10%). + trie := buildBestTrie(values, 2, 8, 4) + rules := buildJoinRules() + + for cp, expected := range values { + var v TrieType + for _, s := range trie.Stages { + v = s.Values[int(v)+((cp>>s.Shift)&s.Mask)] + } + if v != expected { + return fmt.Errorf("trie sanity check failed for %U", cp) + } + } + + buf := &strings.Builder{} + + _, _ = fmt.Fprintf(buf, "// Generated by CodepointWidthDetector_gen.go\n") + _, _ = fmt.Fprintf(buf, "// on %v, from %s, %d bytes\n", time.Now().UTC().Format(time.RFC3339), ucd.Description, trie.TotalSize+len(rules)*len(rules)) + _, _ = fmt.Fprintf(buf, "// clang-format off\n") + + for i, s := range trie.Stages { + width := 16 + if i != 0 { + width = s.Mask + 1 + } + _, _ = fmt.Fprintf(buf, "static constexpr uint%d_t s_stage%d[] = {", s.Bits, i+1) + for j, value := range s.Values { + if j%width == 0 { + buf.WriteString("\n ") + } + _, _ = fmt.Fprintf(buf, " 0x%0*x,", s.Bits/4, value) + } + buf.WriteString("\n};\n") + } + + _, _ = fmt.Fprintf(buf, "static constexpr uint8_t s_joinRules[%d][%d] = {", len(rules), len(rules)) + for _, row := range rules { + buf.WriteString("\n ") + for _, val := range row { + var i int + if val { + i = 1 + } + _, _ = fmt.Fprintf(buf, " %d,", i) + } + } + buf.WriteString("\n};\n") + + _, _ = fmt.Fprintf(buf, "[[msvc::forceinline]] constexpr uint%d_t ucdLookup(const char32_t cp) noexcept\n", 
trie.Stages[len(trie.Stages)-1].Bits) + buf.WriteString("{\n") + for i, s := range trie.Stages { + _, _ = fmt.Fprintf(buf, " const auto s%d = s_stage%d[", i+1, i+1) + if i == 0 { + _, _ = fmt.Fprintf(buf, "cp >> %d", s.Shift) + } else { + _, _ = fmt.Fprintf(buf, "s%d + ((cp >> %d) & %d)", i, s.Shift, s.Mask) + } + buf.WriteString("];\n") + } + _, _ = fmt.Fprintf(buf, " return s%d;\n", len(trie.Stages)) + buf.WriteString("}\n") + + buf.WriteString("[[msvc::forceinline]] constexpr uint8_t ucdGraphemeJoins(const uint8_t lead, const uint8_t trail) noexcept\n") + buf.WriteString("{\n") + buf.WriteString(" const auto l = lead & 15;\n") + buf.WriteString(" const auto t = trail & 15;\n") + buf.WriteString(" return s_joinRules[l][t];\n") + buf.WriteString("}\n") + + buf.WriteString("[[msvc::forceinline]] constexpr int ucdToCharacterWidth(const uint8_t val) noexcept\n") + buf.WriteString("{\n") + buf.WriteString(" return val >> 6;\n") + buf.WriteString("}\n") + + buf.WriteString("// clang-format on\n") + + _, _ = os.Stdout.WriteString(buf.String()) + return nil +} + +type TrieType uint32 + +func extractValuesFromUCD(ucd *UCD) ([]TrieType, error) { + values := make([]TrieType, 1114112) + fillRange(values, trieValue(cbOther, cwNarrow)) + + for _, group := range ucd.Repertoire.Group { + for _, char := range group.Char { + generalCategory := coalesce(char.GeneralCategory, group.GeneralCategory) + graphemeClusterBreak := coalesce(char.GraphemeClusterBreak, group.GraphemeClusterBreak) + indicConjunctBreak := coalesce(char.IndicConjunctBreak, group.IndicConjunctBreak) + extendedPictographic := coalesce(char.ExtendedPictographic, group.ExtendedPictographic) + eastAsian := coalesce(char.EastAsian, group.EastAsian) + + firstCp, lastCp := int(char.FirstCodepoint), int(char.LastCodepoint) + if char.Codepoint != 0 { + firstCp, lastCp = int(char.Codepoint), int(char.Codepoint) + } + + var ( + cb ClusterBreak + width CharacterWidth + ) + + switch graphemeClusterBreak { + case "XX": // 
Anything else + cb = cbOther + case "CR", "LF", "CN": // Carriage Return, Line Feed, Control + // We ignore GB3 which demands that CR × LF do not break apart, because + // a) these control characters won't normally reach our text storage + // b) otherwise we're in a raw write mode and historically conhost stores them in separate cells + cb = cbControl + case "EX", "SM": // Extend, SpacingMark + cb = cbExtend + case "PP": // Prepend + cb = cbPrepend + case "ZWJ": // Zero Width Joiner + cb = cbZeroWidthJoiner + case "RI": // Regional Indicator + cb = cbRegionalIndicator + case "L": // Hangul Syllable Type L + cb = cbHangulL + case "V": // Hangul Syllable Type V + cb = cbHangulV + case "T": // Hangul Syllable Type T + cb = cbHangulT + case "LV": // Hangul Syllable Type LV + cb = cbHangulLV + case "LVT": // Hangul Syllable Type LVT + cb = cbHangulLVT + default: + return nil, fmt.Errorf("unrecognized GCB %s for %U to %U", graphemeClusterBreak, firstCp, lastCp) + } + + if extendedPictographic == "Y" { + // Currently every single Extended_Pictographic codepoint happens to be GCB=XX. + // This is fantastic for us because it means we can stuff it into the ClusterBreak enum + // and treat it as an alias of EXTEND, but with the special GB11 properties. + if cb != cbOther { + return nil, fmt.Errorf("unexpected GCB %s with ExtPict=Y for %U to %U", graphemeClusterBreak, firstCp, lastCp) + } + cb = cbExtendedPictographic + } + + if indicConjunctBreak == "Linker" { + // Similarly here, we can treat it as an alias for EXTEND, but with the GB9c properties. 
+ if cb != cbExtend { + return nil, fmt.Errorf("unexpected GCB %s with InCB=Linker for %U to %U", graphemeClusterBreak, firstCp, lastCp) + } + cb = cbConjunctLinker + } + + switch eastAsian { + case "N", "Na", "H": // neutral, narrow, half-width + width = cwNarrow + case "F", "W": // full-width, wide + width = cwWide + case "A": // ambiguous + width = cwAmbiguous + default: + return nil, fmt.Errorf("unrecognized ea %s for %U to %U", eastAsian, firstCp, lastCp) + } + + // There's no "ea" attribute for "zero width" so we need to do that ourselves. This matches: + // Mc: Mark, spacing combining + // Me: Mark, enclosing + // Mn: Mark, non-spacing + // Cf: Control, format + if strings.HasPrefix(generalCategory, "M") || generalCategory == "Cf" { + width = cwZeroWidth + } + + fillRange(values[firstCp:lastCp+1], trieValue(cb, width)) + } + } + + // Box-drawing and block elements are ambiguous according to their EastAsian attribute, + // but by convention terminals always consider them to be narrow. + fillRange(values[0x2500:0x259F+1], trieValue(cbOther, cwNarrow)) + // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. + // By convention, this also turns them from being ambiguous, = narrow by default, into wide ones. 
+ fillRange(values[0xFE0F:0xFE0F+1], trieValue(cbExtend, cwWide)) + + return values, nil +} + +func trieValue(cb ClusterBreak, width CharacterWidth) TrieType { + return TrieType(byte(cb) | byte(width)<<6) +} + +func coalesce(a, b string) string { + if a != "" { + return a + } + return b +} + +type Stage struct { + Values []TrieType + Shift int + Mask int + Bits int +} + +type Trie struct { + Stages []*Stage + TotalSize int +} + +func buildBestTrie(uncompressed []TrieType, minShift, maxShift, stages int) *Trie { + delta := maxShift - minShift + 1 + results := make(chan *Trie) + bestTrie := &Trie{TotalSize: math.MaxInt} + + iters := 1 + for i := 1; i < stages; i++ { + iters *= delta + } + + for i := 0; i < iters; i++ { + go func(i int) { + // Given minShift=2, maxShift=3, depth=3 this generates + // [2 2 2] + // [3 2 2] + // [2 3 2] + // [3 3 2] + // [2 2 3] + // [3 2 3] + // [2 3 3] + // [3 3 3] + shifts := make([]int, stages-1) + for j := range shifts { + shifts[j] = minShift + i%delta + i /= delta + } + results <- buildTrie(uncompressed, shifts) + }(i) + } + + for i := 0; i < iters; i++ { + t := <-results + if bestTrie.TotalSize > t.TotalSize { + bestTrie = t + } + } + return bestTrie +} + +func buildTrie(uncompressed []TrieType, shifts []int) *Trie { + var cumulativeShift int + var stages []*Stage + + for _, shift := range shifts { + chunkSize := 1 << shift + cache := map[string]TrieType{} + compressed := make([]TrieType, 0, len(uncompressed)/8) + offsets := make([]TrieType, 0, len(uncompressed)/chunkSize) + + for i := 0; i < len(uncompressed); i += chunkSize { + chunk := uncompressed[i:min(len(uncompressed), i+chunkSize)] + // Cast the integer slice to a string so that it can be hashed. + key := unsafe.String((*byte)(unsafe.Pointer(&chunk[0])), len(chunk)*int(unsafe.Sizeof(chunk[0]))) + offset, exists := cache[key] + + if !exists { + // For a 4-stage trie searching for existing occurrences of chunk in compressed yields a ~10% + // compression improvement. 
Checking for overlaps with the tail end of compressed yields another ~15%. + // FYI I tried to shuffle the order of compressed chunks but found that this has a negligible impact. + if existing := findExisting(compressed, chunk); existing != -1 { + offset = TrieType(existing) + cache[key] = offset + } else { + overlap := measureOverlap(compressed, chunk) + compressed = append(compressed, chunk[overlap:]...) + offset = TrieType(len(compressed) - len(chunk)) + cache[key] = offset + } + } + + offsets = append(offsets, offset) + } + + stages = append(stages, &Stage{ + Values: compressed, + Shift: cumulativeShift, + Mask: chunkSize - 1, + }) + + uncompressed = offsets + cumulativeShift += shift + } + + stages = append(stages, &Stage{ + Values: uncompressed, + Shift: cumulativeShift, + Mask: math.MaxInt32, + }) + slices.Reverse(stages) + + for _, s := range stages { + m := slices.Max(s.Values) + if m <= 0xff { + s.Bits = 8 + } else if m <= 0xffff { + s.Bits = 16 + } else { + s.Bits = 32 + } + } + + totalSize := 0 + for _, s := range stages { + totalSize += (s.Bits / 8) * len(s.Values) + } + + return &Trie{ + Stages: stages, + TotalSize: totalSize, + } +} + +// Finds needle in haystack. Returns -1 if it couldn't be found. +func findExisting(haystack, needle []TrieType) int { + if len(haystack) == 0 || len(needle) == 0 { + return -1 + } + + s := int(unsafe.Sizeof(TrieType(0))) + h := unsafe.Slice((*byte)(unsafe.Pointer(&haystack[0])), len(haystack)*s) + n := unsafe.Slice((*byte)(unsafe.Pointer(&needle[0])), len(needle)*s) + i := 0 + + for { + i = bytes.Index(h[i:], n) + if i == -1 { + return -1 + } + if i%s == 0 { + return i / s + } + } +} + +// Given two slices, this returns the amount by which prev's end overlaps with next's start. +// That is, given [0,1,2,3,4] and [2,3,4,5] this returns 3 because [2,3,4] is the "overlap". 
+func measureOverlap(prev, next []TrieType) int { + for overlap := min(len(prev), len(next)); overlap >= 0; overlap-- { + if slices.Equal(prev[len(prev)-overlap:], next[:overlap]) { + return overlap + } + } + return 0 +} + +func buildJoinRules() [cbCount][cbCount]bool { + // UAX #29 states: + // > Note: Testing two adjacent characters is insufficient for determining a boundary. + // + // I completely agree, but I really hate it. So this code trades off correctness for simplicity + // by using a simple lookup table anyway. Under most circumstances users won't notice, + // because as far as I can see this only behaves different for degenerate ("invalid") Unicode. + // It reduces our code complexity significantly and is way *way* faster. + // + // This is a great reference for the resulting table: + // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html + + // NOTE: We build the table in reverse, because rules with lower numbers take priority. + // (This is primarily relevant for GB9b vs. GB4.) + + // Otherwise, break everywhere. + // GB999: Any ÷ Any + var rules [cbCount][cbCount]bool + + // Do not break within emoji flag sequences. That is, do not break between regional indicator + // (RI) symbols if there is an odd number of RI characters before the break point. + // GB13: [^RI] (RI RI)* RI × RI + // GB12: sot (RI RI)* RI × RI + // + // We cheat here by not checking that the number of RIs is even. Meh! + rules[cbRegionalIndicator][cbRegionalIndicator] = true + + // Do not break within emoji modifier sequences or emoji zwj sequences. + // GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic} + // + // We cheat here by not checking that the ZWJ is preceded by an ExtPic. Meh! + rules[cbZeroWidthJoiner][cbExtendedPictographic] = true + + // Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker. 
+ // GB9c: \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker} [\p{InCB=Extend}\p{InCB=Linker}]* × \p{InCB=Consonant} + // + // I'm sure GB9c is great for these languages, but honestly the definition is complete whack. + // Just look at that chonker! This isn't a "cheat" like the others above, this is a reinvention: + // We treat it as having both ClusterBreak.PREPEND and ClusterBreak.EXTEND properties. + fillRange(rules[cbConjunctLinker][:], true) + for i := range rules { + rules[i][cbConjunctLinker] = true + } + + // Do not break before SpacingMarks, or after Prepend characters. + // GB9b: Prepend × + fillRange(rules[cbPrepend][:], true) + + // Do not break before SpacingMarks, or after Prepend characters. + // GB9a: × SpacingMark + // Do not break before extending characters or ZWJ. + // GB9: × (Extend | ZWJ) + for i := range rules { + // CodepointWidthDetector_gen.py sets SpacingMarks to ClusterBreak.EXTEND as well, + // since they're entirely identical to GB9's Extend. + rules[i][cbExtend] = true + rules[i][cbZeroWidthJoiner] = true + } + + // Do not break Hangul syllable sequences. + // GB8: (LVT | T) x T + rules[cbHangulLVT][cbHangulT] = true + rules[cbHangulT][cbHangulT] = true + // GB7: (LV | V) x (V | T) + rules[cbHangulLV][cbHangulT] = true + rules[cbHangulLV][cbHangulV] = true + rules[cbHangulV][cbHangulV] = true + rules[cbHangulV][cbHangulT] = true + // GB6: L x (L | V | LV | LVT) + rules[cbHangulL][cbHangulL] = true + rules[cbHangulL][cbHangulV] = true + rules[cbHangulL][cbHangulLV] = true + rules[cbHangulL][cbHangulLVT] = true + + // Do not break between a CR and LF. Otherwise, break before and after controls. 
+ // GB5: ÷ (Control | CR | LF) + for i := range rules { + rules[i][cbControl] = false + } + // GB4: (Control | CR | LF) ÷ + fillRange(rules[cbControl][:], false) + + // We ignore GB3 which demands that CR × LF do not break apart, because + // a) these control characters won't normally reach our text storage + // b) otherwise we're in a raw write mode and historically conhost stores them in separate cells + + // We also ignore GB1 and GB2 which demand breaks at the start and end, + // because that's not part of the loops in GraphemeNext/Prev and not this table. + return rules +} + +func fillRange[T any](s []T, v T) { + for i := range s { + s[i] = v + } +} diff --git a/src/types/GlyphWidth.cpp b/src/types/GlyphWidth.cpp index 2f7d0578a45..d517c526d86 100644 --- a/src/types/GlyphWidth.cpp +++ b/src/types/GlyphWidth.cpp @@ -5,16 +5,14 @@ #include "inc/CodepointWidthDetector.hpp" #include "inc/GlyphWidth.hpp" -#pragma warning(suppress : 26426) -// TODO GH 2676 - remove warning suppression and decide what to do re: singleton instance of CodepointWidthDetector -static CodepointWidthDetector widthDetector; - // Function Description: // - determines if the glyph represented by the string of characters should be // wide or not. See CodepointWidthDetector::IsWide bool IsGlyphFullWidth(const std::wstring_view& glyph) noexcept { - return widthDetector.IsWide(glyph); + int width; + CodepointWidthDetector::Singleton().GraphemeNext(glyph, 0, &width); + return width == 2; } // Function Description: @@ -24,29 +22,3 @@ bool IsGlyphFullWidth(const wchar_t wch) noexcept { return wch < 0x80 ? false : IsGlyphFullWidth({ &wch, 1 }); } - -// Function Description: -// - Sets a function that should be used by the global CodepointWidthDetector -// as the fallback mechanism for determining a particular glyph's width, -// should the glyph be an ambiguous width. 
-// A Terminal could hook in a Renderer's IsGlyphWideByFont method as the -// fallback to ask the renderer for the glyph's width (for example). -// Arguments: -// - pfnFallback - the function to use as the fallback method. -// Return Value: -// - -void SetGlyphWidthFallback(std::function pfnFallback) noexcept -{ - widthDetector.SetFallbackMethod(std::move(pfnFallback)); -} - -// Function Description: -// - Forwards notification about font changing to glyph width detector -// Arguments: -// - -// Return Value: -// - -void NotifyGlyphWidthFontChanged() noexcept -{ - widthDetector.NotifyFontChanged(); -} diff --git a/src/types/inc/CodepointWidthDetector.hpp b/src/types/inc/CodepointWidthDetector.hpp index 79d0f3e8582..a9da79bd80d 100644 --- a/src/types/inc/CodepointWidthDetector.hpp +++ b/src/types/inc/CodepointWidthDetector.hpp @@ -1,37 +1,29 @@ -/*++ -Copyright (c) Microsoft Corporation +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. -Module Name: -- CodepointWidthDetector.hpp - -Abstract: -- Object used to measure the width of a codepoint when it's rendered +#pragma once -Author: -- Austin Diviness (AustDi) 18-May-2018 ---*/ +struct CodepointWidthDetector +{ + static CodepointWidthDetector& Singleton() noexcept; -#pragma once + CodepointWidthDetector(); -#include "convert.hpp" + size_t GraphemeNext(const std::wstring_view& str, size_t offset, int* width) noexcept; + size_t GraphemePrev(const std::wstring_view& str, size_t offset, int* width) noexcept; -// use to measure the width of a codepoint -class CodepointWidthDetector final -{ -public: - CodepointWidth GetWidth(const std::wstring_view& glyph) noexcept; - bool IsWide(const std::wstring_view& glyph) noexcept; + void SetEnableGraphemes(bool enable) noexcept; void SetFallbackMethod(std::function pfnFallback) noexcept; - void NotifyFontChanged() noexcept; - -#ifdef UNIT_TESTING - friend class CodepointWidthDetectorTests; -#endif + void ClearFallbackCache() noexcept; private: - uint8_t 
_lookupGlyphWidth(char32_t codepoint, const std::wstring_view& glyph) noexcept; - uint8_t _checkFallbackViaCache(char32_t codepoint, const std::wstring_view& glyph) noexcept; + __declspec(noinline) int _checkFallbackViaCache(char32_t codepoint) noexcept; + + __declspec(noinline) size_t _graphemeNextOld(const std::wstring_view& str, size_t offset, int* width) noexcept; + __declspec(noinline) size_t _graphemePrevOld(const std::wstring_view& str, size_t offset, int* width) noexcept; + int _getWidthOld(char32_t cp) noexcept; std::unordered_map _fallbackCache; std::function _pfnFallbackMethod; + std::atomic _enableGraphemes; }; diff --git a/src/types/inc/GlyphWidth.hpp b/src/types/inc/GlyphWidth.hpp index 11982bf5a0c..10b5a966b1c 100644 --- a/src/types/inc/GlyphWidth.hpp +++ b/src/types/inc/GlyphWidth.hpp @@ -10,12 +10,5 @@ Module Name: */ #pragma once -#include -#include - -#include "convert.hpp" - bool IsGlyphFullWidth(const std::wstring_view& glyph) noexcept; bool IsGlyphFullWidth(const wchar_t wch) noexcept; -void SetGlyphWidthFallback(std::function pfnFallback) noexcept; -void NotifyGlyphWidthFontChanged() noexcept; diff --git a/src/types/lib/types.vcxproj.filters b/src/types/lib/types.vcxproj.filters index efb65b2bc42..2ecdd122c9f 100644 --- a/src/types/lib/types.vcxproj.filters +++ b/src/types/lib/types.vcxproj.filters @@ -30,9 +30,6 @@ Source Files - - Source Files - Source Files @@ -60,6 +57,9 @@ Source Files + + Source Files + @@ -122,5 +122,6 @@ + - + \ No newline at end of file diff --git a/src/types/ut_types/CodepointWidthDetectorTests.cpp b/src/types/ut_types/CodepointWidthDetectorTests.cpp new file mode 100644 index 00000000000..dbc1333338e --- /dev/null +++ b/src/types/ut_types/CodepointWidthDetectorTests.cpp @@ -0,0 +1,1331 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +#include "precomp.h" +#include "WexTestClass.h" + +#include "../types/inc/CodepointWidthDetector.hpp" + +// FYI at the time of writing you may have to generate this table in cmd with +// go run CodepointWidthDetectorTests_gen.go > temp.txt +// because PowerShell garbles Unicode text between piped commands. + +// Several test cases are commented out. They're due to us intentionally not handling some rules correctly: +// * GB3: CR × LF +// There's no point in us handling this, because in our implementation +// the text buffer won't ever see these control characters to begin with. +// * GB9c: \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* × \p{InCB=Consonant} +// aka: Do not break within certain combinations with InCB=Linker. +// Building a state machine for this hurts performance, and I'm not sure anyone will notice. +// Instead, our implementation just joins any preceding and following character if there's a Linker in between. +// * GB11: ExtPic Extend* ZWJ × ExtPic +// aka: ZWJs should only join if they're in between two ExtPics +// Same thing here. Any ExtPic joins with any preceding ZWJ. +// * GB12/13: [^RI] (RI RI)* RI × RI +// aka: RIs should have an even number. +// Same thing here. Any RI joins with any preceding RI. 
+ +// Generated by CodepointWidthDetector_gen.go +// on 2024-03-21T15:48:04Z, from Unicode 15.1.0 +struct GraphemeBreakTest +{ + const wchar_t* comment; + const wchar_t* graphemes[4]; +}; +static constexpr GraphemeBreakTest s_graphemeBreakTests[] = { + { L"÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]", L" ", L" " }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L" \x0308", L" " }, + { L"÷ [0.2] SPACE (Other) ÷ [5.0] (CR) ÷ [0.3]", L" ", L"\r" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L" \x0308", L"\r" }, + { L"÷ [0.2] SPACE (Other) ÷ [5.0] (LF) ÷ [0.3]", L" ", L"\n" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L" \x0308", L"\n" }, + { L"÷ [0.2] SPACE (Other) ÷ [5.0] (Control) ÷ [0.3]", L" ", L"\x01" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L" \x0308", L"\x01" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L" \x034F" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L" \x0308\x034F" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L" ", L"\U0001F1E6" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L" \x0308", L"\U0001F1E6" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L" ", L"\x0600" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L" \x0308", L"\x0600" }, + { L"÷ [0.2] SPACE (Other) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L" \x0A03" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA 
(SpacingMark) ÷ [0.3]", L" \x0308\x0A03" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L" ", L"\x1100" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L" \x0308", L"\x1100" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L" ", L"\x1160" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L" \x0308", L"\x1160" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L" ", L"\x11A8" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L" \x0308", L"\x11A8" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L" ", L"\xAC00" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L" \x0308", L"\xAC00" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L" ", L"\xAC01" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L" \x0308", L"\xAC01" }, + { L"÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L" \x0900" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L" \x0308\x0900" }, + { L"÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L" \x0903" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L" \x0308\x0903" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L" ", L"\x0904" }, + { L"÷ [0.2] SPACE (Other) × 
[9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L" \x0308", L"\x0904" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L" ", L"\x0D4E" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L" \x0308", L"\x0D4E" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L" ", L"\x0915" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L" \x0308", L"\x0915" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L" ", L"\x231A" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L" \x0308", L"\x231A" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L" \x0300" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L" \x0308\x0300" }, + { L"÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L" \x093C" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L" \x0308\x093C" }, + { L"÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L" \x094D" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L" \x0308\x094D" }, + { L"÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L" \x200D" }, + { L"÷ [0.2] SPACE (Other) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L" \x0308\x200D" }, + { L"÷ [0.2] SPACE (Other) ÷ [999.0] (Other) ÷ [0.3]", L" ", L"\x0378" }, + { L"÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L" \x0308", L"\x0378" }, + { L"÷ [0.2] (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]", L"\r", L" " }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\r", L"\x0308", L" " }, + { L"÷ [0.2] (CR) ÷ [4.0] (CR) ÷ [0.3]", L"\r", L"\r" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\r", L"\x0308", L"\r" }, + //{ L"÷ [0.2] (CR) × [3.0] (LF) ÷ [0.3]", L"\r\n" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\r", L"\x0308", L"\n" }, + { L"÷ [0.2] (CR) ÷ [4.0] (Control) ÷ [0.3]", L"\r", L"\x01" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\r", L"\x0308", L"\x01" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\r", L"\x034F" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\r", L"\x0308\x034F" }, + { L"÷ [0.2] (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\r", L"\U0001F1E6" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\r", L"\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\r", L"\x0600" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\r", L"\x0308", L"\x0600" }, + { L"÷ [0.2] (CR) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\r", L"\x0A03" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\r", 
L"\x0308\x0A03" }, + { L"÷ [0.2] (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\r", L"\x1100" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\r", L"\x0308", L"\x1100" }, + { L"÷ [0.2] (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\r", L"\x1160" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\r", L"\x0308", L"\x1160" }, + { L"÷ [0.2] (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\r", L"\x11A8" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\r", L"\x0308", L"\x11A8" }, + { L"÷ [0.2] (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\r", L"\xAC00" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\r", L"\x0308", L"\xAC00" }, + { L"÷ [0.2] (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\r", L"\xAC01" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\r", L"\x0308", L"\xAC01" }, + { L"÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\r", L"\x0900" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\r", L"\x0308\x0900" }, + { L"÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\r", L"\x0903" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\r", L"\x0308\x0903" }, + { L"÷ [0.2] (CR) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\r", L"\x0904" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", 
L"\r", L"\x0308", L"\x0904" }, + { L"÷ [0.2] (CR) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\r", L"\x0D4E" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\r", L"\x0308", L"\x0D4E" }, + { L"÷ [0.2] (CR) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\r", L"\x0915" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\r", L"\x0308", L"\x0915" }, + { L"÷ [0.2] (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]", L"\r", L"\x231A" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\r", L"\x0308", L"\x231A" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\r", L"\x0300" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\r", L"\x0308\x0300" }, + { L"÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\r", L"\x093C" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\r", L"\x0308\x093C" }, + { L"÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\r", L"\x094D" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\r", L"\x0308\x094D" }, + { L"÷ [0.2] (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\r", L"\x200D" }, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\r", L"\x0308\x200D" }, + { L"÷ [0.2] (CR) ÷ [4.0] (Other) ÷ [0.3]", L"\r", L"\x0378" 
}, + { L"÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\r", L"\x0308", L"\x0378" }, + { L"÷ [0.2] (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]", L"\n", L" " }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\n", L"\x0308", L" " }, + { L"÷ [0.2] (LF) ÷ [4.0] (CR) ÷ [0.3]", L"\n", L"\r" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\n", L"\x0308", L"\r" }, + { L"÷ [0.2] (LF) ÷ [4.0] (LF) ÷ [0.3]", L"\n", L"\n" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\n", L"\x0308", L"\n" }, + { L"÷ [0.2] (LF) ÷ [4.0] (Control) ÷ [0.3]", L"\n", L"\x01" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\n", L"\x0308", L"\x01" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\n", L"\x034F" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\n", L"\x0308\x034F" }, + { L"÷ [0.2] (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\n", L"\U0001F1E6" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\n", L"\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\n", L"\x0600" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\n", L"\x0308", L"\x0600" }, + { L"÷ [0.2] (LF) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\n", L"\x0A03" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\n", L"\x0308\x0A03" }, + { L"÷ [0.2] (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\n", L"\x1100" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL 
CHOSEONG KIYEOK (L) ÷ [0.3]", L"\n", L"\x0308", L"\x1100" }, + { L"÷ [0.2] (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\n", L"\x1160" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\n", L"\x0308", L"\x1160" }, + { L"÷ [0.2] (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\n", L"\x11A8" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\n", L"\x0308", L"\x11A8" }, + { L"÷ [0.2] (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\n", L"\xAC00" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\n", L"\x0308", L"\xAC00" }, + { L"÷ [0.2] (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\n", L"\xAC01" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\n", L"\x0308", L"\xAC01" }, + { L"÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\n", L"\x0900" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\n", L"\x0308\x0900" }, + { L"÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\n", L"\x0903" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\n", L"\x0308\x0903" }, + { L"÷ [0.2] (LF) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\n", L"\x0904" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\n", L"\x0308", L"\x0904" }, + { L"÷ [0.2] (LF) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\n", L"\x0D4E" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING 
DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\n", L"\x0308", L"\x0D4E" }, + { L"÷ [0.2] (LF) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\n", L"\x0915" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\n", L"\x0308", L"\x0915" }, + { L"÷ [0.2] (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]", L"\n", L"\x231A" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\n", L"\x0308", L"\x231A" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\n", L"\x0300" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\n", L"\x0308\x0300" }, + { L"÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\n", L"\x093C" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\n", L"\x0308\x093C" }, + { L"÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\n", L"\x094D" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\n", L"\x0308\x094D" }, + { L"÷ [0.2] (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\n", L"\x200D" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\n", L"\x0308\x200D" }, + { L"÷ [0.2] (LF) ÷ [4.0] (Other) ÷ [0.3]", L"\n", L"\x0378" }, + { L"÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\n", L"\x0308", L"\x0378" }, + { L"÷ [0.2] (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]", 
L"\x01", L" " }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x01", L"\x0308", L" " }, + { L"÷ [0.2] (Control) ÷ [4.0] (CR) ÷ [0.3]", L"\x01", L"\r" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x01", L"\x0308", L"\r" }, + { L"÷ [0.2] (Control) ÷ [4.0] (LF) ÷ [0.3]", L"\x01", L"\n" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x01", L"\x0308", L"\n" }, + { L"÷ [0.2] (Control) ÷ [4.0] (Control) ÷ [0.3]", L"\x01", L"\x01" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x01", L"\x0308", L"\x01" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x01", L"\x034F" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x01", L"\x0308\x034F" }, + { L"÷ [0.2] (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x01", L"\U0001F1E6" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x01", L"\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x01", L"\x0600" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x01", L"\x0308", L"\x0600" }, + { L"÷ [0.2] (Control) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x01", L"\x0A03" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x01", L"\x0308\x0A03" }, + { L"÷ [0.2] (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x01", L"\x1100" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x01", L"\x0308", L"\x1100" }, + { 
L"÷ [0.2] (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x01", L"\x1160" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x01", L"\x0308", L"\x1160" }, + { L"÷ [0.2] (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x01", L"\x11A8" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x01", L"\x0308", L"\x11A8" }, + { L"÷ [0.2] (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x01", L"\xAC00" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x01", L"\x0308", L"\xAC00" }, + { L"÷ [0.2] (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x01", L"\xAC01" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x01", L"\x0308", L"\xAC01" }, + { L"÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x01", L"\x0900" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x01", L"\x0308\x0900" }, + { L"÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x01", L"\x0903" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x01", L"\x0308\x0903" }, + { L"÷ [0.2] (Control) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x01", L"\x0904" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x01", L"\x0308", L"\x0904" }, + { L"÷ [0.2] (Control) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x01", L"\x0D4E" }, + { 
L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x01", L"\x0308", L"\x0D4E" }, + { L"÷ [0.2] (Control) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x01", L"\x0915" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x01", L"\x0308", L"\x0915" }, + { L"÷ [0.2] (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]", L"\x01", L"\x231A" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x01", L"\x0308", L"\x231A" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x01", L"\x0300" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x01", L"\x0308\x0300" }, + { L"÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x01", L"\x093C" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x01", L"\x0308\x093C" }, + { L"÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x01", L"\x094D" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x01", L"\x0308\x094D" }, + { L"÷ [0.2] (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x01", L"\x200D" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x01", L"\x0308\x200D" }, + { L"÷ [0.2] (Control) ÷ [4.0] (Other) ÷ [0.3]", L"\x01", L"\x0378" }, + { L"÷ [0.2] (Control) ÷ [4.0] COMBINING 
DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x01", L"\x0308", L"\x0378" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x034F", L" " }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x034F\x0308", L" " }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] (CR) ÷ [0.3]", L"\x034F", L"\r" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x034F\x0308", L"\r" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] (LF) ÷ [0.3]", L"\x034F", L"\n" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x034F\x0308", L"\n" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] (Control) ÷ [0.3]", L"\x034F", L"\x01" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x034F\x0308", L"\x01" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x034F\x034F" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x034F\x0308\x034F" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x034F", L"\U0001F1E6" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x034F\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x034F", L"\x0600" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x034F\x0308", L"\x0600" }, + { L"÷ 
[0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x034F\x0A03" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x034F\x0308\x0A03" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x034F", L"\x1100" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x034F\x0308", L"\x1100" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x034F", L"\x1160" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x034F\x0308", L"\x1160" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x034F", L"\x11A8" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x034F\x0308", L"\x11A8" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x034F", L"\xAC00" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x034F\x0308", L"\xAC00" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x034F", L"\xAC01" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x034F\x0308", L"\xAC01" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x034F\x0900" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x034F\x0308\x0900" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x034F\x0903" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x034F\x0308\x0903" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x034F", L"\x0904" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x034F\x0308", L"\x0904" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x034F", L"\x0D4E" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x034F\x0308", L"\x0D4E" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x034F", L"\x0915" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x034F\x0308", L"\x0915" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x034F", L"\x231A" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x034F\x0308", L"\x231A" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x034F\x0300" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x034F\x0308\x0300" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x034F\x093C" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x034F\x0308\x093C" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x034F\x094D" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x034F\x0308\x094D" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x034F\x200D" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x034F\x0308\x200D" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] (Other) ÷ [0.3]", L"\x034F", L"\x0378" }, + { L"÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x034F\x0308", L"\x0378" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\U0001F1E6", L" " }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\U0001F1E6\x0308", L" " }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] (CR) ÷ [0.3]", L"\U0001F1E6", L"\r" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\U0001F1E6\x0308", L"\r" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ 
[5.0] (LF) ÷ [0.3]", L"\U0001F1E6", L"\n" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\U0001F1E6\x0308", L"\n" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] (Control) ÷ [0.3]", L"\U0001F1E6", L"\x01" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x01" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\U0001F1E6\x034F" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\U0001F1E6\x0308\x034F" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\U0001F1E6\U0001F1E6" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\U0001F1E6\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\U0001F1E6", L"\x0600" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x0600" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\U0001F1E6\x0A03" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\U0001F1E6\x0308\x0A03" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\U0001F1E6", L"\x1100" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL 
CHOSEONG KIYEOK (L) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x1100" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\U0001F1E6", L"\x1160" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x1160" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\U0001F1E6", L"\x11A8" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x11A8" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\U0001F1E6", L"\xAC00" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\U0001F1E6\x0308", L"\xAC00" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\U0001F1E6", L"\xAC01" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\U0001F1E6\x0308", L"\xAC01" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\U0001F1E6\x0900" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\U0001F1E6\x0308\x0900" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\U0001F1E6\x0903" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA 
(SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\U0001F1E6\x0308\x0903" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\U0001F1E6", L"\x0904" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x0904" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\U0001F1E6", L"\x0D4E" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x0D4E" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\U0001F1E6", L"\x0915" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x0915" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\U0001F1E6", L"\x231A" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x231A" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\U0001F1E6\x0300" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\U0001F1E6\x0308\x0300" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\U0001F1E6\x093C" }, + { L"÷ [0.2] REGIONAL 
INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\U0001F1E6\x0308\x093C" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\U0001F1E6\x094D" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\U0001F1E6\x0308\x094D" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\U0001F1E6\x200D" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\U0001F1E6\x0308\x200D" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] (Other) ÷ [0.3]", L"\U0001F1E6", L"\x0378" }, + { L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\U0001F1E6\x0308", L"\x0378" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3]", L"\x0600 " }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0600\x0308", L" " }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] (CR) ÷ [0.3]", L"\x0600", L"\r" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0600\x0308", L"\r" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] (LF) ÷ [0.3]", L"\x0600", L"\n" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0600\x0308", L"\n" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] (Control) ÷ [0.3]", L"\x0600", L"\x01" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) 
× [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0600\x0308", L"\x01" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0600\x034F" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0600\x0308\x034F" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0600\U0001F1E6" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0600\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0600\x0600" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0600\x0308", L"\x0600" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0600\x0A03" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0600\x0308\x0A03" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0600\x1100" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0600\x0308", L"\x1100" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0600\x1160" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0600\x0308", L"\x1160" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0600\x11A8" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0600\x0308", L"\x11A8" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0600\xAC00" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0600\x0308", L"\xAC00" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0600\xAC01" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0600\x0308", L"\xAC01" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0600\x0900" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0600\x0308\x0900" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0600\x0903" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0600\x0308\x0903" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0600\x0904" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0600\x0308", L"\x0904" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0600\x0D4E" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0600\x0308", L"\x0D4E" }, + { L"÷ [0.2] 
ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0600\x0915" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0600\x0308", L"\x0915" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]", L"\x0600\x231A" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0600\x0308", L"\x231A" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0600\x0300" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0600\x0308\x0300" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0600\x093C" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0600\x0308\x093C" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0600\x094D" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0600\x0308\x094D" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0600\x200D" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0600\x0308\x200D" }, + { L"÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] (Other) ÷ [0.3]", L"\x0600\x0378" }, + { L"÷ [0.2] ARABIC NUMBER SIGN 
(Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0600\x0308", L"\x0378" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0A03", L" " }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0A03\x0308", L" " }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] (CR) ÷ [0.3]", L"\x0A03", L"\r" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0A03\x0308", L"\r" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] (LF) ÷ [0.3]", L"\x0A03", L"\n" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0A03\x0308", L"\n" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] (Control) ÷ [0.3]", L"\x0A03", L"\x01" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0A03\x0308", L"\x01" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0A03\x034F" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0A03\x0308\x034F" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0A03", L"\U0001F1E6" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0A03\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0A03", L"\x0600" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", 
L"\x0A03\x0308", L"\x0600" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0A03\x0A03" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0A03\x0308\x0A03" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0A03", L"\x1100" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0A03\x0308", L"\x1100" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0A03", L"\x1160" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0A03\x0308", L"\x1160" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0A03", L"\x11A8" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0A03\x0308", L"\x11A8" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0A03", L"\xAC00" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0A03\x0308", L"\xAC00" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0A03", L"\xAC01" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0A03\x0308", L"\xAC01" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0A03\x0900" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA 
(SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0A03\x0308\x0900" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0A03\x0903" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0A03\x0308\x0903" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0A03", L"\x0904" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0A03\x0308", L"\x0904" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0A03", L"\x0D4E" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0A03\x0308", L"\x0D4E" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0A03", L"\x0915" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0A03\x0308", L"\x0915" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0A03", L"\x231A" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0A03\x0308", L"\x231A" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0A03\x0300" }, + { 
L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0A03\x0308\x0300" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0A03\x093C" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0A03\x0308\x093C" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0A03\x094D" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0A03\x0308\x094D" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0A03\x200D" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0A03\x0308\x200D" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] (Other) ÷ [0.3]", L"\x0A03", L"\x0378" }, + { L"÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0A03\x0308", L"\x0378" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x1100", L" " }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x1100\x0308", L" " }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (CR) ÷ [0.3]", L"\x1100", L"\r" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x1100\x0308", L"\r" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (LF) ÷ [0.3]", 
L"\x1100", L"\n" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x1100\x0308", L"\n" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (Control) ÷ [0.3]", L"\x1100", L"\x01" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x1100\x0308", L"\x01" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x1100\x034F" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x1100\x0308\x034F" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x1100", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x1100\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x1100", L"\x0600" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x1100\x0308", L"\x0600" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x1100\x0A03" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x1100\x0308\x0A03" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x1100\x1100" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x1100\x0308", L"\x1100" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x1100\x1160" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x1100\x0308", L"\x1160" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x1100", L"\x11A8" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x1100\x0308", L"\x11A8" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x1100\xAC00" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x1100\x0308", L"\xAC00" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x1100\xAC01" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x1100\x0308", L"\xAC01" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x1100\x0900" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x1100\x0308\x0900" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x1100\x0903" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x1100\x0308\x0903" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x1100", L"\x0904" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x1100\x0308", L"\x0904" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] MALAYALAM LETTER DOT REPH 
(Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x1100", L"\x0D4E" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x1100\x0308", L"\x0D4E" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x1100", L"\x0915" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x1100\x0308", L"\x0915" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x1100", L"\x231A" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x1100\x0308", L"\x231A" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x1100\x0300" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x1100\x0308\x0300" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x1100\x093C" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x1100\x0308\x093C" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x1100\x094D" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x1100\x0308\x094D" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x1100\x200D" }, + { L"÷ [0.2] HANGUL 
CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x1100\x0308\x200D" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] (Other) ÷ [0.3]", L"\x1100", L"\x0378" }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x1100\x0308", L"\x0378" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x1160", L" " }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x1160\x0308", L" " }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (CR) ÷ [0.3]", L"\x1160", L"\r" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x1160\x0308", L"\r" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (LF) ÷ [0.3]", L"\x1160", L"\n" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x1160\x0308", L"\n" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (Control) ÷ [0.3]", L"\x1160", L"\x01" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x1160\x0308", L"\x01" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x1160\x034F" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x1160\x0308\x034F" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x1160", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x1160\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", 
L"\x1160", L"\x0600" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x1160\x0308", L"\x0600" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x1160\x0A03" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x1160\x0308\x0A03" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x1160", L"\x1100" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x1160\x0308", L"\x1100" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x1160\x1160" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x1160\x0308", L"\x1160" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x1160\x11A8" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x1160\x0308", L"\x11A8" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x1160", L"\xAC00" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x1160\x0308", L"\xAC00" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x1160", L"\xAC01" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x1160\x0308", L"\xAC01" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x1160\x0900" }, 
+ { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x1160\x0308\x0900" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x1160\x0903" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x1160\x0308\x0903" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x1160", L"\x0904" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x1160\x0308", L"\x0904" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x1160", L"\x0D4E" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x1160\x0308", L"\x0D4E" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x1160", L"\x0915" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x1160\x0308", L"\x0915" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x1160", L"\x231A" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x1160\x0308", L"\x231A" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x1160\x0300" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING 
DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x1160\x0308\x0300" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x1160\x093C" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x1160\x0308\x093C" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x1160\x094D" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x1160\x0308\x094D" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x1160\x200D" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x1160\x0308\x200D" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] (Other) ÷ [0.3]", L"\x1160", L"\x0378" }, + { L"÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x1160\x0308", L"\x0378" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x11A8", L" " }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x11A8\x0308", L" " }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (CR) ÷ [0.3]", L"\x11A8", L"\r" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x11A8\x0308", L"\r" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (LF) ÷ [0.3]", L"\x11A8", L"\n" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ 
[0.3]", L"\x11A8\x0308", L"\n" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (Control) ÷ [0.3]", L"\x11A8", L"\x01" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x11A8\x0308", L"\x01" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x11A8\x034F" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x11A8\x0308\x034F" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x11A8", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x11A8\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x11A8", L"\x0600" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x11A8\x0308", L"\x0600" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x11A8\x0A03" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x11A8\x0308\x0A03" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x11A8", L"\x1100" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x11A8\x0308", L"\x1100" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x11A8", L"\x1160" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x11A8\x0308", L"\x1160" }, + { L"÷ 
[0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x11A8\x11A8" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x11A8\x0308", L"\x11A8" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x11A8", L"\xAC00" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x11A8\x0308", L"\xAC00" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x11A8", L"\xAC01" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x11A8\x0308", L"\xAC01" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x11A8\x0900" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x11A8\x0308\x0900" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x11A8\x0903" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x11A8\x0308\x0903" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x11A8", L"\x0904" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x11A8\x0308", L"\x0904" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x11A8", L"\x0D4E" }, + { L"÷ [0.2] HANGUL 
JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x11A8\x0308", L"\x0D4E" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x11A8", L"\x0915" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x11A8\x0308", L"\x0915" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x11A8", L"\x231A" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x11A8\x0308", L"\x231A" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x11A8\x0300" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x11A8\x0308\x0300" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x11A8\x093C" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x11A8\x0308\x093C" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x11A8\x094D" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x11A8\x0308\x094D" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x11A8\x200D" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] 
ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x11A8\x0308\x200D" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] (Other) ÷ [0.3]", L"\x11A8", L"\x0378" }, + { L"÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x11A8\x0308", L"\x0378" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\xAC00", L" " }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\xAC00\x0308", L" " }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (CR) ÷ [0.3]", L"\xAC00", L"\r" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\xAC00\x0308", L"\r" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (LF) ÷ [0.3]", L"\xAC00", L"\n" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\xAC00\x0308", L"\n" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (Control) ÷ [0.3]", L"\xAC00", L"\x01" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\xAC00\x0308", L"\x01" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\xAC00\x034F" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\xAC00\x0308\x034F" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\xAC00", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\xAC00\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\xAC00", L"\x0600" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC 
NUMBER SIGN (Prepend) ÷ [0.3]", L"\xAC00\x0308", L"\x0600" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\xAC00\x0A03" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\xAC00\x0308\x0A03" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\xAC00", L"\x1100" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\xAC00\x0308", L"\x1100" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\xAC00\x1160" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\xAC00\x0308", L"\x1160" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\xAC00\x11A8" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\xAC00\x0308", L"\x11A8" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\xAC00", L"\xAC00" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\xAC00\x0308", L"\xAC00" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\xAC00", L"\xAC01" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\xAC00\x0308", L"\xAC01" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC00\x0900" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", 
L"\xAC00\x0308\x0900" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC00\x0903" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC00\x0308\x0903" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\xAC00", L"\x0904" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\xAC00\x0308", L"\x0904" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC00", L"\x0D4E" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC00\x0308", L"\x0D4E" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\xAC00", L"\x0915" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\xAC00\x0308", L"\x0915" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\xAC00", L"\x231A" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\xAC00\x0308", L"\x231A" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\xAC00\x0300" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\xAC00\x0308\x0300" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ 
[0.3]", L"\xAC00\x093C" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\xAC00\x0308\x093C" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\xAC00\x094D" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\xAC00\x0308\x094D" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\xAC00\x200D" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\xAC00\x0308\x200D" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] (Other) ÷ [0.3]", L"\xAC00", L"\x0378" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\xAC00\x0308", L"\x0378" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\xAC01", L" " }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\xAC01\x0308", L" " }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (CR) ÷ [0.3]", L"\xAC01", L"\r" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\xAC01\x0308", L"\r" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (LF) ÷ [0.3]", L"\xAC01", L"\n" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\xAC01\x0308", L"\n" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (Control) ÷ [0.3]", L"\xAC01", L"\x01" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\xAC01\x0308", L"\x01" }, + { 
L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\xAC01\x034F" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\xAC01\x0308\x034F" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\xAC01", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\xAC01\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\xAC01", L"\x0600" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\xAC01\x0308", L"\x0600" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\xAC01\x0A03" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\xAC01\x0308\x0A03" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\xAC01", L"\x1100" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\xAC01\x0308", L"\x1100" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\xAC01", L"\x1160" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\xAC01\x0308", L"\x1160" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\xAC01\x11A8" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\xAC01\x0308", L"\x11A8" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG 
(LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\xAC01", L"\xAC00" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\xAC01\x0308", L"\xAC00" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\xAC01", L"\xAC01" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\xAC01\x0308", L"\xAC01" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC01\x0900" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC01\x0308\x0900" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC01\x0903" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC01\x0308\x0903" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\xAC01", L"\x0904" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\xAC01\x0308", L"\x0904" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC01", L"\x0D4E" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\xAC01\x0308", L"\x0D4E" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\xAC01", L"\x0915" }, + { L"÷ 
[0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\xAC01\x0308", L"\x0915" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\xAC01", L"\x231A" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\xAC01\x0308", L"\x231A" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\xAC01\x0300" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\xAC01\x0308\x0300" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\xAC01\x093C" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\xAC01\x0308\x093C" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\xAC01\x094D" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\xAC01\x0308\x094D" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\xAC01\x200D" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\xAC01\x0308\x200D" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] (Other) ÷ [0.3]", L"\xAC01", L"\x0378" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\xAC01\x0308", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED 
CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0900", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0900\x0308", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] (CR) ÷ [0.3]", L"\x0900", L"\r" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0900\x0308", L"\r" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] (LF) ÷ [0.3]", L"\x0900", L"\n" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0900\x0308", L"\n" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] (Control) ÷ [0.3]", L"\x0900", L"\x01" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0900\x0308", L"\x01" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0900\x034F" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0900\x0308\x034F" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0900", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0900\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI 
SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0900", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0900\x0308", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0900\x0A03" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0900\x0308\x0A03" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0900", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0900\x0308", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0900", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0900\x0308", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0900", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0900\x0308", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0900", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU 
(Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0900\x0308", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0900", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0900\x0308", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0900\x0900" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0900\x0308\x0900" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0900\x0903" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0900\x0308\x0903" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0900", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0900\x0308", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0900", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED 
CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0900\x0308", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0900", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0900\x0308", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0900", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0900\x0308", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0900\x0300" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0900\x0308\x0300" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0900\x093C" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0900\x0308\x093C" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0900\x094D" }, + { L"÷ [0.2] DEVANAGARI SIGN 
INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0900\x0308\x094D" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0900\x200D" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0900\x0308\x200D" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] (Other) ÷ [0.3]", L"\x0900", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0900\x0308", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0903", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0903\x0308", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] (CR) ÷ [0.3]", L"\x0903", L"\r" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0903\x0308", L"\r" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] (LF) ÷ [0.3]", L"\x0903", L"\n" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0903\x0308", L"\n" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] (Control) ÷ [0.3]", L"\x0903", L"\x01" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × 
[9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0903\x0308", L"\x01" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0903\x034F" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0903\x0308\x034F" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0903", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0903\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0903", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0903\x0308", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0903\x0A03" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0903\x0308\x0A03" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0903", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0903\x0308", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", 
L"\x0903", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0903\x0308", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0903", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0903\x0308", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0903", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0903\x0308", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0903", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0903\x0308", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0903\x0900" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0903\x0308\x0900" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0903\x0903" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA 
(SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0903\x0308\x0903" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0903", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0903\x0308", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0903", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0903\x0308", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0903", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0903\x0308", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0903", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0903\x0308", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0903\x0300" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0903\x0308\x0300" }, + { L"÷ [0.2] DEVANAGARI 
SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0903\x093C" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0903\x0308\x093C" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0903\x094D" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0903\x0308\x094D" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0903\x200D" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0903\x0308\x200D" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] (Other) ÷ [0.3]", L"\x0903", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0903\x0308", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0904", L" " }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0904\x0308", L" " }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] (CR) ÷ [0.3]", L"\x0904", L"\r" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ 
[0.3]", L"\x0904\x0308", L"\r" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] (LF) ÷ [0.3]", L"\x0904", L"\n" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0904\x0308", L"\n" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] (Control) ÷ [0.3]", L"\x0904", L"\x01" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0904\x0308", L"\x01" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0904\x034F" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0904\x0308\x034F" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0904", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0904\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0904", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0904\x0308", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0904\x0A03" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0904\x0308\x0A03" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ 
[999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0904", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0904\x0308", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0904", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0904\x0308", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0904", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0904\x0308", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0904", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0904\x0308", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0904", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0904\x0308", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0904\x0900" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0904\x0308\x0900" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A 
(ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0904\x0903" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0904\x0308\x0903" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0904", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0904\x0308", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0904", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0904\x0308", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0904", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0904\x0308", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0904", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0904\x0308", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0904\x0300" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A 
(ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0904\x0308\x0300" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0904\x093C" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0904\x0308\x093C" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0904\x094D" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0904\x0308\x094D" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0904\x200D" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0904\x0308\x200D" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] (Other) ÷ [0.3]", L"\x0904", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0904\x0308", L"\x0378" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] SPACE (Other) ÷ [0.3]", L"\x0D4E " }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0D4E\x0308", L" " }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] (CR) ÷ [0.3]", L"\x0D4E", L"\r" }, + { L"÷ [0.2] 
MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0D4E\x0308", L"\r" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] (LF) ÷ [0.3]", L"\x0D4E", L"\n" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0D4E\x0308", L"\n" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] (Control) ÷ [0.3]", L"\x0D4E", L"\x01" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0D4E\x0308", L"\x01" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0D4E\x034F" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0D4E\x0308\x034F" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0D4E\U0001F1E6" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0D4E\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0D4E\x0600" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0D4E\x0308", L"\x0600" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0D4E\x0A03" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH 
(Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0D4E\x0308\x0A03" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0D4E\x1100" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0D4E\x0308", L"\x1100" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0D4E\x1160" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0D4E\x0308", L"\x1160" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0D4E\x11A8" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0D4E\x0308", L"\x11A8" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0D4E\xAC00" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0D4E\x0308", L"\xAC00" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0D4E\xAC01" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0D4E\x0308", L"\xAC01" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", 
L"\x0D4E\x0900" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0D4E\x0308\x0900" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0D4E\x0903" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0D4E\x0308\x0903" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0D4E\x0904" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0D4E\x0308", L"\x0904" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0D4E\x0D4E" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0D4E\x0308", L"\x0D4E" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0D4E\x0915" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0D4E\x0308", L"\x0915" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] WATCH (ExtPict) ÷ [0.3]", L"\x0D4E\x231A" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH 
(Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0D4E\x0308", L"\x231A" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0D4E\x0300" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0D4E\x0308\x0300" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0D4E\x093C" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0D4E\x0308\x093C" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0D4E\x094D" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0D4E\x0308\x094D" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0D4E\x200D" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0D4E\x0308\x200D" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] (Other) ÷ [0.3]", L"\x0D4E\x0378" }, + { L"÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0D4E\x0308", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI 
LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0915", L" " }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0915\x0308", L" " }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] (CR) ÷ [0.3]", L"\x0915", L"\r" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0915\x0308", L"\r" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] (LF) ÷ [0.3]", L"\x0915", L"\n" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0915\x0308", L"\n" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] (Control) ÷ [0.3]", L"\x0915", L"\x01" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0915\x0308", L"\x01" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0915\x034F" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0915\x0308\x034F" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0915", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0915\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ 
[999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0915", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0915\x0308", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0915\x0A03" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0915\x0308\x0A03" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0915", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0915\x0308", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0915", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0915\x0308", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0915", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0915\x0308", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0915", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", 
L"\x0915\x0308", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0915", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0915\x0308", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0915\x0900" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0915\x0308\x0900" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0915\x0903" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0915\x0308\x0903" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0915", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0915\x0308", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0915", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0915\x0308", 
L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x0308", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0915", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0915\x0308", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0915\x0300" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0915\x0308\x0300" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0915\x093C" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0915\x0308\x093C" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0915\x094D" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0915\x0308\x094D" }, + { L"÷ [0.2] 
DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0915\x200D" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0915\x0308\x200D" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] (Other) ÷ [0.3]", L"\x0915", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0915\x0308", L"\x0378" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x231A", L" " }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x231A\x0308", L" " }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [5.0] (CR) ÷ [0.3]", L"\x231A", L"\r" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x231A\x0308", L"\r" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [5.0] (LF) ÷ [0.3]", L"\x231A", L"\n" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x231A\x0308", L"\n" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [5.0] (Control) ÷ [0.3]", L"\x231A", L"\x01" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x231A\x0308", L"\x01" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x231A\x034F" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x231A\x0308\x034F" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x231A", L"\U0001F1E6" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", 
L"\x231A\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x231A", L"\x0600" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x231A\x0308", L"\x0600" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x231A\x0A03" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x231A\x0308\x0A03" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x231A", L"\x1100" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x231A\x0308", L"\x1100" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x231A", L"\x1160" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x231A\x0308", L"\x1160" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x231A", L"\x11A8" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x231A\x0308", L"\x11A8" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x231A", L"\xAC00" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x231A\x0308", L"\xAC00" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x231A", L"\xAC01" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x231A\x0308", L"\xAC01" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x231A\x0900" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x231A\x0308\x0900" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x231A\x0903" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x231A\x0308\x0903" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x231A", L"\x0904" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x231A\x0308", L"\x0904" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x231A", L"\x0D4E" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x231A\x0308", L"\x0D4E" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x231A", L"\x0915" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x231A\x0308", L"\x0915" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x231A", L"\x231A" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x231A\x0308", L"\x231A" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x231A\x0300" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x231A\x0308\x0300" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN NUKTA 
(Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x231A\x093C" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x231A\x0308\x093C" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x231A\x094D" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x231A\x0308\x094D" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x231A\x200D" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x231A\x0308\x200D" }, + { L"÷ [0.2] WATCH (ExtPict) ÷ [999.0] (Other) ÷ [0.3]", L"\x231A", L"\x0378" }, + { L"÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x231A\x0308", L"\x0378" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0300", L" " }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0300\x0308", L" " }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0300", L"\r" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0300\x0308", L"\r" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0300", L"\n" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0300\x0308", L"\n" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0300", L"\x01" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT 
(Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0300\x0308", L"\x01" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0300\x034F" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0300\x0308\x034F" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0300", L"\U0001F1E6" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0300\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0300", L"\x0600" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0300\x0308", L"\x0600" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0300\x0A03" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0300\x0308\x0A03" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0300", L"\x1100" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0300\x0308", L"\x1100" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0300", L"\x1160" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0300\x0308", 
L"\x1160" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0300", L"\x11A8" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0300\x0308", L"\x11A8" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0300", L"\xAC00" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0300\x0308", L"\xAC00" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0300", L"\xAC01" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0300\x0308", L"\xAC01" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0300\x0900" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0300\x0308\x0900" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0300\x0903" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0300\x0308\x0903" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0300", L"\x0904" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", 
L"\x0300\x0308", L"\x0904" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0300", L"\x0D4E" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0300\x0308", L"\x0D4E" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0300", L"\x0915" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0300\x0308", L"\x0915" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0300", L"\x231A" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0300\x0308", L"\x231A" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0300\x0300" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0300\x0308\x0300" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0300\x093C" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0300\x0308\x093C" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0300\x094D" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0300\x0308\x094D" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0300\x200D" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0300\x0308\x200D" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0300", L"\x0378" }, + { L"÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0300\x0308", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x093C", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x093C\x0308", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x093C", L"\r" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x093C\x0308", L"\r" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x093C", L"\n" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x093C\x0308", L"\n" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x093C", L"\x01" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x093C\x0308", L"\x01" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA 
(Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x093C\x034F" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x093C\x0308\x034F" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x093C", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x093C\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x093C", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x093C\x0308", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x093C\x0A03" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x093C\x0308\x0A03" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x093C", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x093C\x0308", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x093C", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x093C\x0308", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x093C", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x093C\x0308", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x093C", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x093C\x0308", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x093C", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x093C\x0308", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x093C\x0900" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x093C\x0308\x0900" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x093C\x0903" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x093C\x0308\x0903" }, + { L"÷ 
[0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x093C", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x093C\x0308", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x093C", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x093C\x0308", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x093C", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x093C\x0308", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x093C", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x093C\x0308", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x093C\x0300" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x093C\x0308\x0300" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA 
(Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x093C\x093C" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x093C\x0308\x093C" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x093C\x094D" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x093C\x0308\x094D" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x093C\x200D" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x093C\x0308\x200D" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x093C", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x093C\x0308", L"\x0378" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x094D", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x094D\x0308", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x094D", L"\r" }, + { L"÷ [0.2] DEVANAGARI SIGN 
VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x094D\x0308", L"\r" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x094D", L"\n" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x094D\x0308", L"\n" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x094D", L"\x01" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x094D\x0308", L"\x01" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x094D\x034F" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x094D\x0308\x034F" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x094D", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x094D\x0308", L"\U0001F1E6" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x094D", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", 
L"\x094D\x0308", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x094D\x0A03" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x094D\x0308\x0A03" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x094D", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x094D\x0308", L"\x1100" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x094D", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x094D\x0308", L"\x1160" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x094D", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x094D\x0308", L"\x11A8" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x094D", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x094D\x0308", L"\xAC00" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA 
(Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x094D", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x094D\x0308", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0900" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0308\x0900" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0903" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0308\x0903" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x094D", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0308", L"\x0904" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0308", L"\x0D4E" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x094D", L"\x0915" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x094D\x0308", L"\x0915" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x094D", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x094D\x0308", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x094D\x0300" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x094D\x0308\x0300" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x094D\x093C" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x094D\x0308\x093C" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x094D\x094D" 
}, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x094D\x0308\x094D" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x094D\x200D" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x094D\x0308\x200D" }, + //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x094D", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x094D\x0308", L"\x0378" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x200D", L" " }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x200D\x0308", L" " }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x200D", L"\r" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x200D\x0308", L"\r" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x200D", L"\n" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x200D\x0308", L"\n" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x200D", L"\x01" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x200D\x0308", L"\x01" }, + { L"÷ [0.2] ZERO WIDTH 
JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x200D\x034F" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x200D\x0308\x034F" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x200D", L"\U0001F1E6" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x200D\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x200D", L"\x0600" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x200D\x0308", L"\x0600" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x200D\x0A03" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x200D\x0308\x0A03" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x200D", L"\x1100" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x200D\x0308", L"\x1100" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x200D", L"\x1160" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x200D\x0308", L"\x1160" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x200D", L"\x11A8" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL 
JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x200D\x0308", L"\x11A8" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x200D", L"\xAC00" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x200D\x0308", L"\xAC00" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x200D", L"\xAC01" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x200D\x0308", L"\xAC01" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x200D\x0900" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x200D\x0308\x0900" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x200D\x0903" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x200D\x0308\x0903" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x200D", L"\x0904" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x200D\x0308", L"\x0904" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x200D", L"\x0D4E" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ 
[0.3]", L"\x200D\x0308", L"\x0D4E" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x200D", L"\x0915" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x200D\x0308", L"\x0915" }, + //{ L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x200D", L"\x231A" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x200D\x0308", L"\x231A" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x200D\x0300" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x200D\x0308\x0300" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x200D\x093C" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x200D\x0308\x093C" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x200D\x094D" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x200D\x0308\x094D" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x200D\x200D" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", 
L"\x200D\x0308\x200D" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x200D", L"\x0378" }, + { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x200D\x0308", L"\x0378" }, + { L"÷ [0.2] (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0378", L" " }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0378\x0308", L" " }, + { L"÷ [0.2] (Other) ÷ [5.0] (CR) ÷ [0.3]", L"\x0378", L"\r" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x0378\x0308", L"\r" }, + { L"÷ [0.2] (Other) ÷ [5.0] (LF) ÷ [0.3]", L"\x0378", L"\n" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]", L"\x0378\x0308", L"\n" }, + { L"÷ [0.2] (Other) ÷ [5.0] (Control) ÷ [0.3]", L"\x0378", L"\x01" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x0378\x0308", L"\x01" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0378\x034F" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x0378\x0308\x034F" }, + { L"÷ [0.2] (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0378", L"\U0001F1E6" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x0378\x0308", L"\U0001F1E6" }, + { L"÷ [0.2] (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0378", L"\x0600" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x0378\x0308", L"\x0600" }, + { L"÷ [0.2] (Other) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x0378\x0A03" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA 
(SpacingMark) ÷ [0.3]", L"\x0378\x0308\x0A03" }, + { L"÷ [0.2] (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0378", L"\x1100" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x0378\x0308", L"\x1100" }, + { L"÷ [0.2] (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0378", L"\x1160" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x0378\x0308", L"\x1160" }, + { L"÷ [0.2] (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0378", L"\x11A8" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x0378\x0308", L"\x11A8" }, + { L"÷ [0.2] (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0378", L"\xAC00" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x0378\x0308", L"\xAC00" }, + { L"÷ [0.2] (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0378", L"\xAC01" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x0378\x0308", L"\xAC01" }, + { L"÷ [0.2] (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0378\x0900" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0378\x0308\x0900" }, + { L"÷ [0.2] (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0378\x0903" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x0378\x0308\x0903" }, + { L"÷ [0.2] (Other) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0378", L"\x0904" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING 
DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x0378\x0308", L"\x0904" }, + { L"÷ [0.2] (Other) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0378", L"\x0D4E" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x0378\x0308", L"\x0D4E" }, + { L"÷ [0.2] (Other) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0378", L"\x0915" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0378\x0308", L"\x0915" }, + { L"÷ [0.2] (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0378", L"\x231A" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x0378\x0308", L"\x231A" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0378\x0300" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x0378\x0308\x0300" }, + { L"÷ [0.2] (Other) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0378\x093C" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]", L"\x0378\x0308\x093C" }, + { L"÷ [0.2] (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0378\x094D" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x0378\x0308\x094D" }, + { L"÷ [0.2] (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0378\x200D" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x0378\x0308\x200D" }, + { L"÷ [0.2] (Other) ÷ [999.0] (Other) ÷ [0.3]", L"\x0378", L"\x0378" }, + { L"÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x0378\x0308", L"\x0378" }, + //{ L"÷ [0.2] (CR) × [3.0] (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]", L"\r\n", L"a", L"\n", L"\x0308" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]", L"a\x0308" }, + { L"÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]", L" \x200D", L"\x0646" }, + { L"÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x0646\x200D", L" " }, + { L"÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x1100\x1100" }, + { L"÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\xAC00\x11A8", L"\x1100" }, + { L"÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\xAC01\x11A8", L"\x1100" }, + //{ L"÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"\U0001F1E6\U0001F1E7", L"\U0001F1E8", L"b" }, + //{ L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\U0001F1E7", L"\U0001F1E8", L"b" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER 
(ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\U0001F1E7\x200D", L"\U0001F1E8", L"b" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\x200D", L"\U0001F1E7\U0001F1E8", L"b" }, + //{ L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\U0001F1E7", L"\U0001F1E8\U0001F1E9", L"b" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"a\x200D" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a\x0308", L"b" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a\x0903", L"b" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\x0600b" }, + { L"÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]", L"\U0001F476\U0001F3FF", L"\U0001F476" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]", L"a\U0001F3FF", L"\U0001F476" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]", 
L"a\U0001F3FF", L"\U0001F476\x200D\U0001F6D1" }, + { L"÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3]", L"\U0001F476\U0001F3FF\x0308\x200D\U0001F476\U0001F3FF" }, + { L"÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]", L"\U0001F6D1\x200D\U0001F6D1" }, + //{ L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]", L"a\x200D", L"\U0001F6D1" }, + { L"÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]", L"\x2701\x200D\x2701" }, + //{ L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]", L"a\x200D", L"\x2701" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915", L"\x0924" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x094D\x0924" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x094D\x094D\x0924" }, + //{ L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO 
WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x094D\x200D\x0924" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x093C\x200D\x094D\x0924" }, + //{ L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x093C\x094D\x200D\x0924" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER YA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x094D\x0924\x094D\x092F" }, + //{ L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER A (Other) ÷ [0.3]", L"\x0915\x094D", L"a" }, + //{ L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"a\x094D", L"\x0924" }, + //{ L"÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] 
DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"?\x094D", L"\x0924" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x094D\x094D\x0924" }, +}; + +class CodepointWidthDetectorTests +{ + TEST_CLASS(CodepointWidthDetectorTests); + + TEST_METHOD(GraphemeBreakTest) + { + WEX::TestExecution::DisableVerifyExceptions disableVerifyExceptions{}; + WEX::TestExecution::SetVerifyOutput verifyOutputScope{ WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures }; + + auto& cwd = CodepointWidthDetector::Singleton(); + std::vector expected; + std::vector actual; + std::wstring text; + + for (const auto& test : s_graphemeBreakTests) + { + expected.clear(); + for (const auto g : test.graphemes) + { + if (!g) + { + break; + } + expected.emplace_back(g); + } + + text.clear(); + for (const auto& g : expected) + { + text.append(g); + } + + actual.clear(); + for (size_t beg = 0; beg < text.size();) + { + const auto end = cwd.GraphemeNext(text, beg, nullptr); + actual.emplace_back(std::wstring_view{ text }.substr(beg, end - beg)); + beg = end; + } + VERIFY_ARE_EQUAL(expected, actual, test.comment); + + actual.clear(); + for (size_t end = text.size(); end > 0;) + { + const auto beg = cwd.GraphemePrev(text, end, nullptr); + actual.emplace_back(std::wstring_view{ text }.substr(beg, end - beg)); + end = beg; + } + std::reverse(actual.begin(), actual.end()); + VERIFY_ARE_EQUAL(expected, actual, test.comment); + } + } + + TEST_METHOD(BasicGraphemes) + { + static constexpr std::wstring_view text{ L"a\u0363e\u0364\u0364i\u0365" }; + + auto& cwd = CodepointWidthDetector::Singleton(); + + const std::vector expectedAdvances{ 2, 3, 2 }; + const std::vector expectedWidths{ 1, 1, 1 
}; + std::vector actualAdvances; + std::vector actualWidths; + + for (size_t beg = 0; beg < text.size();) + { + int width; + const auto end = cwd.GraphemeNext(text, beg, &width); + actualAdvances.emplace_back(end - beg); + actualWidths.emplace_back(width); + beg = end; + } + + VERIFY_ARE_EQUAL(expectedAdvances, actualAdvances); + VERIFY_ARE_EQUAL(expectedWidths, actualWidths); + + actualAdvances.clear(); + actualWidths.clear(); + + for (size_t end = text.size(); end > 0;) + { + int width; + const auto beg = cwd.GraphemePrev(text, end, &width); + actualAdvances.emplace_back(end - beg); + actualWidths.emplace_back(width); + end = beg; + } + + std::reverse(actualAdvances.begin(), actualAdvances.end()); + std::reverse(actualWidths.begin(), actualWidths.end()); + + VERIFY_ARE_EQUAL(expectedAdvances, actualAdvances); + VERIFY_ARE_EQUAL(expectedWidths, actualWidths); + } + + TEST_METHOD(DevanagariConjunctLinker) + { + static constexpr std::wstring_view text{ L"\u0915\u094D\u094D\u0924" }; + + auto& cwd = CodepointWidthDetector::Singleton(); + + int width; + const auto end = cwd.GraphemeNext(text, 0, &width); + VERIFY_ARE_EQUAL(4, end); + VERIFY_ARE_EQUAL(2, width); + } +}; diff --git a/src/types/ut_types/CodepointWidthDetectorTests_gen.go b/src/types/ut_types/CodepointWidthDetectorTests_gen.go new file mode 100644 index 00000000000..93d4597b136 --- /dev/null +++ b/src/types/ut_types/CodepointWidthDetectorTests_gen.go @@ -0,0 +1,139 @@ +package main + +import ( + "bufio" + "bytes" + "fmt" + "io" + "net/http" + "os" + "regexp" + "strconv" + "strings" + "time" +) + +func main() { + if err := run(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +func run() error { + data, err := fetch(`https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt`) + if err != nil { + return err + } + + testString := strings.Builder{} + + scanner := bufio.NewScanner(bytes.NewReader(data)) + firstLine := true + + for scanner.Scan() { + line := scanner.Text() + test, 
comment, _ := strings.Cut(line, "#") + test = strings.TrimSpace(test) + comment = strings.TrimSpace(comment) + + if firstLine { + firstLine = false + + re, err := regexp.Compile(`^GraphemeBreakTest-(\d+\.\d+\.\d+)\.txt$`) + if err != nil { + return err + } + + m := re.FindStringSubmatch(comment) + if len(m) == 0 { + return fmt.Errorf("failed to find version number, got: %s", comment) + } + + _, _ = fmt.Fprintf(&testString, "// Generated by CodepointWidthDetector_gen.go\n") + _, _ = fmt.Fprintf(&testString, "// on %s, from Unicode %s\n", time.Now().UTC().Format(time.RFC3339), m[1]) + + testString.WriteString("struct GraphemeBreakTest\n") + testString.WriteString("{\n") + testString.WriteString(" const wchar_t* comment;\n") + testString.WriteString(" const wchar_t* graphemes[4];\n") + testString.WriteString("};\n") + testString.WriteString("static constexpr GraphemeBreakTest s_graphemeBreakTests[] = {\n") + } + // # GraphemeBreakTest-15.1.0.txt + + if len(test) == 0 || len(comment) == 0 { + continue + } + + graphemes := strings.Split(test, "÷") + for i, g := range graphemes { + graphemes[i] = strings.TrimSpace(g) + } + + testString.WriteString("") + _, _ = fmt.Fprintf(&testString, ` { L"%s"`, comment) + + for _, g := range graphemes { + if len(g) == 0 { + continue + } + + testString.WriteString(`, L"`) + + codepoints := strings.Split(g, "×") + for _, c := range codepoints { + i, err := strconv.ParseUint(strings.TrimSpace(c), 16, 32) + if err != nil { + return err + } + if i == 0x07 { + testString.WriteString(`\a`) + } else if i == 0x08 { + testString.WriteString(`\b`) + } else if i == 0x09 { + testString.WriteString(`\t`) + } else if i == 0x0A { + testString.WriteString(`\n`) + } else if i == 0x0B { + testString.WriteString(`\v`) + } else if i == 0x0C { + testString.WriteString(`\f`) + } else if i == 0x0D { + testString.WriteString(`\r`) + } else if i >= 0x20 && i <= 0x7e { + testString.WriteRune(rune(i)) + } else if i <= 0xff { + _, _ = fmt.Fprintf(&testString, 
`\x%02X`, i) + } else if i <= 0xffff { + _, _ = fmt.Fprintf(&testString, `\x%04X`, i) + } else { + _, _ = fmt.Fprintf(&testString, `\U%08X`, i) + } + } + + testString.WriteString(`"`) + } + + testString.WriteString(" },\n") + } + + testString.WriteString("};\n") + _, _ = os.Stdout.WriteString(testString.String()) + return nil +} + +func fetch(url string) ([]byte, error) { + res, err := http.Get(url) + if err != nil { + return nil, err + } + defer res.Body.Close() + + body, err := io.ReadAll(res.Body) + if err != nil { + return nil, err + } + + return body, nil +} diff --git a/src/types/ut_types/Types.Unit.Tests.vcxproj b/src/types/ut_types/Types.Unit.Tests.vcxproj index 45a486c63b8..1f42421f4da 100644 --- a/src/types/ut_types/Types.Unit.Tests.vcxproj +++ b/src/types/ut_types/Types.Unit.Tests.vcxproj @@ -11,6 +11,7 @@ + diff --git a/src/types/ut_types/sources b/src/types/ut_types/sources index 71fce07be35..f5741649fe9 100644 --- a/src/types/ut_types/sources +++ b/src/types/ut_types/sources @@ -14,6 +14,7 @@ DLLDEF = SOURCES = \ $(SOURCES) \ + CodepointWidthDetectorTests.cpp \ UuidTests.cpp \ UtilsTests.cpp \ DefaultResource.rc \ From ac1048632a690bc4eefa0df20ad83ad69d79501c Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 22 Mar 2024 17:38:25 +0100 Subject: [PATCH 02/14] Fix the build, Some fine-tuning --- src/types/CodepointWidthDetector.cpp | 346 ++--- src/types/CodepointWidthDetector_gen.go | 1123 ++++++++--------- src/types/inc/CodepointWidthDetector.hpp | 2 +- .../ut_types/CodepointWidthDetectorTests.cpp | 8 +- .../CodepointWidthDetectorTests_gen.go | 283 +++-- 5 files changed, 879 insertions(+), 883 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 15218f04c5c..0e28488d610 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -4,12 +4,11 @@ #include "precomp.h" #include "inc/CodepointWidthDetector.hpp" -// We know that ucdToClusterBreak() can never 
return anything >=CB_COUNT, but the compiler doesn't. +// I was trying to minimize dependencies in this code so that it's easier to port to other terminal applications. +// Also, it has to be fast / have minimal overhead, since it potentially parses every single input character. #pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26472) // Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1). #pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). -// ICU doesn't play well with MSVC's linter. -#pragma warning(disable : 26476) // Expression/symbol '...' uses a naked union '...' with multiple type pointers: Use variant instead (type.7). -#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). namespace { @@ -360,7 +359,7 @@ namespace // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html // Generated by CodepointWidthDetector_gen.go -// on 2024-03-20T21:19:52Z, from Unicode 15.1.0, 8414 bytes +// on 2024-03-22T16:37:43Z, from Unicode 15.1.0, 8277 bytes // clang-format off static constexpr uint16_t s_stage1[] = { 0x0000, 0x0020, 0x0040, 0x0060, 0x0080, 0x009f, 0x00bf, 0x00ca, 0x00ca, 0x00d3, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, @@ -665,7 +664,7 @@ static constexpr uint8_t s_stage4[] = { 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x41, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0x40, 0x40, - 0xc0, 0x4c, 0xc0, 0x40, 0x40, 0x01, 0xcc, 0x40, + 0xc0, 0x4b, 0xc0, 0x40, 0x40, 0x01, 0xcb, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, @@ -679,193 +678,196 @@ static constexpr uint8_t s_stage4[] = { 0x40, 0xc0, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0x40, 0x40, 
0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0x40, - 0xc0, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, - 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, 0x02, 0x02, - 0x40, 0x02, 0x02, 0x40, 0x02, 0x03, 0x03, 0x03, - 0x03, 0x03, 0x03, 0x40, 0x40, 0x02, 0x02, 0x02, - 0x40, 0x01, 0x40, 0x40, 0x40, 0x02, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x03, 0x40, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x40, 0x40, 0x02, 0x40, 0x02, 0x02, 0x02, - 0x02, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - 0x03, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x40, - 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, - 0x40, 0x03, 0x03, 0x40, 0x40, 0x40, 0x40, 0x40, - 0x40, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x0b, 0x02, 0x02, 0x40, - 0x40, 0x02, 0x02, 0x40, 0x40, 0x40, 0x40, 0x02, - 0x40, 0x02, 0x02, 0x40, 0x40, 0x02, 0x02, 0x0b, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x40, - 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, 0x40, 0x02, - 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, - 0x02, 0x40, 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, - 0x0b, 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, - 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, 0x02, 0x0b, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x40, - 0x02, 0x02, 0x02, 0x0b, 0x43, 0x40, 0x40, 0x02, - 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x40, 0x02, 0x40, 0x42, 0x02, 0x02, 0x02, - 0x02, 0x40, 0x40, 0x40, 0x00, 0x00, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, 0x40, - 0x02, 0x02, 0x40, 0x00, 0x00, 0x00, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x40, 0x40, 0x40, 0x40, 
0x40, 0x40, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x40, 0x07, 0x07, + 0x40, 0x07, 0x07, 0x40, 0x07, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x40, 0x40, 0x07, 0x07, 0x07, + 0x40, 0x01, 0x40, 0x40, 0x40, 0x07, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x09, 0x40, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x40, 0x40, 0x07, 0x40, 0x07, 0x07, 0x07, + 0x07, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x09, 0x40, 0x40, 0x40, 0x40, 0x40, 0x07, 0x40, + 0x40, 0x07, 0x07, 0x40, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, + 0x40, 0x09, 0x09, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x07, 0x07, 0x09, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x40, 0x40, 0x07, 0x07, 0x07, 0x40, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x0a, 0x07, 0x07, 0x40, + 0x40, 0x07, 0x07, 0x40, 0x40, 0x40, 0x40, 0x07, + 0x40, 0x07, 0x07, 0x40, 0x40, 0x07, 0x07, 0x0a, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x07, 0x40, + 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, 0x40, 0x07, + 0x40, 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, + 0x07, 0x40, 0x40, 0x07, 0x07, 0x40, 0x07, 0x07, + 0x0a, 0x40, 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, + 0x40, 0x07, 0x07, 0x40, 0x07, 0x07, 0x07, 0x0a, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x40, + 0x07, 0x07, 0x07, 0x0a, 0x49, 0x40, 0x40, 0x07, + 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x40, 0x07, 0x40, 0x47, 0x07, 0x07, 0x07, + 0x07, 0x40, 0x40, 0x40, 0x00, 0x00, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, 0x40, + 0x07, 0x07, 0x40, 0x00, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, - 0x02, 0x00, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x02, 0x40, 0x00, 0x40, 0x40, 0x00, - 0x00, 0x00, 0x02, 0x40, 0x40, 0x86, 0x86, 0x86, - 0x86, 0x86, 0x86, 0x86, 0x86, 0x47, 0x47, 0x47, - 0x47, 0x47, 0x47, 0x47, 0x47, 0x48, 0x48, 0x48, - 0x48, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, 0x02, - 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, 0x02, 0x02, - 0x02, 0x01, 0x02, 0x00, 0x02, 0x00, 0x00, 0x02, - 0x02, 0x02, 0x40, 0x40, 
0x40, 0x01, 0x02, 0x04, + 0x07, 0x00, 0x07, 0x07, 0x07, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x07, 0x40, 0x00, 0x40, 0x40, 0x00, + 0x00, 0x00, 0x07, 0x40, 0x40, 0x82, 0x82, 0x82, + 0x82, 0x82, 0x82, 0x82, 0x82, 0x43, 0x43, 0x43, + 0x43, 0x43, 0x43, 0x43, 0x43, 0x44, 0x44, 0x44, + 0x44, 0x44, 0x44, 0x44, 0x44, 0x40, 0x40, 0x07, + 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, 0x07, 0x07, + 0x07, 0x01, 0x07, 0x00, 0x07, 0x00, 0x00, 0x07, + 0x07, 0x07, 0x40, 0x40, 0x40, 0x01, 0x07, 0x08, 0x01, 0x01, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0x41, 0x41, 0x01, 0x01, 0x01, 0x01, 0x01, 0x40, 0xc0, 0x40, 0xc0, 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, - 0xc0, 0x4c, 0x40, 0xc0, 0x40, 0x4c, 0x40, 0x40, + 0xc0, 0x4b, 0x40, 0xc0, 0x40, 0x4b, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x01, 0x01, 0x01, 0x01, 0x01, 0x41, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x40, 0x40, 0x40, 0xc0, 0x40, 0xc0, - 0x40, 0x40, 0xc0, 0xcc, 0x40, 0x40, 0x40, 0xc0, - 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xcc, 0xcc, 0xcc, - 0xcc, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x4c, - 0x4c, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, + 0x40, 0x40, 0xc0, 0xcb, 0x40, 0x40, 0x40, 0xc0, + 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xcb, 0xcb, 0xcb, + 0xcb, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x4b, + 0x4b, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, - 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0x40, 0x8c, 0x8c, - 0x40, 0x40, 0x40, 0x40, 0x4c, 0x80, 0x80, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x4c, 0x40, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x4c, 0x40, 0x8c, 0x8c, - 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, - 0x8c, 0x40, 0x40, 0x40, 0x40, 0x4c, 0x4c, 0x4c, - 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xcc, - 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x4c, 0x4c, 0x40, - 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0xcc, + 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0x40, 0x8b, 
0x8b, + 0x40, 0x40, 0x40, 0x40, 0x4b, 0x80, 0x80, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x4b, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x4b, 0x40, 0x8b, 0x8b, + 0x8b, 0x8b, 0x4b, 0x4b, 0x4b, 0x8b, 0x4b, 0x4b, + 0x8b, 0x40, 0x40, 0x40, 0x40, 0x4b, 0x4b, 0x4b, + 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xcb, + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x4b, 0x4b, 0x40, + 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0xcb, 0xc0, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, - 0x40, 0x4c, 0x4c, 0x8c, 0x8c, 0x40, 0x4c, 0x4c, - 0x4c, 0x4c, 0x4c, 0xcc, 0xc0, 0x4c, 0xcc, 0x4c, - 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, 0x4c, 0x4c, 0x4c, - 0x40, 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0xcc, - 0x4c, 0xcc, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, - 0x4c, 0x4c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, - 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, - 0x4c, 0xcc, 0xcc, 0xcc, 0x4c, 0xcc, 0xcc, 0x4c, - 0xcc, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x40, - 0x40, 0x4c, 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, 0x4c, - 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, 0x4c, 0x4c, 0x8c, - 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, - 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x8c, 0xcc, - 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x8c, - 0x8c, 0xcc, 0x8c, 0xcc, 0xcc, 0x8c, 0xcc, 0xcc, - 0x8c, 0xcc, 0xcc, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, - 0x8c, 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0x40, 0x4c, - 0x40, 0x4c, 0x40, 0x8c, 0x40, 0x40, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x4c, 0x40, 0x40, 0x4c, 0x40, - 0x40, 0x40, 0x40, 0x8c, 0x40, 0x8c, 0x40, 0x40, - 0x40, 0x8c, 0x8c, 0x8c, 0x40, 0x8c, 0x40, 0x40, - 0x40, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x8c, 0x8c, 0x8c, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x8c, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0x40, 0x40, - 0x40, 0x8c, 0x8c, 0x40, 0x40, 0x40, 0x40, 0x8c, + 0x40, 0x4b, 0x4b, 0x8b, 0x8b, 0x40, 0x4b, 0x4b, + 0x4b, 0x4b, 0x4b, 0xcb, 0xc0, 0x4b, 0xcb, 0x4b, + 0x4b, 0x4b, 0x4b, 0xcb, 0xcb, 0x4b, 0x4b, 0x4b, + 0x40, 0x8b, 0x8b, 0x4b, 0x4b, 0x4b, 0x4b, 
0xcb, + 0x4b, 0xcb, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, + 0x4b, 0x4b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, + 0x8b, 0x8b, 0x4b, 0x4b, 0x4b, 0x4b, 0xcb, 0xcb, + 0x4b, 0xcb, 0xcb, 0xcb, 0x4b, 0xcb, 0xcb, 0x4b, + 0xcb, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x40, + 0x40, 0x4b, 0x4b, 0x4b, 0x8b, 0x4b, 0x4b, 0x4b, + 0x4b, 0x4b, 0x4b, 0xcb, 0xcb, 0x4b, 0x4b, 0x8b, + 0x8b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, 0x8b, + 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0x8b, 0xcb, + 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0x8b, + 0x8b, 0xcb, 0x8b, 0xcb, 0xcb, 0x8b, 0xcb, 0xcb, + 0x8b, 0xcb, 0xcb, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, + 0x8b, 0x40, 0x40, 0x4b, 0x4b, 0x4b, 0x40, 0x4b, + 0x40, 0x4b, 0x40, 0x8b, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x4b, 0x40, 0x40, 0x4b, 0x40, + 0x40, 0x40, 0x40, 0x8b, 0x40, 0x8b, 0x40, 0x40, + 0x40, 0x8b, 0x8b, 0x8b, 0x40, 0x8b, 0x40, 0x40, + 0x40, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x8b, 0x8b, 0x8b, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x8b, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x4b, 0x4b, 0x4b, 0x40, 0x40, + 0x40, 0x8b, 0x8b, 0x40, 0x40, 0x40, 0x40, 0x8b, 0xc0, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, 0x40, 0x40, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x40, 0x40, 0x80, 0x80, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x8c, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x8c, 0x80, 0x40, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x02, - 0x02, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, + 0x80, 0x80, 0x40, 0x40, 0x80, 0x80, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x8b, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x8b, 0x80, 0x40, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x07, + 0x07, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, 0x40, 0x40, 0x40, 0x80, 0x80, 0x80, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x80, 0x8c, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, 0x02, - 0x40, 0x40, 0x40, 0x02, 0x40, 0x40, 0x40, 0x40, - 0x02, 0x40, 0x40, 0x40, 0x86, 0x86, 0x86, 0x86, - 
0x86, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x40, - 0x40, 0x40, 0x00, 0x02, 0x00, 0x40, 0x40, 0x02, - 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x02, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, - 0x02, 0x02, 0x40, 0x40, 0x89, 0x8a, 0x8a, 0x8a, - 0x8a, 0x8a, 0x8a, 0x8a, 0x8a, 0x89, 0x8a, 0x8a, - 0x8a, 0x8a, 0x40, 0x40, 0x40, 0x40, 0x47, 0x47, - 0x47, 0x47, 0x47, 0x47, 0x47, 0x40, 0x40, 0x40, - 0x48, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, 0x40, - 0x40, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x82, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - 0x01, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x42, - 0x42, 0x41, 0x01, 0x01, 0x01, 0x40, 0xc0, 0x40, - 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x03, 0x40, - 0x40, 0x02, 0x40, 0x43, 0x43, 0x40, 0x40, 0x40, - 0x40, 0x02, 0x02, 0x02, 0x02, 0x40, 0x02, 0x02, - 0x40, 0x40, 0x02, 0x02, 0x40, 0x40, 0x02, 0x02, - 0x02, 0x02, 0x43, 0x02, 0x02, 0x40, 0x40, 0x40, - 0x40, 0x02, 0x02, 0x43, 0x02, 0x02, 0x02, 0x02, - 0x40, 0x40, 0x40, 0x40, 0x43, 0x43, 0x43, 0x43, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, 0x40, 0x02, - 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x43, 0x02, 0x40, 0x40, 0x40, 0x40, - 0x80, 0x80, 0x80, 0x80, 0x02, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x80, 0x8b, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, 0x07, + 0x40, 0x40, 0x40, 0x07, 0x40, 0x40, 0x40, 0x40, + 0x07, 0x40, 0x40, 0x40, 0x82, 0x82, 0x82, 0x82, + 0x82, 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x40, + 0x40, 0x40, 0x00, 0x07, 0x00, 0x40, 0x40, 0x07, + 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, 0x07, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x07, 0x40, + 0x07, 0x07, 0x40, 0x40, 0x85, 0x86, 0x86, 0x86, + 0x86, 0x86, 0x86, 0x86, 0x86, 0x85, 0x86, 0x86, + 0x86, 0x86, 0x40, 0x40, 0x40, 0x40, 0x43, 0x43, + 0x43, 0x43, 0x43, 0x43, 0x43, 0x40, 0x40, 0x40, + 0x44, 0x44, 0x44, 0x44, 0x44, 0x40, 0x40, 0x40, + 0x40, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x87, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 
0x01, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x47, + 0x47, 0x41, 0x01, 0x01, 0x01, 0x40, 0xc0, 0x40, + 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, 0x09, 0x40, + 0x40, 0x07, 0x40, 0x49, 0x49, 0x40, 0x40, 0x40, + 0x40, 0x07, 0x07, 0x07, 0x07, 0x40, 0x07, 0x07, + 0x40, 0x40, 0x07, 0x07, 0x40, 0x40, 0x07, 0x07, + 0x07, 0x07, 0x49, 0x07, 0x07, 0x40, 0x40, 0x40, + 0x40, 0x07, 0x07, 0x49, 0x07, 0x07, 0x07, 0x07, + 0x40, 0x40, 0x40, 0x40, 0x49, 0x49, 0x49, 0x49, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x40, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x40, 0x40, 0x07, + 0x40, 0x07, 0x07, 0x40, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x49, 0x07, 0x40, 0x40, 0x40, 0x40, + 0x80, 0x80, 0x80, 0x80, 0x07, 0x40, 0x40, 0x40, 0x80, 0x80, 0x80, 0x80, 0x40, 0x80, 0x80, 0x40, 0x40, 0x80, 0x40, 0x40, 0x40, 0x40, 0x40, 0x80, 0x80, 0x80, 0x40, 0x40, 0x80, 0x40, 0x40, 0x01, - 0x01, 0x01, 0x01, 0x40, 0x40, 0x40, 0x40, 0x02, - 0x02, 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x40, 0x40, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, - 0x4c, 0x4c, 0x4c, 0xc0, 0xc0, 0xc0, 0x40, 0x40, - 0x4c, 0x4c, 0x4c, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, - 0xc0, 0x40, 0x4c, 0xc0, 0xc0, 0x40, 0x40, 0x4c, - 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, 0xc0, 0xc0, 0xc0, - 0xc0, 0xc0, 0xc0, 0xcc, 0xcc, 0xc0, 0xc0, 0xc0, - 0xc0, 0xc0, 0xc0, 0x8c, 0xc0, 0x8c, 0x8c, 0x8c, - 0x8c, 0x8c, 0x8c, 0x8c, 0xc0, 0xc0, 0xc0, 0xc0, - 0xc0, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x45, - 0x45, 0x45, 0x45, 0x45, 0x45, 0x45, 0x45, 0x80, - 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x80, - 0x80, 0x8c, 0x80, 0x80, 0x80, 0x80, 0x80, 0x8c, - 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x80, 0x4c, 0x4c, - 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, 0x4c, - 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, - 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, 0x8c, 0x8c, 0x8c, - 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, 0x4c, - 0x8c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, 0x8c, 0x82, - 0x82, 0x82, 0x82, 0x82, 0x8c, 0x4c, 0x8c, 0x8c, - 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, 0x4c, 0x8c, 0x8c, - 0x8c, 
0x8c, 0x8c, 0x8c, 0x40, 0x40, 0x4c, 0x4c, - 0x4c, 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, 0x40, 0x40, - 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, - 0x40, 0x40, 0x40, 0x40, 0x8c, 0x8c, 0x8c, 0x8c, - 0x40, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x40, - 0x8c, 0x41, 0x01, 0x41, 0x41, 0x41, 0x41, 0x41, + 0x01, 0x01, 0x01, 0x40, 0x40, 0x40, 0x40, 0x07, + 0x07, 0x07, 0x01, 0x01, 0x01, 0x01, 0x01, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x40, 0x40, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, + 0x4b, 0x4b, 0x4b, 0xc0, 0xc0, 0xc0, 0x40, 0x40, + 0x4b, 0x4b, 0x4b, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0x40, 0x4b, 0xc0, 0xc0, 0x40, 0x40, 0x4b, + 0x4b, 0x4b, 0x4b, 0xcb, 0xcb, 0xc0, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0xcb, 0xcb, 0xc0, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0x8b, 0xc0, 0x8b, 0x8b, 0x8b, + 0x8b, 0x8b, 0x8b, 0x8b, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4c, + 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x80, + 0x8b, 0x8b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x80, + 0x80, 0x8b, 0x80, 0x80, 0x80, 0x80, 0x80, 0x8b, + 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x80, 0x4b, 0x4b, + 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, 0x8b, 0x4b, + 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, 0x8b, + 0x8b, 0x8b, 0x8b, 0x8b, 0x4b, 0x8b, 0x8b, 0x8b, + 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, 0x4b, 0x4b, 0x4b, + 0x8b, 0x4b, 0x4b, 0x4b, 0x8b, 0x8b, 0x8b, 0x87, + 0x87, 0x87, 0x87, 0x87, 0x8b, 0x4b, 0x8b, 0x8b, + 0x8b, 0x8b, 0x8b, 0x8b, 0x4b, 0x4b, 0x8b, 0x8b, + 0x8b, 0x8b, 0x8b, 0x8b, 0x40, 0x40, 0x4b, 0x4b, + 0x4b, 0x8b, 0x8b, 0x8b, 0x8b, 0x4b, 0x40, 0x40, + 0x40, 0x40, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, + 0x40, 0x40, 0x40, 0x40, 0x8b, 0x8b, 0x8b, 0x8b, + 0x40, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x40, + 0x8b, 0x41, 0x01, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0x40, }; -static constexpr uint8_t s_joinRules[13][13] = { - 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, - 1, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, - 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, - 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, - 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, - 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, - 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, - 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, - 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, +static constexpr uint16_t s_joinRules[] = { + 0b0000010110000000, + 0b0000000000000000, + 0b0000010111101100, + 0b0000010110011000, + 0b0000010110010000, + 0b0000010110011000, + 0b0000010110010000, + 0b0000010110000000, + 0b0000110110000000, + 0b0001111111111101, + 0b0001111111111101, + 0b0000010110000000, + 0b0001010110000000, + 0b0000000000000000, + 0b0000000000000000, + 0b0000000000000000, }; -[[msvc::forceinline]] constexpr uint8_t ucdLookup(const char32_t cp) noexcept +constexpr uint8_t ucdLookup(const char32_t cp) noexcept { const auto s1 = s_stage1[cp >> 11]; const auto s2 = s_stage2[s1 + ((cp >> 6) & 31)]; @@ -873,13 +875,11 @@ static constexpr uint8_t s_joinRules[13][13] = { const auto s4 = s_stage4[s3 + ((cp >> 0) & 7)]; return s4; } -[[msvc::forceinline]] constexpr uint8_t ucdGraphemeJoins(const uint8_t lead, const uint8_t trail) noexcept +constexpr bool ucdGraphemeJoins(const uint8_t lead, const uint8_t trail) noexcept { - const auto l = lead & 15; - const auto t = trail & 15; - return s_joinRules[l][t]; + return s_joinRules[lead & 15] & (1 << (trail & 15)); } -[[msvc::forceinline]] constexpr int ucdToCharacterWidth(const uint8_t val) noexcept +constexpr int ucdToCharacterWidth(const uint8_t val) noexcept { return val >> 6; } @@ -946,7 +946,7 @@ CodepointWidthDetector& CodepointWidthDetector::Singleton() noexcept return s_codepointWidthDetector; } -CodepointWidthDetector::CodepointWidthDetector() : +CodepointWidthDetector::CodepointWidthDetector() noexcept : _enableGraphemes{ Feature_Graphemes::IsEnabled() } { } diff --git a/src/types/CodepointWidthDetector_gen.go 
b/src/types/CodepointWidthDetector_gen.go index 15741f972c5..ad58a5a39c2 100644 --- a/src/types/CodepointWidthDetector_gen.go +++ b/src/types/CodepointWidthDetector_gen.go @@ -1,566 +1,557 @@ -package main - -import ( - "bytes" - "encoding/xml" - "fmt" - "math" - "os" - "slices" - "strconv" - "strings" - "time" - "unsafe" -) - -type CharacterWidth int - -const ( - cwZeroWidth CharacterWidth = iota - cwNarrow - cwWide - cwAmbiguous -) - -type ClusterBreak int - -const ( - cbOther ClusterBreak = iota - cbControl - cbExtend - cbPrepend - cbZeroWidthJoiner - cbRegionalIndicator - cbHangulL - cbHangulV - cbHangulT - cbHangulLV - cbHangulLVT - cbConjunctLinker - cbExtendedPictographic - - cbCount -) - -type HexInt int - -func (h *HexInt) UnmarshalXMLAttr(attr xml.Attr) error { - v, err := strconv.ParseUint(attr.Value, 16, 32) - if err != nil { - return err - } - *h = HexInt(v) - return nil -} - -type UCD struct { - Description string `xml:"description"` - Repertoire struct { - Group []struct { - GeneralCategory string `xml:"gc,attr"` - GraphemeClusterBreak string `xml:"GCB,attr"` - IndicConjunctBreak string `xml:"InCB,attr"` - ExtendedPictographic string `xml:"ExtPict,attr"` - EastAsian string `xml:"ea,attr"` - - // This maps the following tags: - // , , , - Char []struct { - Codepoint HexInt `xml:"cp,attr"` - FirstCodepoint HexInt `xml:"first-cp,attr"` - LastCodepoint HexInt `xml:"last-cp,attr"` - - GeneralCategory string `xml:"gc,attr"` - GraphemeClusterBreak string `xml:"GCB,attr"` - IndicConjunctBreak string `xml:"InCB,attr"` - ExtendedPictographic string `xml:"ExtPict,attr"` - EastAsian string `xml:"ea,attr"` - } `xml:",any"` - } `xml:"group"` - } `xml:"repertoire"` -} - -func main() { - if err := run(); err != nil { - fmt.Println(err) - os.Exit(1) - } -} - -func run() error { - if len(os.Args) <= 1 { - fmt.Println(`Usage: - go run CodepointWidthDetector_gen.go - -You can download the latest ucd.nounihan.grouped.xml from: - 
https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip`) - os.Exit(1) - } - - data, err := os.ReadFile(os.Args[1]) - if err != nil { - return fmt.Errorf("failed to read XML: %w", err) - } - - ucd := &UCD{} - err = xml.Unmarshal(data, ucd) - if err != nil { - return fmt.Errorf("failed to parse XML: %w", err) - } - - values, err := extractValuesFromUCD(ucd) - if err != nil { - return err - } - - // More stages = Less size. The trajectory roughly follows a+b*c^stages, where c < 1. - // 4 still gives ~30% savings over 3 stages and going beyond 5 gives diminishing returns (<10%). - trie := buildBestTrie(values, 2, 8, 4) - rules := buildJoinRules() - - for cp, expected := range values { - var v TrieType - for _, s := range trie.Stages { - v = s.Values[int(v)+((cp>>s.Shift)&s.Mask)] - } - if v != expected { - return fmt.Errorf("trie sanity check failed for %U", cp) - } - } - - buf := &strings.Builder{} - - _, _ = fmt.Fprintf(buf, "// Generated by CodepointWidthDetector_gen.go\n") - _, _ = fmt.Fprintf(buf, "// on %v, from %s, %d bytes\n", time.Now().UTC().Format(time.RFC3339), ucd.Description, trie.TotalSize+len(rules)*len(rules)) - _, _ = fmt.Fprintf(buf, "// clang-format off\n") - - for i, s := range trie.Stages { - width := 16 - if i != 0 { - width = s.Mask + 1 - } - _, _ = fmt.Fprintf(buf, "static constexpr uint%d_t s_stage%d[] = {", s.Bits, i+1) - for j, value := range s.Values { - if j%width == 0 { - buf.WriteString("\n ") - } - _, _ = fmt.Fprintf(buf, " 0x%0*x,", s.Bits/4, value) - } - buf.WriteString("\n};\n") - } - - _, _ = fmt.Fprintf(buf, "static constexpr uint8_t s_joinRules[%d][%d] = {", len(rules), len(rules)) - for _, row := range rules { - buf.WriteString("\n ") - for _, val := range row { - var i int - if val { - i = 1 - } - _, _ = fmt.Fprintf(buf, " %d,", i) - } - } - buf.WriteString("\n};\n") - - _, _ = fmt.Fprintf(buf, "[[msvc::forceinline]] constexpr uint%d_t ucdLookup(const char32_t cp) noexcept\n", 
trie.Stages[len(trie.Stages)-1].Bits) - buf.WriteString("{\n") - for i, s := range trie.Stages { - _, _ = fmt.Fprintf(buf, " const auto s%d = s_stage%d[", i+1, i+1) - if i == 0 { - _, _ = fmt.Fprintf(buf, "cp >> %d", s.Shift) - } else { - _, _ = fmt.Fprintf(buf, "s%d + ((cp >> %d) & %d)", i, s.Shift, s.Mask) - } - buf.WriteString("];\n") - } - _, _ = fmt.Fprintf(buf, " return s%d;\n", len(trie.Stages)) - buf.WriteString("}\n") - - buf.WriteString("[[msvc::forceinline]] constexpr uint8_t ucdGraphemeJoins(const uint8_t lead, const uint8_t trail) noexcept\n") - buf.WriteString("{\n") - buf.WriteString(" const auto l = lead & 15;\n") - buf.WriteString(" const auto t = trail & 15;\n") - buf.WriteString(" return s_joinRules[l][t];\n") - buf.WriteString("}\n") - - buf.WriteString("[[msvc::forceinline]] constexpr int ucdToCharacterWidth(const uint8_t val) noexcept\n") - buf.WriteString("{\n") - buf.WriteString(" return val >> 6;\n") - buf.WriteString("}\n") - - buf.WriteString("// clang-format on\n") - - _, _ = os.Stdout.WriteString(buf.String()) - return nil -} - -type TrieType uint32 - -func extractValuesFromUCD(ucd *UCD) ([]TrieType, error) { - values := make([]TrieType, 1114112) - fillRange(values, trieValue(cbOther, cwNarrow)) - - for _, group := range ucd.Repertoire.Group { - for _, char := range group.Char { - generalCategory := coalesce(char.GeneralCategory, group.GeneralCategory) - graphemeClusterBreak := coalesce(char.GraphemeClusterBreak, group.GraphemeClusterBreak) - indicConjunctBreak := coalesce(char.IndicConjunctBreak, group.IndicConjunctBreak) - extendedPictographic := coalesce(char.ExtendedPictographic, group.ExtendedPictographic) - eastAsian := coalesce(char.EastAsian, group.EastAsian) - - firstCp, lastCp := int(char.FirstCodepoint), int(char.LastCodepoint) - if char.Codepoint != 0 { - firstCp, lastCp = int(char.Codepoint), int(char.Codepoint) - } - - var ( - cb ClusterBreak - width CharacterWidth - ) - - switch graphemeClusterBreak { - case "XX": // 
Anything else - cb = cbOther - case "CR", "LF", "CN": // Carriage Return, Line Feed, Control - // We ignore GB3 which demands that CR × LF do not break apart, because - // a) these control characters won't normally reach our text storage - // b) otherwise we're in a raw write mode and historically conhost stores them in separate cells - cb = cbControl - case "EX", "SM": // Extend, SpacingMark - cb = cbExtend - case "PP": // Prepend - cb = cbPrepend - case "ZWJ": // Zero Width Joiner - cb = cbZeroWidthJoiner - case "RI": // Regional Indicator - cb = cbRegionalIndicator - case "L": // Hangul Syllable Type L - cb = cbHangulL - case "V": // Hangul Syllable Type V - cb = cbHangulV - case "T": // Hangul Syllable Type T - cb = cbHangulT - case "LV": // Hangul Syllable Type LV - cb = cbHangulLV - case "LVT": // Hangul Syllable Type LVT - cb = cbHangulLVT - default: - return nil, fmt.Errorf("unrecognized GCB %s for %U to %U", graphemeClusterBreak, firstCp, lastCp) - } - - if extendedPictographic == "Y" { - // Currently every single Extended_Pictographic codepoint happens to be GCB=XX. - // This is fantastic for us because it means we can stuff it into the ClusterBreak enum - // and treat it as an alias of EXTEND, but with the special GB11 properties. - if cb != cbOther { - return nil, fmt.Errorf("unexpected GCB %s with ExtPict=Y for %U to %U", graphemeClusterBreak, firstCp, lastCp) - } - cb = cbExtendedPictographic - } - - if indicConjunctBreak == "Linker" { - // Similarly here, we can treat it as an alias for EXTEND, but with the GB9c properties. 
- if cb != cbExtend { - return nil, fmt.Errorf("unexpected GCB %s with InCB=Linker for %U to %U", graphemeClusterBreak, firstCp, lastCp) - } - cb = cbConjunctLinker - } - - switch eastAsian { - case "N", "Na", "H": // neutral, narrow, half-width - width = cwNarrow - case "F", "W": // full-width, wide - width = cwWide - case "A": // ambiguous - width = cwAmbiguous - default: - return nil, fmt.Errorf("unrecognized ea %s for %U to %U", eastAsian, firstCp, lastCp) - } - - // There's no "ea" attribute for "zero width" so we need to do that ourselves. This matches: - // Mc: Mark, spacing combining - // Me: Mark, enclosing - // Mn: Mark, non-spacing - // Cf: Control, format - if strings.HasPrefix(generalCategory, "M") || generalCategory == "Cf" { - width = cwZeroWidth - } - - fillRange(values[firstCp:lastCp+1], trieValue(cb, width)) - } - } - - // Box-drawing and block elements are ambiguous according to their EastAsian attribute, - // but by convention terminals always consider them to be narrow. - fillRange(values[0x2500:0x259F+1], trieValue(cbOther, cwNarrow)) - // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. - // By convention, this also turns them from being ambiguous, = narrow by default, into wide ones. 
- fillRange(values[0xFE0F:0xFE0F+1], trieValue(cbExtend, cwWide)) - - return values, nil -} - -func trieValue(cb ClusterBreak, width CharacterWidth) TrieType { - return TrieType(byte(cb) | byte(width)<<6) -} - -func coalesce(a, b string) string { - if a != "" { - return a - } - return b -} - -type Stage struct { - Values []TrieType - Shift int - Mask int - Bits int -} - -type Trie struct { - Stages []*Stage - TotalSize int -} - -func buildBestTrie(uncompressed []TrieType, minShift, maxShift, stages int) *Trie { - delta := maxShift - minShift + 1 - results := make(chan *Trie) - bestTrie := &Trie{TotalSize: math.MaxInt} - - iters := 1 - for i := 1; i < stages; i++ { - iters *= delta - } - - for i := 0; i < iters; i++ { - go func(i int) { - // Given minShift=2, maxShift=3, depth=3 this generates - // [2 2 2] - // [3 2 2] - // [2 3 2] - // [3 3 2] - // [2 2 3] - // [3 2 3] - // [2 3 3] - // [3 3 3] - shifts := make([]int, stages-1) - for j := range shifts { - shifts[j] = minShift + i%delta - i /= delta - } - results <- buildTrie(uncompressed, shifts) - }(i) - } - - for i := 0; i < iters; i++ { - t := <-results - if bestTrie.TotalSize > t.TotalSize { - bestTrie = t - } - } - return bestTrie -} - -func buildTrie(uncompressed []TrieType, shifts []int) *Trie { - var cumulativeShift int - var stages []*Stage - - for _, shift := range shifts { - chunkSize := 1 << shift - cache := map[string]TrieType{} - compressed := make([]TrieType, 0, len(uncompressed)/8) - offsets := make([]TrieType, 0, len(uncompressed)/chunkSize) - - for i := 0; i < len(uncompressed); i += chunkSize { - chunk := uncompressed[i:min(len(uncompressed), i+chunkSize)] - // Cast the integer slice to a string so that it can be hashed. - key := unsafe.String((*byte)(unsafe.Pointer(&chunk[0])), len(chunk)*int(unsafe.Sizeof(chunk[0]))) - offset, exists := cache[key] - - if !exists { - // For a 4-stage trie searching for existing occurrences of chunk in compressed yields a ~10% - // compression improvement. 
Checking for overlaps with the tail end of compressed yields another ~15%. - // FYI I tried to shuffle the order of compressed chunks but found that this has a negligible impact. - if existing := findExisting(compressed, chunk); existing != -1 { - offset = TrieType(existing) - cache[key] = offset - } else { - overlap := measureOverlap(compressed, chunk) - compressed = append(compressed, chunk[overlap:]...) - offset = TrieType(len(compressed) - len(chunk)) - cache[key] = offset - } - } - - offsets = append(offsets, offset) - } - - stages = append(stages, &Stage{ - Values: compressed, - Shift: cumulativeShift, - Mask: chunkSize - 1, - }) - - uncompressed = offsets - cumulativeShift += shift - } - - stages = append(stages, &Stage{ - Values: uncompressed, - Shift: cumulativeShift, - Mask: math.MaxInt32, - }) - slices.Reverse(stages) - - for _, s := range stages { - m := slices.Max(s.Values) - if m <= 0xff { - s.Bits = 8 - } else if m <= 0xffff { - s.Bits = 16 - } else { - s.Bits = 32 - } - } - - totalSize := 0 - for _, s := range stages { - totalSize += (s.Bits / 8) * len(s.Values) - } - - return &Trie{ - Stages: stages, - TotalSize: totalSize, - } -} - -// Finds needle in haystack. Returns -1 if it couldn't be found. -func findExisting(haystack, needle []TrieType) int { - if len(haystack) == 0 || len(needle) == 0 { - return -1 - } - - s := int(unsafe.Sizeof(TrieType(0))) - h := unsafe.Slice((*byte)(unsafe.Pointer(&haystack[0])), len(haystack)*s) - n := unsafe.Slice((*byte)(unsafe.Pointer(&needle[0])), len(needle)*s) - i := 0 - - for { - i = bytes.Index(h[i:], n) - if i == -1 { - return -1 - } - if i%s == 0 { - return i / s - } - } -} - -// Given two slices, this returns the amount by which prev's end overlaps with next's start. -// That is, given [0,1,2,3,4] and [2,3,4,5] this returns 3 because [2,3,4] is the "overlap". 
-func measureOverlap(prev, next []TrieType) int { - for overlap := min(len(prev), len(next)); overlap >= 0; overlap-- { - if slices.Equal(prev[len(prev)-overlap:], next[:overlap]) { - return overlap - } - } - return 0 -} - -func buildJoinRules() [cbCount][cbCount]bool { - // UAX #29 states: - // > Note: Testing two adjacent characters is insufficient for determining a boundary. - // - // I completely agree, but I really hate it. So this code trades off correctness for simplicity - // by using a simple lookup table anyway. Under most circumstances users won't notice, - // because as far as I can see this only behaves different for degenerate ("invalid") Unicode. - // It reduces our code complexity significantly and is way *way* faster. - // - // This is a great reference for the resulting table: - // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html - - // NOTE: We build the table in reverse, because rules with lower numbers take priority. - // (This is primarily relevant for GB9b vs. GB4.) - - // Otherwise, break everywhere. - // GB999: Any ÷ Any - var rules [cbCount][cbCount]bool - - // Do not break within emoji flag sequences. That is, do not break between regional indicator - // (RI) symbols if there is an odd number of RI characters before the break point. - // GB13: [^RI] (RI RI)* RI × RI - // GB12: sot (RI RI)* RI × RI - // - // We cheat here by not checking that the number of RIs is even. Meh! - rules[cbRegionalIndicator][cbRegionalIndicator] = true - - // Do not break within emoji modifier sequences or emoji zwj sequences. - // GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic} - // - // We cheat here by not checking that the ZWJ is preceded by an ExtPic. Meh! - rules[cbZeroWidthJoiner][cbExtendedPictographic] = true - - // Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker. 
- // GB9c: \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker} [\p{InCB=Extend}\p{InCB=Linker}]* × \p{InCB=Consonant} - // - // I'm sure GB9c is great for these languages, but honestly the definition is complete whack. - // Just look at that chonker! This isn't a "cheat" like the others above, this is a reinvention: - // We treat it as having both ClusterBreak.PREPEND and ClusterBreak.EXTEND properties. - fillRange(rules[cbConjunctLinker][:], true) - for i := range rules { - rules[i][cbConjunctLinker] = true - } - - // Do not break before SpacingMarks, or after Prepend characters. - // GB9b: Prepend × - fillRange(rules[cbPrepend][:], true) - - // Do not break before SpacingMarks, or after Prepend characters. - // GB9a: × SpacingMark - // Do not break before extending characters or ZWJ. - // GB9: × (Extend | ZWJ) - for i := range rules { - // CodepointWidthDetector_gen.py sets SpacingMarks to ClusterBreak.EXTEND as well, - // since they're entirely identical to GB9's Extend. - rules[i][cbExtend] = true - rules[i][cbZeroWidthJoiner] = true - } - - // Do not break Hangul syllable sequences. - // GB8: (LVT | T) x T - rules[cbHangulLVT][cbHangulT] = true - rules[cbHangulT][cbHangulT] = true - // GB7: (LV | V) x (V | T) - rules[cbHangulLV][cbHangulT] = true - rules[cbHangulLV][cbHangulV] = true - rules[cbHangulV][cbHangulV] = true - rules[cbHangulV][cbHangulT] = true - // GB6: L x (L | V | LV | LVT) - rules[cbHangulL][cbHangulL] = true - rules[cbHangulL][cbHangulV] = true - rules[cbHangulL][cbHangulLV] = true - rules[cbHangulL][cbHangulLVT] = true - - // Do not break between a CR and LF. Otherwise, break before and after controls. 
- // GB5: ÷ (Control | CR | LF) - for i := range rules { - rules[i][cbControl] = false - } - // GB4: (Control | CR | LF) ÷ - fillRange(rules[cbControl][:], false) - - // We ignore GB3 which demands that CR × LF do not break apart, because - // a) these control characters won't normally reach our text storage - // b) otherwise we're in a raw write mode and historically conhost stores them in separate cells - - // We also ignore GB1 and GB2 which demand breaks at the start and end, - // because that's not part of the loops in GraphemeNext/Prev and not this table. - return rules -} - -func fillRange[T any](s []T, v T) { - for i := range s { - s[i] = v - } -} +package main + +import ( + "bytes" + "encoding/xml" + "fmt" + "math" + "os" + "slices" + "strconv" + "strings" + "time" + "unsafe" +) + +type CharacterWidth int + +const ( + cwZeroWidth CharacterWidth = iota + cwNarrow + cwWide + cwAmbiguous +) + +type ClusterBreak int + +const ( + cbOther ClusterBreak = iota // GB999 + cbControl // GB3, GB4, GB5 + cbHangulL // GB6, GB7, GB8 + cbHangulV // GB6, GB7, GB8 + cbHangulT // GB6, GB7, GB8 + cbHangulLV // GB6, GB7, GB8 + cbHangulLVT // GB6, GB7, GB8 + cbExtend // GB9, GB9a + cbZeroWidthJoiner // GB9, GB11 + cbPrepend // GB9b + cbConjunctLinker // GB9c + cbExtendedPictographic // GB11 + cbRegionalIndicator // GB12, GB13 + + cbCount +) + +type HexInt int + +func (h *HexInt) UnmarshalXMLAttr(attr xml.Attr) error { + v, err := strconv.ParseUint(attr.Value, 16, 32) + if err != nil { + return err + } + *h = HexInt(v) + return nil +} + +type UCD struct { + Description string `xml:"description"` + Repertoire struct { + Group []struct { + GeneralCategory string `xml:"gc,attr"` + GraphemeClusterBreak string `xml:"GCB,attr"` + IndicConjunctBreak string `xml:"InCB,attr"` + ExtendedPictographic string `xml:"ExtPict,attr"` + EastAsian string `xml:"ea,attr"` + + // This maps the following tags: + // , , , + Char []struct { + Codepoint HexInt `xml:"cp,attr"` + FirstCodepoint HexInt 
`xml:"first-cp,attr"` + LastCodepoint HexInt `xml:"last-cp,attr"` + + GeneralCategory string `xml:"gc,attr"` + GraphemeClusterBreak string `xml:"GCB,attr"` + IndicConjunctBreak string `xml:"InCB,attr"` + ExtendedPictographic string `xml:"ExtPict,attr"` + EastAsian string `xml:"ea,attr"` + } `xml:",any"` + } `xml:"group"` + } `xml:"repertoire"` +} + +func main() { + if err := run(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +func run() error { + if len(os.Args) <= 1 { + fmt.Println(`Usage: + go run CodepointWidthDetector_gen.go + +You can download the latest ucd.nounihan.grouped.xml from: + https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip`) + os.Exit(1) + } + + data, err := os.ReadFile(os.Args[1]) + if err != nil { + return fmt.Errorf("failed to read XML: %w", err) + } + + ucd := &UCD{} + err = xml.Unmarshal(data, ucd) + if err != nil { + return fmt.Errorf("failed to parse XML: %w", err) + } + + values, err := extractValuesFromUCD(ucd) + if err != nil { + return err + } + + // More stages = Less size. The trajectory roughly follows a+b*c^stages, where c < 1. + // 4 still gives ~30% savings over 3 stages and going beyond 5 gives diminishing returns (<10%). 
+ trie := buildBestTrie(values, 2, 8, 4) + rules := buildJoinRules() + totalSize := trie.TotalSize + int(unsafe.Sizeof(rules)) + + for cp, expected := range values { + var v TrieType + for _, s := range trie.Stages { + v = s.Values[int(v)+((cp>>s.Shift)&s.Mask)] + } + if v != expected { + return fmt.Errorf("trie sanity check failed for %U", cp) + } + } + + buf := &strings.Builder{} + + _, _ = fmt.Fprintf(buf, "// Generated by CodepointWidthDetector_gen.go\n") + _, _ = fmt.Fprintf(buf, "// on %s, from %s, %d bytes\n", time.Now().UTC().Format(time.RFC3339), ucd.Description, totalSize) + _, _ = fmt.Fprintf(buf, "// clang-format off\n") + + for i, s := range trie.Stages { + width := 16 + if i != 0 { + width = s.Mask + 1 + } + _, _ = fmt.Fprintf(buf, "static constexpr uint%d_t s_stage%d[] = {", s.Bits, i+1) + for j, value := range s.Values { + if j%width == 0 { + buf.WriteString("\n ") + } + _, _ = fmt.Fprintf(buf, " %#0*x,", s.Bits/4, value) + } + buf.WriteString("\n};\n") + } + + buf.WriteString("static constexpr uint16_t s_joinRules[] = {\n") + for _, r := range rules { + _, _ = fmt.Fprintf(buf, " %#016b,\n", r) + } + buf.WriteString("};\n") + + _, _ = fmt.Fprintf(buf, "constexpr uint%d_t ucdLookup(const char32_t cp) noexcept\n", trie.Stages[len(trie.Stages)-1].Bits) + buf.WriteString("{\n") + for i, s := range trie.Stages { + _, _ = fmt.Fprintf(buf, " const auto s%d = s_stage%d[", i+1, i+1) + if i == 0 { + _, _ = fmt.Fprintf(buf, "cp >> %d", s.Shift) + } else { + _, _ = fmt.Fprintf(buf, "s%d + ((cp >> %d) & %d)", i, s.Shift, s.Mask) + } + buf.WriteString("];\n") + } + _, _ = fmt.Fprintf(buf, " return s%d;\n", len(trie.Stages)) + buf.WriteString("}\n") + + buf.WriteString(`constexpr bool ucdGraphemeJoins(const uint8_t lead, const uint8_t trail) noexcept +{ + return s_joinRules[lead & 15] & (1 << (trail & 15)); +} +constexpr int ucdToCharacterWidth(const uint8_t val) noexcept +{ + return val >> 6; +} +// clang-format on +`) + + _, _ = 
os.Stdout.WriteString(buf.String()) + return nil +} + +type TrieType uint32 + +func extractValuesFromUCD(ucd *UCD) ([]TrieType, error) { + values := make([]TrieType, 1114112) + fillRange(values, trieValue(cbOther, cwNarrow)) + + for _, group := range ucd.Repertoire.Group { + for _, char := range group.Char { + generalCategory := coalesce(char.GeneralCategory, group.GeneralCategory) + graphemeClusterBreak := coalesce(char.GraphemeClusterBreak, group.GraphemeClusterBreak) + indicConjunctBreak := coalesce(char.IndicConjunctBreak, group.IndicConjunctBreak) + extendedPictographic := coalesce(char.ExtendedPictographic, group.ExtendedPictographic) + eastAsian := coalesce(char.EastAsian, group.EastAsian) + + firstCp, lastCp := int(char.FirstCodepoint), int(char.LastCodepoint) + if char.Codepoint != 0 { + firstCp, lastCp = int(char.Codepoint), int(char.Codepoint) + } + + var cb ClusterBreak + switch graphemeClusterBreak { + case "XX": // Anything else + cb = cbOther + case "CR", "LF", "CN": // Carriage Return, Line Feed, Control + // We ignore GB3 which demands that CR × LF do not break apart, because + // a) these control characters won't normally reach our text storage + // b) otherwise we're in a raw write mode and historically conhost stores them in separate cells + cb = cbControl + case "EX", "SM": // Extend, SpacingMark + cb = cbExtend + case "PP": // Prepend + cb = cbPrepend + case "ZWJ": // Zero Width Joiner + cb = cbZeroWidthJoiner + case "RI": // Regional Indicator + cb = cbRegionalIndicator + case "L": // Hangul Syllable Type L + cb = cbHangulL + case "V": // Hangul Syllable Type V + cb = cbHangulV + case "T": // Hangul Syllable Type T + cb = cbHangulT + case "LV": // Hangul Syllable Type LV + cb = cbHangulLV + case "LVT": // Hangul Syllable Type LVT + cb = cbHangulLVT + default: + return nil, fmt.Errorf("unrecognized GCB %s for %U to %U", graphemeClusterBreak, firstCp, lastCp) + } + if extendedPictographic == "Y" { + // Currently every single 
Extended_Pictographic codepoint happens to be GCB=XX. + // This is fantastic for us because it means we can stuff it into the ClusterBreak enum + // and treat it as an alias of EXTEND, but with the special GB11 properties. + if cb != cbOther { + return nil, fmt.Errorf("unexpected GCB %s with ExtPict=Y for %U to %U", graphemeClusterBreak, firstCp, lastCp) + } + cb = cbExtendedPictographic + } + if indicConjunctBreak == "Linker" { + // Similarly here, we can treat it as an alias for EXTEND, but with the GB9c properties. + if cb != cbExtend { + return nil, fmt.Errorf("unexpected GCB %s with InCB=Linker for %U to %U", graphemeClusterBreak, firstCp, lastCp) + } + cb = cbConjunctLinker + } + + var width CharacterWidth + switch eastAsian { + case "N", "Na", "H": // neutral, narrow, half-width + width = cwNarrow + case "F", "W": // full-width, wide + width = cwWide + case "A": // ambiguous + width = cwAmbiguous + default: + return nil, fmt.Errorf("unrecognized ea %s for %U to %U", eastAsian, firstCp, lastCp) + } + // There's no "ea" attribute for "zero width" so we need to do that ourselves. This matches: + // Mc: Mark, spacing combining + // Me: Mark, enclosing + // Mn: Mark, non-spacing + // Cf: Control, format + if strings.HasPrefix(generalCategory, "M") || generalCategory == "Cf" { + width = cwZeroWidth + } + + fillRange(values[firstCp:lastCp+1], trieValue(cb, width)) + } + } + + // Box-drawing and block elements are ambiguous according to their EastAsian attribute, + // but by convention terminals always consider them to be narrow. + fillRange(values[0x2500:0x259F+1], trieValue(cbOther, cwNarrow)) + // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. + // By convention, this also turns them from being ambiguous, = narrow by default, into wide ones. 
+ fillRange(values[0xFE0F:0xFE0F+1], trieValue(cbExtend, cwWide)) + + return values, nil +} + +func trieValue(cb ClusterBreak, width CharacterWidth) TrieType { + return TrieType(byte(cb) | byte(width)<<6) +} + +func coalesce(a, b string) string { + if a != "" { + return a + } + return b +} + +type Stage struct { + Values []TrieType + Shift int + Mask int + Bits int +} + +type Trie struct { + Stages []*Stage + TotalSize int +} + +func buildBestTrie(uncompressed []TrieType, minShift, maxShift, stages int) *Trie { + delta := maxShift - minShift + 1 + results := make(chan *Trie) + bestTrie := &Trie{TotalSize: math.MaxInt} + + iters := 1 + for i := 1; i < stages; i++ { + iters *= delta + } + + for i := 0; i < iters; i++ { + go func(i int) { + // Given minShift=2, maxShift=3, depth=3 this generates + // [2 2 2] + // [3 2 2] + // [2 3 2] + // [3 3 2] + // [2 2 3] + // [3 2 3] + // [2 3 3] + // [3 3 3] + shifts := make([]int, stages-1) + for j := range shifts { + shifts[j] = minShift + i%delta + i /= delta + } + results <- buildTrie(uncompressed, shifts) + }(i) + } + + for i := 0; i < iters; i++ { + t := <-results + if bestTrie.TotalSize > t.TotalSize { + bestTrie = t + } + } + return bestTrie +} + +func buildTrie(uncompressed []TrieType, shifts []int) *Trie { + var cumulativeShift int + var stages []*Stage + + for _, shift := range shifts { + chunkSize := 1 << shift + cache := map[string]TrieType{} + compressed := make([]TrieType, 0, len(uncompressed)/8) + offsets := make([]TrieType, 0, len(uncompressed)/chunkSize) + + for i := 0; i < len(uncompressed); i += chunkSize { + chunk := uncompressed[i:min(len(uncompressed), i+chunkSize)] + // Cast the integer slice to a string so that it can be hashed. + key := unsafe.String((*byte)(unsafe.Pointer(&chunk[0])), len(chunk)*int(unsafe.Sizeof(chunk[0]))) + offset, exists := cache[key] + + if !exists { + // For a 4-stage trie searching for existing occurrences of chunk in compressed yields a ~10% + // compression improvement. 
Checking for overlaps with the tail end of compressed yields another ~15%. + // FYI I tried to shuffle the order of compressed chunks but found that this has a negligible impact. + if existing := findExisting(compressed, chunk); existing != -1 { + offset = TrieType(existing) + cache[key] = offset + } else { + overlap := measureOverlap(compressed, chunk) + compressed = append(compressed, chunk[overlap:]...) + offset = TrieType(len(compressed) - len(chunk)) + cache[key] = offset + } + } + + offsets = append(offsets, offset) + } + + stages = append(stages, &Stage{ + Values: compressed, + Shift: cumulativeShift, + Mask: chunkSize - 1, + }) + + uncompressed = offsets + cumulativeShift += shift + } + + stages = append(stages, &Stage{ + Values: uncompressed, + Shift: cumulativeShift, + Mask: math.MaxInt32, + }) + slices.Reverse(stages) + + for _, s := range stages { + m := slices.Max(s.Values) + if m <= 0xff { + s.Bits = 8 + } else if m <= 0xffff { + s.Bits = 16 + } else { + s.Bits = 32 + } + } + + totalSize := 0 + for _, s := range stages { + totalSize += (s.Bits / 8) * len(s.Values) + } + + return &Trie{ + Stages: stages, + TotalSize: totalSize, + } +} + +// Finds needle in haystack. Returns -1 if it couldn't be found. +func findExisting(haystack, needle []TrieType) int { + if len(haystack) == 0 || len(needle) == 0 { + return -1 + } + + s := int(unsafe.Sizeof(TrieType(0))) + h := unsafe.Slice((*byte)(unsafe.Pointer(&haystack[0])), len(haystack)*s) + n := unsafe.Slice((*byte)(unsafe.Pointer(&needle[0])), len(needle)*s) + i := 0 + + for { + i = bytes.Index(h[i:], n) + if i == -1 { + return -1 + } + if i%s == 0 { + return i / s + } + } +} + +// Given two slices, this returns the amount by which prev's end overlaps with next's start. +// That is, given [0,1,2,3,4] and [2,3,4,5] this returns 3 because [2,3,4] is the "overlap". 
+func measureOverlap(prev, next []TrieType) int { + for overlap := min(len(prev), len(next)); overlap >= 0; overlap-- { + if slices.Equal(prev[len(prev)-overlap:], next[:overlap]) { + return overlap + } + } + return 0 +} + +func buildJoinRules() [16]uint16 { + // UAX #29 states: + // > Note: Testing two adjacent characters is insufficient for determining a boundary. + // + // I completely agree, but I really hate it. So this code trades off correctness for simplicity + // by using a simple lookup table anyway. Under most circumstances users won't notice, + // because as far as I can see this only behaves different for degenerate ("invalid") Unicode. + // It reduces our code complexity significantly and is way *way* faster. + // + // This is a great reference for the resulting table: + // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html + + // NOTE: We build the table in reverse, because rules with lower numbers take priority. + // (This is primarily relevant for GB9b vs. GB4.) + + // Otherwise, break everywhere. + // GB999: Any ÷ Any + var rules [16]uint16 + + // Do not break within emoji flag sequences. That is, do not break between regional indicator + // (RI) symbols if there is an odd number of RI characters before the break point. + // GB13: [^RI] (RI RI)* RI × RI + // GB12: sot (RI RI)* RI × RI + // + // We cheat here by not checking that the number of RIs is even. Meh! + rules[cbRegionalIndicator] |= 1 << cbRegionalIndicator + + // Do not break within emoji modifier sequences or emoji zwj sequences. + // GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic} + // + // We cheat here by not checking that the ZWJ is preceded by an ExtPic. Meh! + rules[cbZeroWidthJoiner] |= 1 << cbExtendedPictographic + + // Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker. 
+ // GB9c: \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker} [\p{InCB=Extend}\p{InCB=Linker}]* × \p{InCB=Consonant} + // + // I'm sure GB9c is great for these languages, but honestly the definition is complete whack. + // Just look at that chonker! This isn't a "cheat" like the others above, this is a reinvention: + // We treat it as having both ClusterBreak.PREPEND and ClusterBreak.EXTEND properties. + rules[cbConjunctLinker] = math.MaxUint16 + for i := range rules { + rules[i] |= 1 << cbConjunctLinker + } + + // Do not break before SpacingMarks, or after Prepend characters. + // GB9b: Prepend × + rules[cbPrepend] = math.MaxUint16 + + // Do not break before SpacingMarks, or after Prepend characters. + // GB9a: × SpacingMark + // Do not break before extending characters or ZWJ. + // GB9: × (Extend | ZWJ) + for i := range rules { + // CodepointWidthDetector_gen.py sets SpacingMarks to ClusterBreak.EXTEND as well, + // since they're entirely identical to GB9's Extend. + rules[i] |= (1 << cbExtend) | (1 << cbZeroWidthJoiner) + } + + // Do not break Hangul syllable sequences. + // GB8: (LVT | T) x T + rules[cbHangulLVT] |= 1 << cbHangulT + rules[cbHangulT] |= 1 << cbHangulT + // GB7: (LV | V) x (V | T) + rules[cbHangulLV] |= 1 << cbHangulT + rules[cbHangulLV] |= 1 << cbHangulV + rules[cbHangulV] |= 1 << cbHangulV + rules[cbHangulV] |= 1 << cbHangulT + // GB6: L x (L | V | LV | LVT) + rules[cbHangulL] |= 1 << cbHangulL + rules[cbHangulL] |= 1 << cbHangulV + rules[cbHangulL] |= 1 << cbHangulLV + rules[cbHangulL] |= 1 << cbHangulLVT + + // Do not break between a CR and LF. Otherwise, break before and after controls. 
+ // GB5: ÷ (Control | CR | LF) + for i := range rules { + rules[i] &= ^(uint16(1) << cbControl) + } + // GB4: (Control | CR | LF) ÷ + rules[cbControl] = 0 + + // We ignore GB3 which demands that CR × LF do not break apart, because + // a) these control characters won't normally reach our text storage + // b) otherwise we're in a raw write mode and historically conhost stores them in separate cells + + // We also ignore GB1 and GB2 which demand breaks at the start and end, + // because that's not part of the loops in GraphemeNext/Prev and not this table. + + // Set any bits to 0 which are outside the valid [cbOther,cbCount) range. + for i := range rules { + rules[i] &= 1< 0;) { const auto beg = cwd.GraphemePrev(text, end, nullptr); - actual.emplace_back(std::wstring_view{ text }.substr(beg, end - beg)); + actual.emplace_back(til::clamp_slice_abs(text, beg, end)); end = beg; } std::reverse(actual.begin(), actual.end()); @@ -1325,7 +1325,7 @@ class CodepointWidthDetectorTests int width; const auto end = cwd.GraphemeNext(text, 0, &width); - VERIFY_ARE_EQUAL(4, end); + VERIFY_ARE_EQUAL(4u, end); VERIFY_ARE_EQUAL(2, width); } }; diff --git a/src/types/ut_types/CodepointWidthDetectorTests_gen.go b/src/types/ut_types/CodepointWidthDetectorTests_gen.go index 93d4597b136..6b4a6087a38 100644 --- a/src/types/ut_types/CodepointWidthDetectorTests_gen.go +++ b/src/types/ut_types/CodepointWidthDetectorTests_gen.go @@ -1,139 +1,144 @@ -package main - -import ( - "bufio" - "bytes" - "fmt" - "io" - "net/http" - "os" - "regexp" - "strconv" - "strings" - "time" -) - -func main() { - if err := run(); err != nil { - fmt.Println(err) - os.Exit(1) - } -} - -func run() error { - data, err := fetch(`https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt`) - if err != nil { - return err - } - - testString := strings.Builder{} - - scanner := bufio.NewScanner(bytes.NewReader(data)) - firstLine := true - - for scanner.Scan() { - line := scanner.Text() - test, comment, _ 
:= strings.Cut(line, "#") - test = strings.TrimSpace(test) - comment = strings.TrimSpace(comment) - - if firstLine { - firstLine = false - - re, err := regexp.Compile(`^GraphemeBreakTest-(\d+\.\d+\.\d+)\.txt$`) - if err != nil { - return err - } - - m := re.FindStringSubmatch(comment) - if len(m) == 0 { - return fmt.Errorf("failed to find version number, got: %s", comment) - } - - _, _ = fmt.Fprintf(&testString, "// Generated by CodepointWidthDetector_gen.go\n") - _, _ = fmt.Fprintf(&testString, "// on %s, from Unicode %s\n", time.Now().UTC().Format(time.RFC3339), m[1]) - - testString.WriteString("struct GraphemeBreakTest\n") - testString.WriteString("{\n") - testString.WriteString(" const wchar_t* comment;\n") - testString.WriteString(" const wchar_t* graphemes[4];\n") - testString.WriteString("};\n") - testString.WriteString("static constexpr GraphemeBreakTest s_graphemeBreakTests[] = {\n") - } - // # GraphemeBreakTest-15.1.0.txt - - if len(test) == 0 || len(comment) == 0 { - continue - } - - graphemes := strings.Split(test, "÷") - for i, g := range graphemes { - graphemes[i] = strings.TrimSpace(g) - } - - testString.WriteString("") - _, _ = fmt.Fprintf(&testString, ` { L"%s"`, comment) - - for _, g := range graphemes { - if len(g) == 0 { - continue - } - - testString.WriteString(`, L"`) - - codepoints := strings.Split(g, "×") - for _, c := range codepoints { - i, err := strconv.ParseUint(strings.TrimSpace(c), 16, 32) - if err != nil { - return err - } - if i == 0x07 { - testString.WriteString(`\a`) - } else if i == 0x08 { - testString.WriteString(`\b`) - } else if i == 0x09 { - testString.WriteString(`\t`) - } else if i == 0x0A { - testString.WriteString(`\n`) - } else if i == 0x0B { - testString.WriteString(`\v`) - } else if i == 0x0C { - testString.WriteString(`\f`) - } else if i == 0x0D { - testString.WriteString(`\r`) - } else if i >= 0x20 && i <= 0x7e { - testString.WriteRune(rune(i)) - } else if i <= 0xff { - _, _ = fmt.Fprintf(&testString, `\x%02X`, i) - 
} else if i <= 0xffff { - _, _ = fmt.Fprintf(&testString, `\x%04X`, i) - } else { - _, _ = fmt.Fprintf(&testString, `\U%08X`, i) - } - } - - testString.WriteString(`"`) - } - - testString.WriteString(" },\n") - } - - testString.WriteString("};\n") - _, _ = os.Stdout.WriteString(testString.String()) - return nil -} - -func fetch(url string) ([]byte, error) { - res, err := http.Get(url) - if err != nil { - return nil, err - } - defer res.Body.Close() - - body, err := io.ReadAll(res.Body) - if err != nil { - return nil, err - } - - return body, nil -} +package main + +import ( + "bufio" + "bytes" + "fmt" + "io" + "net/http" + "os" + "regexp" + "strconv" + "strings" + "time" +) + +func main() { + if err := run(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +func run() error { + data, err := fetch(`https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt`) + if err != nil { + return err + } + + testString := strings.Builder{} + + scanner := bufio.NewScanner(bytes.NewReader(data)) + firstLine := true + + for scanner.Scan() { + line := scanner.Text() + test, comment, _ := strings.Cut(line, "#") + test = strings.TrimSpace(test) + comment = strings.TrimSpace(comment) + + if firstLine { + firstLine = false + + re, err := regexp.Compile(`^GraphemeBreakTest-(\d+\.\d+\.\d+)\.txt$`) + if err != nil { + return err + } + + m := re.FindStringSubmatch(comment) + if len(m) == 0 { + return fmt.Errorf("failed to find version number, got: %s", comment) + } + + _, _ = fmt.Fprintf( + &testString, + `// Generated by CodepointWidthDetector_gen.go +// on %s, from Unicode %s +struct GraphemeBreakTest +{ + const wchar_t* comment; + const wchar_t* graphemes[4]; +}; +static constexpr GraphemeBreakTest s_graphemeBreakTests[] = { +`, + time.Now().UTC().Format(time.RFC3339), + m[1], + ) + } + // # GraphemeBreakTest-15.1.0.txt + + if len(test) == 0 || len(comment) == 0 { + continue + } + + graphemes := strings.Split(test, "÷") + for i, g := range graphemes { + 
graphemes[i] = strings.TrimSpace(g) + } + + testString.WriteString("") + _, _ = fmt.Fprintf(&testString, ` { L"%s"`, comment) + + for _, g := range graphemes { + if len(g) == 0 { + continue + } + + testString.WriteString(`, L"`) + + codepoints := strings.Split(g, "×") + for _, c := range codepoints { + i, err := strconv.ParseUint(strings.TrimSpace(c), 16, 32) + if err != nil { + return err + } + if i == 0x07 { + testString.WriteString(`\a`) + } else if i == 0x08 { + testString.WriteString(`\b`) + } else if i == 0x09 { + testString.WriteString(`\t`) + } else if i == 0x0A { + testString.WriteString(`\n`) + } else if i == 0x0B { + testString.WriteString(`\v`) + } else if i == 0x0C { + testString.WriteString(`\f`) + } else if i == 0x0D { + testString.WriteString(`\r`) + } else if i >= 0x20 && i <= 0x7e { + testString.WriteRune(rune(i)) + } else if i <= 0xff { + _, _ = fmt.Fprintf(&testString, `\x%02X`, i) + } else if i <= 0xffff { + _, _ = fmt.Fprintf(&testString, `\x%04X`, i) + } else { + _, _ = fmt.Fprintf(&testString, `\U%08X`, i) + } + } + + testString.WriteString(`"`) + } + + testString.WriteString(" },\n") + } + + testString.WriteString("};\n") + _, _ = os.Stdout.WriteString(testString.String()) + return nil +} + +func fetch(url string) ([]byte, error) { + res, err := http.Get(url) + if err != nil { + return nil, err + } + defer res.Body.Close() + + body, err := io.ReadAll(res.Body) + if err != nil { + return nil, err + } + + return body, nil +} From b5295576691bd46e7e397fbf6d7dfbbad1bdc5e2 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 22 Mar 2024 17:54:19 +0100 Subject: [PATCH 03/14] A setting doesn't seem feasible --- doc/cascadia/profiles.schema.json | 5 --- src/cascadia/TerminalCore/ICoreSettings.idl | 1 - src/cascadia/TerminalCore/Terminal.cpp | 3 -- .../GlobalAppSettings.idl | 1 - .../TerminalSettingsModel/MTSMSettings.h | 1 - .../TerminalSettings.cpp | 1 - .../TerminalSettingsModel/TerminalSettings.h | 1 - src/cascadia/inc/ControlProperties.h | 
1 - src/types/CodepointWidthDetector.cpp | 43 ++++++++++--------- src/types/inc/CodepointWidthDetector.hpp | 8 +--- .../ut_types/CodepointWidthDetectorTests.cpp | 18 ++++++++ 11 files changed, 42 insertions(+), 41 deletions(-) diff --git a/doc/cascadia/profiles.schema.json b/doc/cascadia/profiles.schema.json index 5f8cf7fbd1d..7225059372a 100644 --- a/doc/cascadia/profiles.schema.json +++ b/doc/cascadia/profiles.schema.json @@ -2344,11 +2344,6 @@ "description": "Force the terminal to use the legacy input encoding. Certain keys in some applications may stop working when enabling this setting.", "type": "boolean" }, - "experimental.graphemes": { - "default": true, - "description": "When set to true, the terminal will use grapheme cluster boundaries for cursor movement. Otherwise, the terminal will use codepoint boundaries.", - "type": "boolean" - }, "experimental.useBackgroundImageForWindow": { "default": false, "description": "When set to true, the background image for the currently focused profile is expanded to encompass the entire window, beneath other panes.", diff --git a/src/cascadia/TerminalCore/ICoreSettings.idl b/src/cascadia/TerminalCore/ICoreSettings.idl index 6b6b4b4c3f5..1f3c78aa09b 100644 --- a/src/cascadia/TerminalCore/ICoreSettings.idl +++ b/src/cascadia/TerminalCore/ICoreSettings.idl @@ -20,7 +20,6 @@ namespace Microsoft.Terminal.Core String WordDelimiters; Boolean ForceVTInput; - Boolean Graphemes; Boolean TrimBlockSelection; Boolean DetectURLs; Boolean VtPassthrough; diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index b13acc3ff11..6eabcdab072 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -14,8 +14,6 @@ #include #include -#include "../../types/inc/CodepointWidthDetector.hpp" - using namespace winrt::Microsoft::Terminal::Core; using namespace Microsoft::Terminal::Core; using namespace Microsoft::Console; @@ -99,7 +97,6 @@ void 
Terminal::UpdateSettings(ICoreSettings settings) _autoMarkPrompts = settings.AutoMarkPrompts(); _getTerminalInput().ForceDisableWin32InputMode(settings.ForceVTInput()); - CodepointWidthDetector::Singleton().SetEnableGraphemes(settings.Graphemes()); if (settings.TabColor() == nullptr) { diff --git a/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl b/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl index 038f1afec53..612262d4b11 100644 --- a/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl +++ b/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl @@ -80,7 +80,6 @@ namespace Microsoft.Terminal.Settings.Model INHERITABLE_SETTING(Boolean, SoftwareRendering); INHERITABLE_SETTING(Boolean, UseBackgroundImageForWindow); INHERITABLE_SETTING(Boolean, ForceVTInput); - INHERITABLE_SETTING(Boolean, Graphemes); INHERITABLE_SETTING(Boolean, DebugFeaturesEnabled); INHERITABLE_SETTING(Boolean, StartOnUserLogin); INHERITABLE_SETTING(Boolean, AlwaysOnTop); diff --git a/src/cascadia/TerminalSettingsModel/MTSMSettings.h b/src/cascadia/TerminalSettingsModel/MTSMSettings.h index 16124774453..cb8eb8bcdff 100644 --- a/src/cascadia/TerminalSettingsModel/MTSMSettings.h +++ b/src/cascadia/TerminalSettingsModel/MTSMSettings.h @@ -28,7 +28,6 @@ Author(s): X(bool, SoftwareRendering, "experimental.rendering.software", false) \ X(bool, UseBackgroundImageForWindow, "experimental.useBackgroundImageForWindow", false) \ X(bool, ForceVTInput, "experimental.input.forceVT", false) \ - X(bool, Graphemes, "experimental.graphemes", Feature_Graphemes::IsEnabled()) \ X(bool, TrimBlockSelection, "trimBlockSelection", true) \ X(bool, DetectURLs, "experimental.detectURLs", true) \ X(bool, AlwaysShowTabs, "alwaysShowTabs", true) \ diff --git a/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp b/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp index c46a11f9cf6..e5c07a71103 100644 --- a/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp +++ 
b/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp @@ -364,7 +364,6 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation _SoftwareRendering = globalSettings.SoftwareRendering(); _UseBackgroundImageForWindow = globalSettings.UseBackgroundImageForWindow(); _ForceVTInput = globalSettings.ForceVTInput(); - _Graphemes = globalSettings.Graphemes(); _TrimBlockSelection = globalSettings.TrimBlockSelection(); _DetectURLs = globalSettings.DetectURLs(); _EnableUnfocusedAcrylic = globalSettings.EnableUnfocusedAcrylic(); diff --git a/src/cascadia/TerminalSettingsModel/TerminalSettings.h b/src/cascadia/TerminalSettingsModel/TerminalSettings.h index 70c751e563d..184e05ac8f9 100644 --- a/src/cascadia/TerminalSettingsModel/TerminalSettings.h +++ b/src/cascadia/TerminalSettingsModel/TerminalSettings.h @@ -159,7 +159,6 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation INHERITABLE_SETTING(Model::TerminalSettings, bool, SoftwareRendering, false); INHERITABLE_SETTING(Model::TerminalSettings, bool, UseBackgroundImageForWindow, false); INHERITABLE_SETTING(Model::TerminalSettings, bool, ForceVTInput, false); - INHERITABLE_SETTING(Model::TerminalSettings, bool, Graphemes, false); INHERITABLE_SETTING(Model::TerminalSettings, hstring, PixelShaderPath); INHERITABLE_SETTING(Model::TerminalSettings, hstring, PixelShaderImagePath); diff --git a/src/cascadia/inc/ControlProperties.h b/src/cascadia/inc/ControlProperties.h index 81f34dd384f..a7700b04958 100644 --- a/src/cascadia/inc/ControlProperties.h +++ b/src/cascadia/inc/ControlProperties.h @@ -46,7 +46,6 @@ X(bool, TrimBlockSelection, true) \ X(bool, SuppressApplicationTitle) \ X(bool, ForceVTInput, false) \ - X(bool, Graphemes, false) \ X(winrt::hstring, StartingTitle) \ X(bool, DetectURLs, true) \ X(bool, VtPassthrough, false) \ diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 0e28488d610..8cf96475fcf 100644 --- 
a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -4,6 +4,9 @@ #include "precomp.h" #include "inc/CodepointWidthDetector.hpp" +// Due to the Feature_Graphemes::IsEnabled() feature flagging, some code may be disabled. +#pragma warning(disable : 4702) // unreachable code + // I was trying to minimize dependencies in this code so that it's easier to port to other terminal applications. // Also, it has to be fast / have minimal overhead, since it potentially parses every single input character. #pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). @@ -946,25 +949,27 @@ CodepointWidthDetector& CodepointWidthDetector::Singleton() noexcept return s_codepointWidthDetector; } -CodepointWidthDetector::CodepointWidthDetector() noexcept : - _enableGraphemes{ Feature_Graphemes::IsEnabled() } -{ -} - size_t CodepointWidthDetector::GraphemeNext(const std::wstring_view& str, size_t offset, int* width) noexcept { - if (!_enableGraphemes.load(std::memory_order_relaxed)) + if constexpr (!Feature_Graphemes::IsEnabled()) { return _graphemeNextOld(str, offset, width); } + int widthIgnored; + if (!width) + { + width = &widthIgnored; + } + const auto beg = str.begin(); const auto end = str.end(); auto it = beg + std::min(offset, str.size()); if (it == end) { - return 0; + *width = 1; + return offset; } char32_t cp; @@ -1000,26 +1005,30 @@ size_t CodepointWidthDetector::GraphemeNext(const std::wstring_view& str, size_t lead = trail; } - if (width) - { - *width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); - } + *width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); return it - beg; } // This code is identical to GraphemeNext() but with the order of operations reversed since we're iterating backwards. 
size_t CodepointWidthDetector::GraphemePrev(const std::wstring_view& str, size_t offset, int* width) noexcept { - if (!_enableGraphemes.load(std::memory_order_relaxed)) + if constexpr (!Feature_Graphemes::IsEnabled()) { return _graphemePrevOld(str, offset, width); } + int widthIgnored; + if (!width) + { + width = &widthIgnored; + } + const auto beg = str.begin(); auto it = beg + std::min(offset, str.size()); if (it == beg) { + *width = 1; return 0; } @@ -1056,10 +1065,7 @@ size_t CodepointWidthDetector::GraphemePrev(const std::wstring_view& str, size_t trail = lead; } - if (width) - { - *width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); - } + *width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); return it - beg; } @@ -1164,11 +1170,6 @@ catch (...) return 1; } -void CodepointWidthDetector::SetEnableGraphemes(const bool enable) noexcept -{ - _enableGraphemes.store(enable, std::memory_order_relaxed); -} - // Method Description: // - Sets a function that should be used as the fallback mechanism for // determining a particular glyph's width, should the glyph be an ambiguous diff --git a/src/types/inc/CodepointWidthDetector.hpp b/src/types/inc/CodepointWidthDetector.hpp index bc01b0fe08b..e834eb16a45 100644 --- a/src/types/inc/CodepointWidthDetector.hpp +++ b/src/types/inc/CodepointWidthDetector.hpp @@ -7,23 +7,19 @@ struct CodepointWidthDetector { static CodepointWidthDetector& Singleton() noexcept; - CodepointWidthDetector() noexcept; - size_t GraphemeNext(const std::wstring_view& str, size_t offset, int* width) noexcept; size_t GraphemePrev(const std::wstring_view& str, size_t offset, int* width) noexcept; - void SetEnableGraphemes(bool enable) noexcept; void SetFallbackMethod(std::function pfnFallback) noexcept; void ClearFallbackCache() noexcept; private: __declspec(noinline) int _checkFallbackViaCache(char32_t codepoint) noexcept; - __declspec(noinline) size_t _graphemeNextOld(const std::wstring_view& str, size_t offset, int* width) 
noexcept; - __declspec(noinline) size_t _graphemePrevOld(const std::wstring_view& str, size_t offset, int* width) noexcept; + size_t _graphemeNextOld(const std::wstring_view& str, size_t offset, int* width) noexcept; + size_t _graphemePrevOld(const std::wstring_view& str, size_t offset, int* width) noexcept; int _getWidthOld(char32_t cp) noexcept; std::unordered_map _fallbackCache; std::function _pfnFallbackMethod; - std::atomic _enableGraphemes; }; diff --git a/src/types/ut_types/CodepointWidthDetectorTests.cpp b/src/types/ut_types/CodepointWidthDetectorTests.cpp index 6771ed97da5..7a1f986fdf4 100644 --- a/src/types/ut_types/CodepointWidthDetectorTests.cpp +++ b/src/types/ut_types/CodepointWidthDetectorTests.cpp @@ -6,6 +6,9 @@ #include "../types/inc/CodepointWidthDetector.hpp" +// Due to the Feature_Graphemes::IsEnabled() feature flagging, some code may be disabled. +#pragma warning(disable : 4702) // unreachable code + // FYI at the time of writing you may have to generate this table in cmd with // go run CodepointWidthDetectorTests_gen.go > temp.txt // because PowerShell garbles Unicode text between piped commands. 
@@ -1228,6 +1231,11 @@ class CodepointWidthDetectorTests TEST_METHOD(GraphemeBreakTest) { + if constexpr (!Feature_Graphemes::IsEnabled()) + { + return; + } + WEX::TestExecution::DisableVerifyExceptions disableVerifyExceptions{}; WEX::TestExecution::SetVerifyOutput verifyOutputScope{ WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures }; @@ -1277,6 +1285,11 @@ class CodepointWidthDetectorTests TEST_METHOD(BasicGraphemes) { + if constexpr (!Feature_Graphemes::IsEnabled()) + { + return; + } + static constexpr std::wstring_view text{ L"a\u0363e\u0364\u0364i\u0365" }; auto& cwd = CodepointWidthDetector::Singleton(); @@ -1319,6 +1332,11 @@ class CodepointWidthDetectorTests TEST_METHOD(DevanagariConjunctLinker) { + if constexpr (!Feature_Graphemes::IsEnabled()) + { + return; + } + static constexpr std::wstring_view text{ L"\u0915\u094D\u094D\u0924" }; auto& cwd = CodepointWidthDetector::Singleton(); From cc40fb2ce958b531daba4c6565ce450020254aa6 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 22 Mar 2024 18:11:51 +0100 Subject: [PATCH 04/14] AuditMode fix --- src/buffer/out/Row.cpp | 2 +- src/buffer/out/Row.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 62bc50df499..76407cd0d9e 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -664,7 +664,7 @@ catch (...) 
charsConsumed = ch - chBeg; } -[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off) +[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off) noexcept { auto& cwd = CodepointWidthDetector::Singleton(); const auto len = chars.size(); diff --git a/src/buffer/out/Row.hpp b/src/buffer/out/Row.hpp index bd39c8461e0..8160d4ec857 100644 --- a/src/buffer/out/Row.hpp +++ b/src/buffer/out/Row.hpp @@ -181,7 +181,7 @@ class ROW final bool IsValid() const noexcept; void ReplaceCharacters(til::CoordType width) noexcept; void ReplaceText() noexcept; - void _replaceTextUnicode(size_t ch, size_t off); + void _replaceTextUnicode(size_t ch, size_t off) noexcept; void CopyTextFrom(const std::span& charOffsets) noexcept; static void _copyOffsets(uint16_t* dst, const uint16_t* src, uint16_t size, uint16_t offset) noexcept; void Finish(); From f3b4d821858f6fbe70e611e7038c1a32e170a917 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 31 May 2024 23:14:34 +0200 Subject: [PATCH 05/14] Address all of the things, mostly --- .github/actions/spelling/expect/expect.txt | 4 +- src/buffer/out/Row.cpp | 73 +- src/buffer/out/textBuffer.cpp | 16 +- src/cascadia/TerminalApp/AppLogic.cpp | 17 +- src/cascadia/TerminalApp/TerminalPage.cpp | 41 +- .../TerminalConnection/ConptyConnection.cpp | 47 +- .../TerminalConnection/ConptyConnection.h | 2 +- src/cascadia/TerminalControl/ControlCore.cpp | 18 + src/cascadia/TerminalControl/EventArgs.idl | 7 + .../TerminalControl/IControlSettings.idl | 1 + .../TerminalSettingsEditor/Rendering.xaml | 8 + .../RenderingViewModel.cpp | 1 + .../RenderingViewModel.h | 1 + .../RenderingViewModel.idl | 2 + .../Resources/en-US/Resources.resw | 18 + .../TerminalSettingsModel/EnumMappings.cpp | 1 + .../TerminalSettingsModel/EnumMappings.h | 1 + .../TerminalSettingsModel/EnumMappings.idl | 1 + .../GlobalAppSettings.cpp | 5 + .../GlobalAppSettings.idl | 1 + .../TerminalSettingsModel/MTSMSettings.h | 1 + 
.../TerminalSettings.cpp | 1 + .../TerminalSettingsModel/TerminalSettings.h | 1 + .../TerminalSettingsSerializationHelpers.h | 9 + src/cascadia/inc/ControlProperties.h | 1 + src/host/ConsoleArguments.cpp | 91 +- src/host/ConsoleArguments.hpp | 26 +- src/host/VtIo.cpp | 23 + src/host/inputBuffer.cpp | 3 +- src/host/screenInfo.cpp | 26 +- src/host/settings.cpp | 10 + src/host/settings.hpp | 10 + src/host/ut_host/ConsoleArgumentsTests.cpp | 140 -- src/inc/conpty-static.h | 14 +- src/propslib/RegistrySerialization.cpp | 1 + src/terminal/adapter/DispatchTypes.hpp | 10 + src/terminal/adapter/adaptDispatch.cpp | 89 +- .../GraphemeTableGen/GraphemeTableGen.csproj | 10 + src/tools/GraphemeTableGen/Program.cs | 579 +++++++ .../GraphemeTestTableGen.csproj | 10 + src/tools/GraphemeTestTableGen/Program.cs | 121 ++ src/types/CodepointWidthDetector.cpp | 1476 ++++++++--------- src/types/CodepointWidthDetector_gen.go | 557 ------- src/types/GlyphWidth.cpp | 6 +- src/types/inc/CodepointWidthDetector.hpp | 62 +- .../ut_types/CodepointWidthDetectorTests.cpp | 36 +- .../CodepointWidthDetectorTests_gen.go | 144 -- src/winconpty/winconpty.cpp | 45 +- src/winconpty/winconpty.h | 7 +- 49 files changed, 1888 insertions(+), 1886 deletions(-) create mode 100644 src/tools/GraphemeTableGen/GraphemeTableGen.csproj create mode 100644 src/tools/GraphemeTableGen/Program.cs create mode 100644 src/tools/GraphemeTestTableGen/GraphemeTestTableGen.csproj create mode 100644 src/tools/GraphemeTestTableGen/Program.cs delete mode 100644 src/types/CodepointWidthDetector_gen.go delete mode 100644 src/types/ut_types/CodepointWidthDetectorTests_gen.go diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 7f91a39436a..186133a074e 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -179,7 +179,6 @@ changelists charinfo CHARSETINFO chh -chonker chshdng CHT CLASSSTRING @@ -1049,7 +1048,6 @@ mdmerge MDs 
MEASUREITEM megamix -Meh memallocator meme MENUCHAR @@ -1988,6 +1986,7 @@ WCIW WCSHELPER wcsicmp wcsrev +wcswidth wddm wddmcon WDDMCONSOLECONTEXT @@ -2142,6 +2141,7 @@ XFORM XIn XManifest XMath +XNamespace xorg XPan XResource diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 6be7af47bc5..9d1469c1c4f 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -566,6 +566,7 @@ void ROW::ReplaceAttributes(const til::CoordType beginIndex, const til::CoordTyp void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars) try { + assert(width >= 1 && width <= 2); WriteHelper h{ *this, columnBegin, _columnCount, chars }; if (!h.IsValid()) { @@ -634,16 +635,17 @@ catch (...) throw; } -[[msvc::forceinline]] void ROW::WriteHelper::ReplaceText() noexcept +void ROW::WriteHelper::ReplaceText() noexcept { // This function starts with a fast-pass for ASCII. ASCII is still predominant in technical areas. // // We can infer the "end" from the amount of columns we're given (colLimit - colBeg), // because ASCII is always 1 column wide per character. - const auto len = std::min(chars.size(), colLimit - colBeg); + auto len = std::min(chars.size(), colLimit - colEnd); size_t ch = chBeg; + size_t off = 0; - for (size_t off = 0; off < len; ++off) + for (; off < len; ++off) { if (chars[off] >= 0x80) [[unlikely]] { @@ -660,27 +662,70 @@ catch (...) charsConsumed = ch - chBeg; } -[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off) noexcept +void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off) noexcept { auto& cwd = CodepointWidthDetector::Singleton(); const auto len = chars.size(); - // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â". - // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character - // and let MeasureNext() find the next proper grapheme boundary. 
- if (off != 0) + // Check if the new text joins with the existing contents of the row to form a single grapheme cluster. + if (off == 0) { + auto colPrev = colBeg; + while (colPrev > 0 && row._uncheckedIsTrailer(--colPrev)) + { + } + + const auto chPrev = row._uncheckedCharOffset(colPrev); + const std::wstring_view charsPrev{ row._chars.data() + chPrev, ch - chPrev }; + + GraphemeState state; + cwd.GraphemeNext(state, charsPrev); + cwd.GraphemeNext(state, chars); + + if (state.len > 0) + { + colBegDirty = colPrev; + colEnd = colPrev; + + const auto colEndNew = gsl::narrow_cast(colEnd + state.width); + if (colEndNew > colLimit) + { + colEndDirty = colLimit; + charsConsumed = ch - chBeg; + return; + } + + // Fill our char-offset buffer with 1 entry containing the mapping from the + // current column (colEnd) to the start of the glyph in the string (ch)... + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(chPrev); + // ...followed by 0-N entries containing an indication that the + // columns are just a wide-glyph extension of the preceding one. + while (colEnd < colEndNew) + { + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(chPrev | CharOffsetsTrailer); + } + + ch += state.len; + off += state.len; + } + } + else + { + // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â". + // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character + // and let MeasureNext() find the next proper grapheme boundary. --colEnd; --ch; --off; } + GraphemeState state{ .beg = chars.data() + off }; + while (off < len) { - int width; - const auto end = cwd.GraphemeNext(chars, off, &width); + cwd.GraphemeNext(state, chars); - const auto colEndNew = gsl::narrow_cast(colEnd + width); + const auto colEndNew = gsl::narrow_cast(colEnd + state.width); if (colEndNew > colLimit) { colEndDirty = colLimit; @@ -698,8 +743,8 @@ catch (...) 
til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(ch | CharOffsetsTrailer); } - ch += end - off; - off = end; + ch += state.len; + off += state.len; } colEndDirty = colEnd; @@ -804,7 +849,7 @@ catch (...) } #pragma warning(pop) -[[msvc::forceinline]] void ROW::WriteHelper::Finish() +void ROW::WriteHelper::Finish() { colEndDirty = row._adjustForward(colEndDirty); diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 3c7ee55ebf2..f881ff67e32 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -409,14 +409,18 @@ void TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute size_t TextBuffer::GraphemeNext(const std::wstring_view& chars, size_t position) noexcept { auto& cwd = CodepointWidthDetector::Singleton(); - return cwd.GraphemeNext(chars, position, nullptr); + GraphemeState state{ .beg = chars.data() + position }; + cwd.GraphemeNext(state, chars); + return position + state.len; } // It's the counterpart to GraphemeNext. See GraphemeNext. size_t TextBuffer::GraphemePrev(const std::wstring_view& chars, size_t position) noexcept { auto& cwd = CodepointWidthDetector::Singleton(); - return cwd.GraphemePrev(chars, position, nullptr); + GraphemeState state{ .beg = chars.data() + position }; + cwd.GraphemePrev(state, chars); + return position - state.len; } // Ever wondered how much space a piece of text needs before inserting it? This function will tell you! 
@@ -465,11 +469,13 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord col--; } + GraphemeState state{ .beg = chars.data() + dist }; + while (dist < len) { - int width; - dist = cwd.GraphemeNext(chars, dist, &width); - col += width; + cwd.GraphemeNext(state, chars); + dist += state.len; + col += state.width; // If we ran out of columns, we need to always return `columnLimit` and not `cols`, // because if we tried inserting a wide glyph into just 1 remaining column it will diff --git a/src/cascadia/TerminalApp/AppLogic.cpp b/src/cascadia/TerminalApp/AppLogic.cpp index dc11b506449..840c1db9272 100644 --- a/src/cascadia/TerminalApp/AppLogic.cpp +++ b/src/cascadia/TerminalApp/AppLogic.cpp @@ -3,16 +3,18 @@ #include "pch.h" #include "AppLogic.h" -#include "../inc/WindowingBehavior.h" -#include "AppLogic.g.cpp" -#include "FindTargetWindowResult.g.cpp" -#include "SettingsLoadEventArgs.h" #include #include #include +#include "SettingsLoadEventArgs.h" +#include "../../types/inc/CodepointWidthDetector.hpp" #include "../../types/inc/utils.hpp" +#include "../inc/WindowingBehavior.h" + +#include "AppLogic.g.cpp" +#include "FindTargetWindowResult.g.cpp" using namespace winrt::Windows::ApplicationModel; using namespace winrt::Windows::ApplicationModel::DataTransfer; @@ -433,13 +435,6 @@ namespace winrt::TerminalApp::implementation } } - if (initialLoad) - { - // Register for directory change notification. - _RegisterSettingsChange(); - return; - } - // Here, we successfully reloaded the settings, and created a new // TerminalSettings object. 
diff --git a/src/cascadia/TerminalApp/TerminalPage.cpp b/src/cascadia/TerminalApp/TerminalPage.cpp index b9ab23b62b2..4397ea19bdd 100644 --- a/src/cascadia/TerminalApp/TerminalPage.cpp +++ b/src/cascadia/TerminalApp/TerminalPage.cpp @@ -4,18 +4,11 @@ #include "pch.h" #include "TerminalPage.h" -#include "TerminalPage.g.cpp" -#include "RenameWindowRequestedArgs.g.cpp" -#include "RequestMoveContentArgs.g.cpp" -#include "RequestReceiveContentArgs.g.cpp" -#include "LaunchPositionRequest.g.cpp" - -#include -#include #include #include #include +#include #include "../../types/inc/utils.hpp" #include "App.h" @@ -24,7 +17,12 @@ #include "SettingsPaneContent.h" #include "ScratchpadContent.h" #include "TabRowControl.h" -#include "Utils.h" + +#include "TerminalPage.g.cpp" +#include "RenameWindowRequestedArgs.g.cpp" +#include "RequestMoveContentArgs.g.cpp" +#include "RequestReceiveContentArgs.g.cpp" +#include "LaunchPositionRequest.g.cpp" using namespace winrt; using namespace winrt::Microsoft::Terminal::Control; @@ -1211,6 +1209,26 @@ namespace winrt::TerminalApp::implementation TerminalSettings settings, const bool inheritCursor) { + // The only way to create string references to literals in WinRT is through std::optional. Fun! 
+ static std::optional textMeasurement; + static const auto textMeasurementInit = [&]() { + switch (_settings.GlobalSettings().TextMeasurement()) + { + case TextMeasurement::Graphemes: + textMeasurement.emplace(L"graphemes"); + break; + case TextMeasurement::Wcswidth: + textMeasurement.emplace(L"wcswidth"); + break; + case TextMeasurement::Console: + textMeasurement.emplace(L"console"); + break; + default: + break; + } + return true; + }(); + TerminalConnection::ITerminalConnection connection{ nullptr }; auto connectionType = profile.ConnectionType(); @@ -1282,6 +1300,11 @@ namespace winrt::TerminalApp::implementation } } + if (textMeasurement) + { + valueSet.Insert(L"textMeasurement", Windows::Foundation::PropertyValue::CreateString(*textMeasurement)); + } + if (const auto id = settings.SessionId(); id != winrt::guid{}) { valueSet.Insert(L"sessionId", Windows::Foundation::PropertyValue::CreateGuid(id)); diff --git a/src/cascadia/TerminalConnection/ConptyConnection.cpp b/src/cascadia/TerminalConnection/ConptyConnection.cpp index 7d326ea65c1..46707ff49ad 100644 --- a/src/cascadia/TerminalConnection/ConptyConnection.cpp +++ b/src/cascadia/TerminalConnection/ConptyConnection.cpp @@ -5,7 +5,6 @@ #include "ConptyConnection.h" #include -#include #include #include "CTerminalHandoff.h" @@ -259,11 +258,39 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation _cols = unbox_prop_or(settings, L"initialCols", _cols); _sessionId = unbox_prop_or(settings, L"sessionId", _sessionId); _environment = settings.TryLookup(L"environment").try_as(); - _inheritCursor = unbox_prop_or(settings, L"inheritCursor", _inheritCursor); _profileGuid = unbox_prop_or(settings, L"profileGuid", _profileGuid); - const auto& initialEnvironment{ unbox_prop_or(settings, L"initialEnvironment", L"") }; + _flags = PSEUDOCONSOLE_RESIZE_QUIRK; + + // If we're using an existing buffer, we want the new connection + // to reuse the existing cursor. 
When not setting this flag, the + // PseudoConsole sends a clear screen VT code which our renderer + // interprets into making all the previous lines be outside the + // current viewport. + const auto inheritCursor = unbox_prop_or(settings, L"inheritCursor", false); + if (inheritCursor) + { + _flags |= PSEUDOCONSOLE_INHERIT_CURSOR; + } + + const auto textMeasurement = unbox_prop_or(settings, L"textMeasurement", winrt::hstring{}); + if (!textMeasurement.empty()) + { + if (textMeasurement == L"graphemes") + { + _flags |= PSEUDOCONSOLE_GLYPH_WIDTH_GRAPHEMES; + } + else if (textMeasurement == L"wcswidth") + { + _flags |= PSEUDOCONSOLE_GLYPH_WIDTH_WCSWIDTH; + } + else if (textMeasurement == L"console") + { + _flags |= PSEUDOCONSOLE_GLYPH_WIDTH_CONSOLE; + } + } + const auto& initialEnvironment{ unbox_prop_or(settings, L"initialEnvironment", L"") }; const bool reloadEnvironmentVariables = unbox_prop_or(settings, L"reloadEnvironmentVariables", false); if (reloadEnvironmentVariables) @@ -318,19 +345,7 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation // handoff from an already-started PTY process. if (!_inPipe) { - DWORD flags = PSEUDOCONSOLE_RESIZE_QUIRK; - - // If we're using an existing buffer, we want the new connection - // to reuse the existing cursor. When not setting this flag, the - // PseudoConsole sends a clear screen VT code which our renderer - // interprets into making all the previous lines be outside the - // current viewport. 
- if (_inheritCursor) - { - flags |= PSEUDOCONSOLE_INHERIT_CURSOR; - } - - THROW_IF_FAILED(_CreatePseudoConsoleAndPipes(til::unwrap_coord_size(dimensions), flags, &_inPipe, &_outPipe, &_hPC)); + THROW_IF_FAILED(_CreatePseudoConsoleAndPipes(til::unwrap_coord_size(dimensions), _flags, &_inPipe, &_outPipe, &_hPC)); if (_initialParentHwnd != 0) { diff --git a/src/cascadia/TerminalConnection/ConptyConnection.h b/src/cascadia/TerminalConnection/ConptyConnection.h index 246621c5eeb..1ce3b72ba92 100644 --- a/src/cascadia/TerminalConnection/ConptyConnection.h +++ b/src/cascadia/TerminalConnection/ConptyConnection.h @@ -90,7 +90,7 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation til::u8state _u8State{}; std::wstring _u16Str{}; std::array _buffer{}; - bool _inheritCursor{ false }; + DWORD _flags{ 0 }; til::env _initialEnv{}; guid _profileGuid{}; diff --git a/src/cascadia/TerminalControl/ControlCore.cpp b/src/cascadia/TerminalControl/ControlCore.cpp index 6b4b86a194d..ce0fa3d4c09 100644 --- a/src/cascadia/TerminalControl/ControlCore.cpp +++ b/src/cascadia/TerminalControl/ControlCore.cpp @@ -18,6 +18,7 @@ #include "../../renderer/atlas/AtlasEngine.h" #include "../../renderer/base/renderer.hpp" #include "../../renderer/uia/UiaRenderer.hpp" +#include "../../types/inc/CodepointWidthDetector.hpp" #include "ControlCore.g.cpp" #include "SelectionColor.g.cpp" @@ -71,6 +72,23 @@ namespace winrt::Microsoft::Terminal::Control::implementation _desiredFont{ DEFAULT_FONT_FACE, 0, DEFAULT_FONT_WEIGHT, DEFAULT_FONT_SIZE, CP_UTF8 }, _actualFont{ DEFAULT_FONT_FACE, 0, DEFAULT_FONT_WEIGHT, { 0, DEFAULT_FONT_SIZE }, CP_UTF8, false } { + static const auto textMeasurementInit = [&]() { + TextMeasurementMode mode = TextMeasurementMode::Graphemes; + switch (settings.TextMeasurement()) + { + case TextMeasurement::Wcswidth: + mode = TextMeasurementMode::Wcswidth; + break; + case TextMeasurement::Console: + mode = TextMeasurementMode::Console; + break; + default: + break; + } 
+ CodepointWidthDetector::Singleton().Reset(mode); + return true; + }(); + _settings = winrt::make_self(settings, unfocusedAppearance); _terminal = std::make_shared<::Microsoft::Terminal::Core::Terminal>(); const auto lock = _terminal->LockForWriting(); diff --git a/src/cascadia/TerminalControl/EventArgs.idl b/src/cascadia/TerminalControl/EventArgs.idl index 6b0bf11e359..8521f13f978 100644 --- a/src/cascadia/TerminalControl/EventArgs.idl +++ b/src/cascadia/TerminalControl/EventArgs.idl @@ -18,6 +18,13 @@ namespace Microsoft.Terminal.Control Direct3D11, }; + enum TextMeasurement + { + Graphemes, + Wcswidth, + Console, + }; + runtimeclass FontSizeChangedArgs { Int32 Width { get; }; diff --git a/src/cascadia/TerminalControl/IControlSettings.idl b/src/cascadia/TerminalControl/IControlSettings.idl index 51eaf34f90d..725b23431fa 100644 --- a/src/cascadia/TerminalControl/IControlSettings.idl +++ b/src/cascadia/TerminalControl/IControlSettings.idl @@ -62,6 +62,7 @@ namespace Microsoft.Terminal.Control Microsoft.Terminal.Control.GraphicsAPI GraphicsAPI { get; }; Boolean DisablePartialInvalidation { get; }; Boolean SoftwareRendering { get; }; + Microsoft.Terminal.Control.TextMeasurement TextMeasurement { get; }; Boolean ShowMarks { get; }; Boolean UseBackgroundImageForWindow { get; }; Boolean RightClickContextMenu { get; }; diff --git a/src/cascadia/TerminalSettingsEditor/Rendering.xaml b/src/cascadia/TerminalSettingsEditor/Rendering.xaml index 4b9534167d0..6285ebc4ff2 100644 --- a/src/cascadia/TerminalSettingsEditor/Rendering.xaml +++ b/src/cascadia/TerminalSettingsEditor/Rendering.xaml @@ -41,5 +41,13 @@ + + + + diff --git a/src/cascadia/TerminalSettingsEditor/RenderingViewModel.cpp b/src/cascadia/TerminalSettingsEditor/RenderingViewModel.cpp index 74c4d5922a8..f369e0f14ff 100644 --- a/src/cascadia/TerminalSettingsEditor/RenderingViewModel.cpp +++ b/src/cascadia/TerminalSettingsEditor/RenderingViewModel.cpp @@ -17,5 +17,6 @@ namespace 
winrt::Microsoft::Terminal::Settings::Editor::implementation _settings{ std::move(settings) } { INITIALIZE_BINDABLE_ENUM_SETTING(GraphicsAPI, GraphicsAPI, winrt::Microsoft::Terminal::Control::GraphicsAPI, L"Globals_GraphicsAPI_", L"Text"); + INITIALIZE_BINDABLE_ENUM_SETTING(TextMeasurement, TextMeasurement, winrt::Microsoft::Terminal::Control::TextMeasurement, L"Globals_TextMeasurement_", L"Text"); } } diff --git a/src/cascadia/TerminalSettingsEditor/RenderingViewModel.h b/src/cascadia/TerminalSettingsEditor/RenderingViewModel.h index b3042d893a1..1eda9e515fe 100644 --- a/src/cascadia/TerminalSettingsEditor/RenderingViewModel.h +++ b/src/cascadia/TerminalSettingsEditor/RenderingViewModel.h @@ -16,6 +16,7 @@ namespace winrt::Microsoft::Terminal::Settings::Editor::implementation GETSET_BINDABLE_ENUM_SETTING(GraphicsAPI, winrt::Microsoft::Terminal::Control::GraphicsAPI, _settings.GlobalSettings().GraphicsAPI); PERMANENT_OBSERVABLE_PROJECTED_SETTING(_settings.GlobalSettings(), DisablePartialInvalidation); PERMANENT_OBSERVABLE_PROJECTED_SETTING(_settings.GlobalSettings(), SoftwareRendering); + GETSET_BINDABLE_ENUM_SETTING(TextMeasurement, winrt::Microsoft::Terminal::Control::TextMeasurement, _settings.GlobalSettings().TextMeasurement); private: Model::CascadiaSettings _settings{ nullptr }; diff --git a/src/cascadia/TerminalSettingsEditor/RenderingViewModel.idl b/src/cascadia/TerminalSettingsEditor/RenderingViewModel.idl index 1ca164fbd97..a9fa2ddb352 100644 --- a/src/cascadia/TerminalSettingsEditor/RenderingViewModel.idl +++ b/src/cascadia/TerminalSettingsEditor/RenderingViewModel.idl @@ -15,5 +15,7 @@ namespace Microsoft.Terminal.Settings.Editor Windows.Foundation.Collections.IObservableVector GraphicsAPIList { get; }; PERMANENT_OBSERVABLE_PROJECTED_SETTING(Boolean, DisablePartialInvalidation); PERMANENT_OBSERVABLE_PROJECTED_SETTING(Boolean, SoftwareRendering); + IInspectable CurrentTextMeasurement; + Windows.Foundation.Collections.IObservableVector TextMeasurementList 
{ get; }; } } diff --git a/src/cascadia/TerminalSettingsEditor/Resources/en-US/Resources.resw b/src/cascadia/TerminalSettingsEditor/Resources/en-US/Resources.resw index 2ac6770cab4..008d82b60c0 100644 --- a/src/cascadia/TerminalSettingsEditor/Resources/en-US/Resources.resw +++ b/src/cascadia/TerminalSettingsEditor/Resources/en-US/Resources.resw @@ -342,6 +342,24 @@ When enabled, the terminal will use a software rasterizer (WARP). This setting should be left disabled under almost all circumstances. {Locked="WARP"} WARP is the "Windows Advanced Rasterization Platform". + + Text measurement mode + This text is shown next to a list of choices. + + + This changes the way incoming text is grouped into cells. The "Grapheme clusters" option is the most modern and Unicode-correct way to do so, while "wcswidth" is a common approach on UNIX, and "Windows Console" replicates the way it used to work on Windows. Changing this setting requires a restart of Windows Terminal and it only applies to applications launched from within it. + + + Grapheme clusters + The default choice between multiple text measurement modes. + + + wcswidth + {Locked="wcswidth"} + + + Windows Console + Columns Header for a control to choose the number of columns in the terminal's text grid.
diff --git a/src/cascadia/TerminalSettingsModel/EnumMappings.cpp b/src/cascadia/TerminalSettingsModel/EnumMappings.cpp index 15665908b78..87b65b4fb2d 100644 --- a/src/cascadia/TerminalSettingsModel/EnumMappings.cpp +++ b/src/cascadia/TerminalSettingsModel/EnumMappings.cpp @@ -40,6 +40,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation DEFINE_ENUM_MAP(Model::WindowingMode, WindowingMode); DEFINE_ENUM_MAP(Microsoft::Terminal::Core::MatchMode, MatchMode); DEFINE_ENUM_MAP(Microsoft::Terminal::Control::GraphicsAPI, GraphicsAPI); + DEFINE_ENUM_MAP(Microsoft::Terminal::Control::TextMeasurement, TextMeasurement); // Profile Settings DEFINE_ENUM_MAP(Model::CloseOnExitMode, CloseOnExitMode); diff --git a/src/cascadia/TerminalSettingsModel/EnumMappings.h b/src/cascadia/TerminalSettingsModel/EnumMappings.h index 722ce920953..f76274b0497 100644 --- a/src/cascadia/TerminalSettingsModel/EnumMappings.h +++ b/src/cascadia/TerminalSettingsModel/EnumMappings.h @@ -36,6 +36,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation static winrt::Windows::Foundation::Collections::IMap WindowingMode(); static winrt::Windows::Foundation::Collections::IMap MatchMode(); static winrt::Windows::Foundation::Collections::IMap GraphicsAPI(); + static winrt::Windows::Foundation::Collections::IMap TextMeasurement(); // Profile Settings static winrt::Windows::Foundation::Collections::IMap CloseOnExitMode(); diff --git a/src/cascadia/TerminalSettingsModel/EnumMappings.idl b/src/cascadia/TerminalSettingsModel/EnumMappings.idl index 11801182999..f5def5fbaf9 100644 --- a/src/cascadia/TerminalSettingsModel/EnumMappings.idl +++ b/src/cascadia/TerminalSettingsModel/EnumMappings.idl @@ -18,6 +18,7 @@ namespace Microsoft.Terminal.Settings.Model static Windows.Foundation.Collections.IMap WindowingMode { get; }; static Windows.Foundation.Collections.IMap MatchMode { get; }; static Windows.Foundation.Collections.IMap GraphicsAPI { get; }; + static 
Windows.Foundation.Collections.IMap TextMeasurement { get; }; // Profile Settings static Windows.Foundation.Collections.IMap CloseOnExitMode { get; }; diff --git a/src/cascadia/TerminalSettingsModel/GlobalAppSettings.cpp b/src/cascadia/TerminalSettingsModel/GlobalAppSettings.cpp index ff6dd5eec63..822bfc5cb9a 100644 --- a/src/cascadia/TerminalSettingsModel/GlobalAppSettings.cpp +++ b/src/cascadia/TerminalSettingsModel/GlobalAppSettings.cpp @@ -240,6 +240,11 @@ Json::Value GlobalAppSettings::ToJson() { _GraphicsAPI.reset(); } + if (_TextMeasurement == Control::TextMeasurement::Graphemes) + { + _TextMeasurement.reset(); + } + if (_DisablePartialInvalidation == false) { _DisablePartialInvalidation.reset(); diff --git a/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl b/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl index 7e46bcc0517..1a1bab60b9b 100644 --- a/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl +++ b/src/cascadia/TerminalSettingsModel/GlobalAppSettings.idl @@ -79,6 +79,7 @@ namespace Microsoft.Terminal.Settings.Model INHERITABLE_SETTING(Microsoft.Terminal.Control.GraphicsAPI, GraphicsAPI); INHERITABLE_SETTING(Boolean, DisablePartialInvalidation); INHERITABLE_SETTING(Boolean, SoftwareRendering); + INHERITABLE_SETTING(Microsoft.Terminal.Control.TextMeasurement, TextMeasurement); INHERITABLE_SETTING(Boolean, UseBackgroundImageForWindow); INHERITABLE_SETTING(Boolean, ForceVTInput); INHERITABLE_SETTING(Boolean, DebugFeaturesEnabled); diff --git a/src/cascadia/TerminalSettingsModel/MTSMSettings.h b/src/cascadia/TerminalSettingsModel/MTSMSettings.h index f0e30684c53..d681771816b 100644 --- a/src/cascadia/TerminalSettingsModel/MTSMSettings.h +++ b/src/cascadia/TerminalSettingsModel/MTSMSettings.h @@ -27,6 +27,7 @@ Author(s): X(winrt::Microsoft::Terminal::Control::GraphicsAPI, GraphicsAPI, "rendering.graphicsAPI") \ X(bool, DisablePartialInvalidation, "rendering.disablePartialInvalidation", false) \ X(bool, SoftwareRendering, 
"rendering.software", false) \ + X(winrt::Microsoft::Terminal::Control::TextMeasurement, TextMeasurement, "compatibility.textMeasurement") \ X(bool, UseBackgroundImageForWindow, "experimental.useBackgroundImageForWindow", false) \ X(bool, ForceVTInput, "experimental.input.forceVT", false) \ X(bool, TrimBlockSelection, "trimBlockSelection", true) \ diff --git a/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp b/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp index 29b9bb811fe..170432c6264 100644 --- a/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp +++ b/src/cascadia/TerminalSettingsModel/TerminalSettings.cpp @@ -367,6 +367,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation _GraphicsAPI = globalSettings.GraphicsAPI(); _DisablePartialInvalidation = globalSettings.DisablePartialInvalidation(); _SoftwareRendering = globalSettings.SoftwareRendering(); + _TextMeasurement = globalSettings.TextMeasurement(); _UseBackgroundImageForWindow = globalSettings.UseBackgroundImageForWindow(); _ForceVTInput = globalSettings.ForceVTInput(); _TrimBlockSelection = globalSettings.TrimBlockSelection(); diff --git a/src/cascadia/TerminalSettingsModel/TerminalSettings.h b/src/cascadia/TerminalSettingsModel/TerminalSettings.h index 33658427f94..6deae7ba62b 100644 --- a/src/cascadia/TerminalSettingsModel/TerminalSettings.h +++ b/src/cascadia/TerminalSettingsModel/TerminalSettings.h @@ -158,6 +158,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation INHERITABLE_SETTING(Model::TerminalSettings, Microsoft::Terminal::Control::GraphicsAPI, GraphicsAPI); INHERITABLE_SETTING(Model::TerminalSettings, bool, DisablePartialInvalidation, false); INHERITABLE_SETTING(Model::TerminalSettings, bool, SoftwareRendering, false); + INHERITABLE_SETTING(Model::TerminalSettings, Microsoft::Terminal::Control::TextMeasurement, TextMeasurement); INHERITABLE_SETTING(Model::TerminalSettings, bool, UseBackgroundImageForWindow, false); 
INHERITABLE_SETTING(Model::TerminalSettings, bool, ForceVTInput, false); diff --git a/src/cascadia/TerminalSettingsModel/TerminalSettingsSerializationHelpers.h b/src/cascadia/TerminalSettingsModel/TerminalSettingsSerializationHelpers.h index d15c65c0fab..b1a2505f194 100644 --- a/src/cascadia/TerminalSettingsModel/TerminalSettingsSerializationHelpers.h +++ b/src/cascadia/TerminalSettingsModel/TerminalSettingsSerializationHelpers.h @@ -770,3 +770,12 @@ JSON_ENUM_MAPPER(::winrt::Microsoft::Terminal::Control::GraphicsAPI) pair_type{ "direct3d11", ValueType::Direct3D11 }, }; }; + +JSON_ENUM_MAPPER(::winrt::Microsoft::Terminal::Control::TextMeasurement) +{ + JSON_MAPPINGS(3) = { + pair_type{ "graphemes", ValueType::Graphemes }, + pair_type{ "wcswidth", ValueType::Wcswidth }, + pair_type{ "console", ValueType::Console }, + }; +}; diff --git a/src/cascadia/inc/ControlProperties.h b/src/cascadia/inc/ControlProperties.h index f848f401b6b..1e8dd30f34b 100644 --- a/src/cascadia/inc/ControlProperties.h +++ b/src/cascadia/inc/ControlProperties.h @@ -76,6 +76,7 @@ X(winrt::Microsoft::Terminal::Control::GraphicsAPI, GraphicsAPI) \ X(bool, DisablePartialInvalidation, false) \ X(bool, SoftwareRendering, false) \ + X(winrt::Microsoft::Terminal::Control::TextMeasurement, TextMeasurement) \ X(bool, UseBackgroundImageForWindow, false) \ X(bool, ShowMarks, false) \ X(winrt::Microsoft::Terminal::Control::CopyFormat, CopyFormatting, 0) \ diff --git a/src/host/ConsoleArguments.cpp b/src/host/ConsoleArguments.cpp index 6a22e8170a6..660dea62c3d 100644 --- a/src/host/ConsoleArguments.cpp +++ b/src/host/ConsoleArguments.cpp @@ -7,22 +7,21 @@ #include using namespace Microsoft::Console::Utils; -const std::wstring_view ConsoleArguments::VT_MODE_ARG = L"--vtmode"; -const std::wstring_view ConsoleArguments::HEADLESS_ARG = L"--headless"; -const std::wstring_view ConsoleArguments::SERVER_HANDLE_ARG = L"--server"; -const std::wstring_view ConsoleArguments::SIGNAL_HANDLE_ARG = L"--signal"; -const 
std::wstring_view ConsoleArguments::HANDLE_PREFIX = L"0x"; -const std::wstring_view ConsoleArguments::CLIENT_COMMANDLINE_ARG = L"--"; -const std::wstring_view ConsoleArguments::FORCE_V1_ARG = L"-ForceV1"; -const std::wstring_view ConsoleArguments::FORCE_NO_HANDOFF_ARG = L"-ForceNoHandoff"; -const std::wstring_view ConsoleArguments::FILEPATH_LEADER_PREFIX = L"\\??\\"; -const std::wstring_view ConsoleArguments::WIDTH_ARG = L"--width"; -const std::wstring_view ConsoleArguments::HEIGHT_ARG = L"--height"; -const std::wstring_view ConsoleArguments::INHERIT_CURSOR_ARG = L"--inheritcursor"; -const std::wstring_view ConsoleArguments::RESIZE_QUIRK = L"--resizeQuirk"; -const std::wstring_view ConsoleArguments::FEATURE_ARG = L"--feature"; -const std::wstring_view ConsoleArguments::FEATURE_PTY_ARG = L"pty"; -const std::wstring_view ConsoleArguments::COM_SERVER_ARG = L"-Embedding"; +static constexpr std::wstring_view VT_MODE_ARG{ L"--vtmode" }; +static constexpr std::wstring_view HEADLESS_ARG{ L"--headless" }; +static constexpr std::wstring_view SERVER_HANDLE_ARG{ L"--server" }; +static constexpr std::wstring_view SIGNAL_HANDLE_ARG{ L"--signal" }; +static constexpr std::wstring_view HANDLE_PREFIX{ L"0x" }; +static constexpr std::wstring_view CLIENT_COMMANDLINE_ARG{ L"--" }; +static constexpr std::wstring_view FORCE_V1_ARG{ L"-ForceV1" }; +static constexpr std::wstring_view FORCE_NO_HANDOFF_ARG{ L"-ForceNoHandoff" }; +static constexpr std::wstring_view FILEPATH_LEADER_PREFIX{ L"\\??\\" }; +static constexpr std::wstring_view WIDTH_ARG{ L"--width" }; +static constexpr std::wstring_view HEIGHT_ARG{ L"--height" }; +static constexpr std::wstring_view INHERIT_CURSOR_ARG{ L"--inheritcursor" }; +static constexpr std::wstring_view RESIZE_QUIRK{ L"--resizeQuirk" }; +static constexpr std::wstring_view GLYPH_WIDTH{ L"--textMeasurement" }; +static constexpr std::wstring_view COM_SERVER_ARG{ L"-Embedding" }; // NOTE: Thinking about adding more commandline args that control conpty, for // the 
Terminal? Make sure you add them to the commandline in // ConsoleEstablishHandoff. We use that to initialize the ConsoleArguments for a @@ -204,37 +203,6 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In return (hasNext) ? S_OK : E_INVALIDARG; } -// Routine Description: -// Similar to s_GetArgumentValue. -// Attempts to get the next arg as a "feature" arg - this can be used for -// feature detection. -// If the next arg is not recognized, then we don't support that feature. -// Currently, the only supported feature arg is `pty`, to identify pty support. -// Arguments: -// args: A collection of wstrings representing command-line arguments -// index: the index of the argument of which to get the value for. The value -// should be at (index+1). index will be decremented by one on success. -// pSetting: receives the string at index+1 -// Return Value: -// S_OK if we parsed the string successfully, otherwise E_INVALIDARG indicating -// failure. -[[nodiscard]] HRESULT ConsoleArguments::s_HandleFeatureValue(_Inout_ std::vector& args, _Inout_ size_t& index) -{ - auto hr = E_INVALIDARG; - auto hasNext = (index + 1) < args.size(); - if (hasNext) - { - s_ConsumeArg(args, index); - auto value = args[index]; - if (value == FEATURE_PTY_ARG) - { - hr = S_OK; - } - s_ConsumeArg(args, index); - } - return (hasNext) ? 
hr : E_INVALIDARG; -} - // Method Description: // Routine Description: // Given the commandline of tokens `args`, tries to find the argument at @@ -385,13 +353,10 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In std::vector args; auto hr = S_OK; - // Make a mutable copy of the commandline for tokenizing - auto copy = _commandline; - // Tokenize the commandline auto argc = 0; wil::unique_hlocal_ptr argv; - argv.reset(CommandLineToArgvW(copy.c_str(), &argc)); + argv.reset(CommandLineToArgvW(_commandline.c_str(), &argc)); RETURN_LAST_ERROR_IF(argv == nullptr); for (auto i = 1; i < argc; ++i) @@ -406,7 +371,7 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In { hr = E_INVALIDARG; - auto arg = args[i]; + const std::wstring_view arg{ args[i] }; if (arg.substr(0, HANDLE_PREFIX.length()) == HANDLE_PREFIX || arg == SERVER_HANDLE_ARG) @@ -415,7 +380,7 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In // --server 0x4 (new method) // 0x4 (legacy method) // If we see >1 of these, it's invalid. 
- auto serverHandleVal = arg; + std::wstring serverHandleVal{ arg }; if (arg == SERVER_HANDLE_ARG) { @@ -485,10 +450,6 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In { hr = s_GetArgumentValue(args, i, &_height); } - else if (arg == FEATURE_ARG) - { - hr = s_HandleFeatureValue(args, i); - } else if (arg == HEADLESS_ARG) { _headless = true; @@ -507,6 +468,10 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In s_ConsumeArg(args, i); hr = S_OK; } + else if (arg == GLYPH_WIDTH) + { + hr = s_GetArgumentValue(args, i, &_textMeasurement); + } else if (arg == CLIENT_COMMANDLINE_ARG) { // Everything after this is the explicit commandline @@ -615,19 +580,19 @@ HANDLE ConsoleArguments::GetVtOutHandle() const return _vtOutHandle; } -std::wstring ConsoleArguments::GetOriginalCommandLine() const +const std::wstring& ConsoleArguments::GetClientCommandline() const { - return _commandline; + return _clientCommandline; } -std::wstring ConsoleArguments::GetClientCommandline() const +const std::wstring& ConsoleArguments::GetVtMode() const { - return _clientCommandline; + return _vtMode; } -std::wstring ConsoleArguments::GetVtMode() const +const std::wstring& ConsoleArguments::GetTextMeasurement() const { - return _vtMode; + return _textMeasurement; } bool ConsoleArguments::GetForceV1() const diff --git a/src/host/ConsoleArguments.hpp b/src/host/ConsoleArguments.hpp index 926bc063924..6d41ed60ffe 100644 --- a/src/host/ConsoleArguments.hpp +++ b/src/host/ConsoleArguments.hpp @@ -44,9 +44,9 @@ class ConsoleArguments bool HasSignalHandle() const; HANDLE GetSignalHandle() const; - std::wstring GetOriginalCommandLine() const; - std::wstring GetClientCommandline() const; - std::wstring GetVtMode() const; + const std::wstring& GetClientCommandline() const; + const std::wstring& GetVtMode() const; + const std::wstring& GetTextMeasurement() const; bool GetForceV1() const; bool GetForceNoHandoff() const; @@ -59,23 +59,6 @@ class ConsoleArguments void 
EnableConptyModeForTests(); #endif - static const std::wstring_view VT_MODE_ARG; - static const std::wstring_view HEADLESS_ARG; - static const std::wstring_view SERVER_HANDLE_ARG; - static const std::wstring_view SIGNAL_HANDLE_ARG; - static const std::wstring_view HANDLE_PREFIX; - static const std::wstring_view CLIENT_COMMANDLINE_ARG; - static const std::wstring_view FORCE_V1_ARG; - static const std::wstring_view FORCE_NO_HANDOFF_ARG; - static const std::wstring_view FILEPATH_LEADER_PREFIX; - static const std::wstring_view WIDTH_ARG; - static const std::wstring_view HEIGHT_ARG; - static const std::wstring_view INHERIT_CURSOR_ARG; - static const std::wstring_view RESIZE_QUIRK; - static const std::wstring_view FEATURE_ARG; - static const std::wstring_view FEATURE_PTY_ARG; - static const std::wstring_view COM_SERVER_ARG; - private: #ifdef UNIT_TESTING // This accessor used to create a copy of this class for unit testing comparison ease. @@ -123,6 +106,7 @@ class ConsoleArguments HANDLE _vtOutHandle; std::wstring _vtMode; + std::wstring _textMeasurement; bool _forceNoHandoff; bool _forceV1; @@ -150,8 +134,6 @@ class ConsoleArguments [[nodiscard]] static HRESULT s_GetArgumentValue(_Inout_ std::vector& args, _Inout_ size_t& index, _Out_opt_ short* const pSetting); - [[nodiscard]] static HRESULT s_HandleFeatureValue(_Inout_ std::vector& args, - _Inout_ size_t& index); [[nodiscard]] static HRESULT s_ParseHandleArg(const std::wstring& handleAsText, _Inout_ DWORD& handleAsVal); diff --git a/src/host/VtIo.cpp b/src/host/VtIo.cpp index 23500c2f6a8..4399a97c09e 100644 --- a/src/host/VtIo.cpp +++ b/src/host/VtIo.cpp @@ -13,6 +13,7 @@ #include "handle.h" // LockConsole #include "input.h" // ProcessCtrlEvents #include "output.h" // CloseConsoleProcessState +#include "../types/inc/CodepointWidthDetector.hpp" using namespace Microsoft::Console; using namespace Microsoft::Console::Render; @@ -73,6 +74,28 @@ VtIo::VtIo() : // If we were already given VT handles, set up the VT IO 
engine to use those. if (pArgs->InConptyMode()) { + // Honestly, no idea where else to put this. + if (const auto& textMeasurement = pArgs->GetTextMeasurement(); !textMeasurement.empty()) + { + auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); + SettingsTextMeasurementMode settingsMode = SettingsTextMeasurementMode::Graphemes; + TextMeasurementMode mode = TextMeasurementMode::Graphemes; + + if (textMeasurement == L"wcswidth") + { + settingsMode = SettingsTextMeasurementMode::Wcswidth; + mode = TextMeasurementMode::Wcswidth; + } + else if (textMeasurement == L"console") + { + settingsMode = SettingsTextMeasurementMode::Console; + mode = TextMeasurementMode::Console; + } + + gci.SetTextMeasurementMode(settingsMode); + CodepointWidthDetector::Singleton().Reset(mode); + } + return _Initialize(pArgs->GetVtInHandle(), pArgs->GetVtOutHandle(), pArgs->GetVtMode(), pArgs->GetSignalHandle()); } // Didn't need to initialize if we didn't have VT stuff. It's still OK, but report we did nothing. diff --git a/src/host/inputBuffer.cpp b/src/host/inputBuffer.cpp index a843a01082a..6f60ab1c5bc 100644 --- a/src/host/inputBuffer.cpp +++ b/src/host/inputBuffer.cpp @@ -810,7 +810,8 @@ bool InputBuffer::_CoalesceEvent(const INPUT_RECORD& inEvent) noexcept // You can't update the repeat count of such a A,B pair, because they're stored as A,A,B,B (down-down, up-up). // I believe the proper approach is to store pairs of characters as pairs, update their combined // repeat count and only when they're being read de-coalesce them into their alternating form. - !IsGlyphFullWidth(inKey.uChar.UnicodeChar)) + // TODO:GH#8000 IsGlyphFullWidth was replaced with til::is_surrogate to get rid of the former. Neither approach is fully correct.
+ !til::is_surrogate(inKey.uChar.UnicodeChar)) { lastKey.wRepeatCount += inKey.wRepeatCount; return true; diff --git a/src/host/screenInfo.cpp b/src/host/screenInfo.cpp index 3e3046e11f8..5680e7490f9 100644 --- a/src/host/screenInfo.cpp +++ b/src/host/screenInfo.cpp @@ -514,15 +514,30 @@ void SCREEN_INFORMATION::RefreshFontWithRenderer() { if (IsActiveScreenBuffer()) { + auto& globals = ServiceLocator::LocateGlobals(); + const auto& gci = globals.getConsoleInformation(); + // Hand the handle to our internal structure to the font change trigger in case it updates it based on what's appropriate. - if (ServiceLocator::LocateGlobals().pRender != nullptr) + if (globals.pRender != nullptr) { - ServiceLocator::LocateGlobals().pRender->TriggerFontChange(ServiceLocator::LocateGlobals().dpi, - GetDesiredFont(), - GetCurrentFont()); + globals.pRender->TriggerFontChange(globals.dpi, GetDesiredFont(), GetCurrentFont()); + } - CodepointWidthDetector::Singleton().ClearFallbackCache(); + TextMeasurementMode mode; + switch (gci.GetTextMeasurementMode()) + { + case SettingsTextMeasurementMode::Wcswidth: + mode = TextMeasurementMode::Wcswidth; + break; + case SettingsTextMeasurementMode::Console: + mode = TextMeasurementMode::Console; + break; + default: + mode = TextMeasurementMode::Graphemes; + break; } + + CodepointWidthDetector::Singleton().Reset(mode); } } @@ -2456,7 +2471,6 @@ Viewport SCREEN_INFORMATION::GetVirtualViewport() const noexcept // Method Description: // - Returns true if the character at the cursor's current position is wide. 
-// See IsGlyphFullWidth // Arguments: // - // Return Value: diff --git a/src/host/settings.cpp b/src/host/settings.cpp index 0798fe847ab..e0b07e38f75 100644 --- a/src/host/settings.cpp +++ b/src/host/settings.cpp @@ -777,6 +777,16 @@ bool Settings::GetCopyColor() const noexcept return _fCopyColor; } +SettingsTextMeasurementMode Settings::GetTextMeasurementMode() const noexcept +{ + return _textMeasurement; +} + +void Settings::SetTextMeasurementMode(const SettingsTextMeasurementMode mode) noexcept +{ + _textMeasurement = mode; +} + bool Settings::GetEnableBuiltinGlyphs() const noexcept { return _fEnableBuiltinGlyphs; diff --git a/src/host/settings.hpp b/src/host/settings.hpp index a254589755f..94c38afbf43 100644 --- a/src/host/settings.hpp +++ b/src/host/settings.hpp @@ -24,6 +24,13 @@ constexpr unsigned short MIN_WINDOW_OPACITY = 0x4D; // 0x4D is approximately 30% #include "ConsoleArguments.hpp" #include "../renderer/inc/RenderSettings.hpp" +enum class SettingsTextMeasurementMode : DWORD +{ + Graphemes, + Wcswidth, + Console, +}; + class Settings { using RenderSettings = Microsoft::Console::Render::RenderSettings; @@ -171,6 +178,8 @@ class Settings bool GetUseDx() const noexcept; bool GetCopyColor() const noexcept; + SettingsTextMeasurementMode GetTextMeasurementMode() const noexcept; + void SetTextMeasurementMode(SettingsTextMeasurementMode mode) noexcept; bool GetEnableBuiltinGlyphs() const noexcept; private: @@ -213,6 +222,7 @@ class Settings std::wstring _LaunchFaceName; bool _fAllowAltF4Close; DWORD _dwVirtTermLevel; + SettingsTextMeasurementMode _textMeasurement = SettingsTextMeasurementMode::Graphemes; bool _fUseDx; bool _fCopyColor; bool _fEnableBuiltinGlyphs = true; diff --git a/src/host/ut_host/ConsoleArgumentsTests.cpp b/src/host/ut_host/ConsoleArgumentsTests.cpp index d640469f236..0f60f33237e 100644 --- a/src/host/ut_host/ConsoleArgumentsTests.cpp +++ b/src/host/ut_host/ConsoleArgumentsTests.cpp @@ -1144,143 +1144,3 @@ void 
ConsoleArgumentsTests::SignalHandleTests() false), // runAsComServer false); // successful parse? } - -void ConsoleArgumentsTests::FeatureArgTests() -{ - // Just some assorted positive values that could be valid handles. No specific correlation to anything. - auto hInSample = UlongToHandle(0x10); - auto hOutSample = UlongToHandle(0x24); - - std::wstring commandline; - - commandline = L"conhost.exe --feature pty"; - ArgTestsRunner(L"#1 Normal case, pass a supported feature", - commandline, - hInSample, - hOutSample, - ConsoleArguments(commandline, - L"", - hInSample, - hOutSample, - L"", // vtMode - 0, // width - 0, // height - false, // forceV1 - false, // forceNoHandoff - false, // headless - true, // createServerHandle - 0, // serverHandle - 0, // signalHandle - false, // inheritCursor - false), // runAsComServer - true); // successful parse? - commandline = L"conhost.exe --feature tty"; - ArgTestsRunner(L"#2 Error case, pass an unsupported feature", - commandline, - hInSample, - hOutSample, - ConsoleArguments(commandline, - L"", - hInSample, - hOutSample, - L"", // vtMode - 0, // width - 0, // height - false, // forceV1 - false, // forceNoHandoff - false, // headless - true, // createServerHandle - 0, // serverHandle - 0, // signalHandle - false, // inheritCursor - false), // runAsComServer - false); // successful parse? - - commandline = L"conhost.exe --feature pty --feature pty"; - ArgTestsRunner(L"#3 Many supported features", - commandline, - hInSample, - hOutSample, - ConsoleArguments(commandline, - L"", - hInSample, - hOutSample, - L"", // vtMode - 0, // width - 0, // height - false, // forceV1 - false, // forceNoHandoff - false, // headless - true, // createServerHandle - 0, // serverHandle - 0, // signalHandle - false, // inheritCursor - false), // runAsComServer - true); // successful parse? 
- - commandline = L"conhost.exe --feature pty --feature tty"; - ArgTestsRunner(L"#4 At least one unsupported feature", - commandline, - hInSample, - hOutSample, - ConsoleArguments(commandline, - L"", - hInSample, - hOutSample, - L"", // vtMode - 0, // width - 0, // height - false, // forceV1 - false, // forceNoHandoff - false, // headless - true, // createServerHandle - 0, // serverHandle - 0, // signalHandle - false, // inheritCursor - false), // runAsComServer - false); // successful parse? - - commandline = L"conhost.exe --feature pty --feature"; - ArgTestsRunner(L"#5 no value to the feature flag", - commandline, - hInSample, - hOutSample, - ConsoleArguments(commandline, - L"", - hInSample, - hOutSample, - L"", // vtMode - 0, // width - 0, // height - false, // forceV1 - false, // forceNoHandoff - false, // headless - true, // createServerHandle - 0, // serverHandle - 0, // signalHandle - false, // inheritCursor - false), // runAsComServer - false); // successful parse? - - commandline = L"conhost.exe --feature pty --feature --signal foo"; - ArgTestsRunner(L"#6 a invalid feature value that is otherwise a valid arg", - commandline, - hInSample, - hOutSample, - ConsoleArguments(commandline, - L"", - hInSample, - hOutSample, - L"", // vtMode - 0, // width - 0, // height - false, // forceV1 - false, // forceNoHandoff - false, // headless - true, // createServerHandle - 0, // serverHandle - 0, // signalHandle - false, // inheritCursor - false), // runAsComServer - false); // successful parse? 
-} diff --git a/src/inc/conpty-static.h b/src/inc/conpty-static.h index 1dd9a4123e2..d05ab56e8d3 100644 --- a/src/inc/conpty-static.h +++ b/src/inc/conpty-static.h @@ -23,7 +23,19 @@ #endif #endif -#define PSEUDOCONSOLE_RESIZE_QUIRK (2u) +// CreatePseudoConsole Flags +#ifndef PSEUDOCONSOLE_INHERIT_CURSOR +#define PSEUDOCONSOLE_INHERIT_CURSOR (0x1) +#endif +#ifndef PSEUDOCONSOLE_RESIZE_QUIRK +#define PSEUDOCONSOLE_RESIZE_QUIRK (0x2) +#endif +#ifndef PSEUDOCONSOLE_GLYPH_WIDTH__MASK +#define PSEUDOCONSOLE_GLYPH_WIDTH__MASK 0x18 +#define PSEUDOCONSOLE_GLYPH_WIDTH_GRAPHEMES 0x08 +#define PSEUDOCONSOLE_GLYPH_WIDTH_WCSWIDTH 0x10 +#define PSEUDOCONSOLE_GLYPH_WIDTH_CONSOLE 0x18 +#endif CONPTY_EXPORT HRESULT WINAPI ConptyCreatePseudoConsole(COORD size, HANDLE hInput, HANDLE hOutput, DWORD dwFlags, HPCON* phPC); CONPTY_EXPORT HRESULT WINAPI ConptyCreatePseudoConsoleAsUser(HANDLE hToken, COORD size, HANDLE hInput, HANDLE hOutput, DWORD dwFlags, HPCON* phPC); diff --git a/src/propslib/RegistrySerialization.cpp b/src/propslib/RegistrySerialization.cpp index a350700c66f..8f737981a2c 100644 --- a/src/propslib/RegistrySerialization.cpp +++ b/src/propslib/RegistrySerialization.cpp @@ -62,6 +62,7 @@ const RegistrySerialization::_RegPropertyMap RegistrySerialization::s_PropertyMa { _RegPropertyType::Boolean, CONSOLE_REGISTRY_TERMINALSCROLLING, SET_FIELD_AND_SIZE(_TerminalScrolling) }, { _RegPropertyType::Boolean, CONSOLE_REGISTRY_USEDX, SET_FIELD_AND_SIZE(_fUseDx) }, { _RegPropertyType::Boolean, CONSOLE_REGISTRY_COPYCOLOR, SET_FIELD_AND_SIZE(_fCopyColor) }, + { _RegPropertyType::Dword, L"TextMeasurement", SET_FIELD_AND_SIZE(_textMeasurement) }, #if TIL_FEATURE_CONHOSTATLASENGINE_ENABLED { _RegPropertyType::Boolean, L"EnableBuiltinGlyphs", SET_FIELD_AND_SIZE(_fEnableBuiltinGlyphs) }, #endif diff --git a/src/terminal/adapter/DispatchTypes.hpp b/src/terminal/adapter/DispatchTypes.hpp index a0a74d8c83a..46b8a316d39 100644 --- a/src/terminal/adapter/DispatchTypes.hpp +++ 
b/src/terminal/adapter/DispatchTypes.hpp @@ -545,9 +545,19 @@ namespace Microsoft::Console::VirtualTerminal::DispatchTypes ALTERNATE_SCROLL = DECPrivateMode(1007), ASB_AlternateScreenBuffer = DECPrivateMode(1049), XTERM_BracketedPasteMode = DECPrivateMode(2004), + GCM_GraphemeClusterMode = DECPrivateMode(2027), W32IM_Win32InputMode = DECPrivateMode(9001), }; + enum ModeResponses : VTInt + { + DECRPM_Unsupported = 0, + DECRPM_Enabled = 1, + DECRPM_Disabled = 2, + DECRPM_PermanentlyEnabled = 3, + DECRPM_PermanentlyDisabled = 4, + }; + enum CharacterSets : uint64_t { DecSpecialGraphics = VTID("0"), diff --git a/src/terminal/adapter/adaptDispatch.cpp b/src/terminal/adapter/adaptDispatch.cpp index 4135cac88d6..5da19619d6d 100644 --- a/src/terminal/adapter/adaptDispatch.cpp +++ b/src/terminal/adapter/adaptDispatch.cpp @@ -4,10 +4,11 @@ #include "precomp.h" #include "adaptDispatch.hpp" +#include "../../inc/unicode.hpp" #include "../../renderer/base/renderer.hpp" -#include "../../types/inc/Viewport.hpp" +#include "../../types/inc/CodepointWidthDetector.hpp" #include "../../types/inc/utils.hpp" -#include "../../inc/unicode.hpp" +#include "../../types/inc/Viewport.hpp" #include "../parser/ascii.hpp" using namespace Microsoft::Console::Types; @@ -2012,6 +2013,8 @@ bool AdaptDispatch::_ModeParamsHelper(const DispatchTypes::ModeParams param, con case DispatchTypes::ModeParams::XTERM_BracketedPasteMode: _api.SetSystemMode(ITerminalApi::Mode::BracketedPaste, enable); return !_api.IsConsolePty(); + case DispatchTypes::ModeParams::GCM_GraphemeClusterMode: + return true; case DispatchTypes::ModeParams::W32IM_Win32InputMode: _terminalInput.SetInputMode(TerminalInput::Mode::Win32, enable); // ConPTY requests the Win32InputMode on startup and disables it on shutdown. When nesting ConPTY inside @@ -2058,116 +2061,126 @@ bool AdaptDispatch::ResetMode(const DispatchTypes::ModeParams param) // - True if handled successfully. False otherwise. 
bool AdaptDispatch::RequestMode(const DispatchTypes::ModeParams param) { - auto enabled = std::optional{}; + static constexpr auto mapTempBoolState = [](bool enabled) { return enabled ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; }; + static constexpr auto mapPermBoolState = [](bool enabled) { return enabled ? DispatchTypes::DECRPM_PermanentlyEnabled : DispatchTypes::DECRPM_PermanentlyDisabled; }; + auto state = DispatchTypes::DECRPM_Unsupported; switch (param) { case DispatchTypes::ModeParams::IRM_InsertReplaceMode: - enabled = _modes.test(Mode::InsertReplace); + state = mapTempBoolState(_modes.test(Mode::InsertReplace)); break; case DispatchTypes::ModeParams::LNM_LineFeedNewLineMode: // VT apps expect that the system and input modes are the same, so if // they become out of sync, we just act as if LNM mode isn't supported. if (_api.GetSystemMode(ITerminalApi::Mode::LineFeed) == _terminalInput.GetInputMode(TerminalInput::Mode::LineFeed)) { - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::LineFeed); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::LineFeed)); } break; case DispatchTypes::ModeParams::DECCKM_CursorKeysMode: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::CursorKey); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::CursorKey)); break; case DispatchTypes::ModeParams::DECANM_AnsiMode: - enabled = _api.GetStateMachine().GetParserMode(StateMachine::Mode::Ansi); + state = mapTempBoolState(_api.GetStateMachine().GetParserMode(StateMachine::Mode::Ansi)); break; case DispatchTypes::ModeParams::DECCOLM_SetNumberOfColumns: // DECCOLM is not supported in conpty mode if (!_api.IsConsolePty()) { - enabled = _modes.test(Mode::Column); + state = mapTempBoolState(_modes.test(Mode::Column)); } break; case DispatchTypes::ModeParams::DECSCNM_ScreenMode: - enabled = _renderSettings.GetRenderMode(RenderSettings::Mode::ScreenReversed); + state = 
mapTempBoolState(_renderSettings.GetRenderMode(RenderSettings::Mode::ScreenReversed)); break; case DispatchTypes::ModeParams::DECOM_OriginMode: - enabled = _modes.test(Mode::Origin); + state = mapTempBoolState(_modes.test(Mode::Origin)); break; case DispatchTypes::ModeParams::DECAWM_AutoWrapMode: - enabled = _api.GetSystemMode(ITerminalApi::Mode::AutoWrap); + state = mapTempBoolState(_api.GetSystemMode(ITerminalApi::Mode::AutoWrap)); break; case DispatchTypes::ModeParams::DECARM_AutoRepeatMode: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::AutoRepeat); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::AutoRepeat)); break; case DispatchTypes::ModeParams::ATT610_StartCursorBlink: - enabled = _pages.ActivePage().Cursor().IsBlinkingAllowed(); + state = mapTempBoolState(_pages.ActivePage().Cursor().IsBlinkingAllowed()); break; case DispatchTypes::ModeParams::DECTCEM_TextCursorEnableMode: - enabled = _pages.ActivePage().Cursor().IsVisible(); + state = mapTempBoolState(_pages.ActivePage().Cursor().IsVisible()); break; case DispatchTypes::ModeParams::XTERM_EnableDECCOLMSupport: // DECCOLM is not supported in conpty mode if (!_api.IsConsolePty()) { - enabled = _modes.test(Mode::AllowDECCOLM); + state = mapTempBoolState(_modes.test(Mode::AllowDECCOLM)); } break; case DispatchTypes::ModeParams::DECPCCM_PageCursorCouplingMode: - enabled = _modes.test(Mode::PageCursorCoupling); + state = mapTempBoolState(_modes.test(Mode::PageCursorCoupling)); break; case DispatchTypes::ModeParams::DECNKM_NumericKeypadMode: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::Keypad); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::Keypad)); break; case DispatchTypes::ModeParams::DECBKM_BackarrowKeyMode: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::BackarrowKey); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::BackarrowKey)); break; case 
DispatchTypes::ModeParams::DECLRMM_LeftRightMarginMode: - enabled = _modes.test(Mode::AllowDECSLRM); + state = mapTempBoolState(_modes.test(Mode::AllowDECSLRM)); break; case DispatchTypes::ModeParams::DECECM_EraseColorMode: - enabled = _modes.test(Mode::EraseColor); + state = mapTempBoolState(_modes.test(Mode::EraseColor)); break; case DispatchTypes::ModeParams::VT200_MOUSE_MODE: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::DefaultMouseTracking); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::DefaultMouseTracking)); break; case DispatchTypes::ModeParams::BUTTON_EVENT_MOUSE_MODE: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::ButtonEventMouseTracking); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::ButtonEventMouseTracking)); break; case DispatchTypes::ModeParams::ANY_EVENT_MOUSE_MODE: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::AnyEventMouseTracking); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::AnyEventMouseTracking)); break; case DispatchTypes::ModeParams::UTF8_EXTENDED_MODE: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::Utf8MouseEncoding); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::Utf8MouseEncoding)); break; case DispatchTypes::ModeParams::SGR_EXTENDED_MODE: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::SgrMouseEncoding); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::SgrMouseEncoding)); break; case DispatchTypes::ModeParams::FOCUS_EVENT_MODE: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::FocusEvent); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::FocusEvent)); break; case DispatchTypes::ModeParams::ALTERNATE_SCROLL: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::AlternateScroll); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::AlternateScroll)); break; 
case DispatchTypes::ModeParams::ASB_AlternateScreenBuffer: - enabled = _usingAltBuffer; + state = mapTempBoolState(_usingAltBuffer); break; case DispatchTypes::ModeParams::XTERM_BracketedPasteMode: - enabled = _api.GetSystemMode(ITerminalApi::Mode::BracketedPaste); + state = mapTempBoolState(_api.GetSystemMode(ITerminalApi::Mode::BracketedPaste)); + break; + case DispatchTypes::ModeParams::GCM_GraphemeClusterMode: + { + const auto mode = CodepointWidthDetector::Singleton().GetMode(); + state = mapPermBoolState(mode == TextMeasurementMode::Graphemes); break; + } case DispatchTypes::ModeParams::W32IM_Win32InputMode: - enabled = _terminalInput.GetInputMode(TerminalInput::Mode::Win32); + state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::Win32)); break; default: - enabled = std::nullopt; break; } - // 1 indicates the mode is enabled, 2 it's disabled, and 0 it's unsupported - const auto state = enabled.has_value() ? (enabled.value() ? 1 : 2) : 0; - const auto isPrivate = param >= DispatchTypes::DECPrivateMode(0); - const auto prefix = isPrivate ? L"?" : L""; - const auto mode = isPrivate ? 
param - DispatchTypes::DECPrivateMode(0) : param; - const auto response = wil::str_printf(L"\x1b[%s%d;%d$y", prefix, mode, state); - _api.ReturnResponse(response); + VTInt mode = param; + std::wstring_view prefix; + + if (mode >= DispatchTypes::DECPrivateMode(0)) + { + mode -= DispatchTypes::DECPrivateMode(0); + prefix = L"?"; + } + + _api.ReturnResponse(fmt::format(FMT_COMPILE(L"\x1b[{}{};{}$y"), prefix, mode, state)); return true; } diff --git a/src/tools/GraphemeTableGen/GraphemeTableGen.csproj b/src/tools/GraphemeTableGen/GraphemeTableGen.csproj new file mode 100644 index 00000000000..6f4623a81bd --- /dev/null +++ b/src/tools/GraphemeTableGen/GraphemeTableGen.csproj @@ -0,0 +1,10 @@ + + + + Exe + net8.0 + enable + enable + + + diff --git a/src/tools/GraphemeTableGen/Program.cs b/src/tools/GraphemeTableGen/Program.cs new file mode 100644 index 00000000000..fd838bc8e85 --- /dev/null +++ b/src/tools/GraphemeTableGen/Program.cs @@ -0,0 +1,579 @@ +using System.Text; +using System.Runtime.InteropServices; +using System.Numerics; +using System.Xml.Linq; +using TrieType = uint; + +// UAX #29 uses "A ÷ B" to indicate that there's a potential break opportunity between A and B. +// But ÷ is not a valid identifier in Go, so we use Ω which is. +const byte Ω = 0b11; + +// JoinRules doesn't quite follow UAX #29, as it states: +// > Note: Testing two adjacent characters is insufficient for determining a boundary. +// +// I completely agree, however it makes the implementation complex and slow, and it only benefits what can be considered +// edge cases in the context of terminals. By using a lookup table anyway this results in a >100MB/s throughput, +// before adding any fast-passes whatsoever. This is 2x as fast as any standards conforming implementation I found. 
+// +// This affects the following rules: +// * GB9c: \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker} [\p{InCB=Extend}\p{InCB=Linker}]* × \p{InCB=Consonant} +// "Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker." +// Our implementation does this: +// × \p{InCB=Linker} +// \p{InCB=Linker} × \p{InCB=Consonant} +// In other words, it doesn't check for a leading \p{InCB=Consonant} or a series of Extenders/Linkers in between. +// I suspect that these simplified rules are sufficient for the vast majority of terminal use cases. +// * GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic} +// "Do not break within emoji modifier sequences or emoji zwj sequences." +// Our implementation does this: +// ZWJ × \p{Extended_Pictographic} +// In other words, it doesn't check whether the ZWJ is led by another \p{Extended_Pictographic}. +// Again, I suspect that a trailing, standalone ZWJ is a rare occurrence and joining it with any Emoji is fine. +// * GB12: sot (RI RI)* RI × RI +// GB13: [^RI] (RI RI)* RI × RI +// "Do not break within emoji flag sequences. That is, do not break between regional indicator +// (RI) symbols if there is an odd number of RI characters before the break point." +// Our implementation does this (this is not a real notation): +// RI ÷ RI × RI ÷ RI +// In other words, it joins any pair of RIs and then immediately aborts further RI joins. +// Unlike the above two cases, this is a bit more risky, because it's much more likely to be encountered in practice. +// Imagine a shell that doesn't understand graphemes for instance. You type 2 flags (= 4 RIs) and backspace. +// You'll now have 3 RIs. If iterating through it forwards, you'd join the first two, then get 1 lone RI at the end, +// whereas if you iterate backwards you'd join the last two, then get 1 lone RI at the start. +// This asymmetry may have some subtle effects, but I suspect that it's still rare enough to not matter much.
+// +// This is a great reference for the resulting table: +// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html +byte[][][] joinRules = +[ + // Base table + [ + /* | leading -> trailing codepoint */ + /* v | cbOther | cbControl | cbExtend | cbRI | cbPrepend | cbHangulL | cbHangulV | cbHangulT | cbHangulLV | cbHangulLVT | cbInCBLinker | cbInCBConsonant | cbExtPic | cbZWJ | */ + /* cbOther | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbControl | */ [Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */], + /* cbExtend | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbRI | */ [Ω /* | */, Ω /* | */, 0 /* | */, 1 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbPrepend | */ [0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */], + /* cbHangulL | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbHangulV | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbHangulT | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbHangulLV | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* 
cbHangulLVT | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbInCBLinker | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, 0 /* | */], + /* cbInCBConsonant | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbExtPic | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbZWJ | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */], + ], + // Once we have encountered a Regional Indicator pair we'll enter this table. + // It's a copy of the base table, but further Regional Indicator joins are forbidden. 
+ [ + /* | leading -> trailing codepoint */ + /* v | cbOther | cbControl | cbExtend | cbRI | cbPrepend | cbHangulL | cbHangulV | cbHangulT | cbHangulLV | cbHangulLVT | cbInCBLinker | cbInCBConsonant | cbExtPic | cbZWJ | */ + /* cbOther | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbControl | */ [Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */], + /* cbExtend | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbRI | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbPrepend | */ [0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */], + /* cbHangulL | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbHangulV | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbHangulT | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbHangulLV | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbHangulLVT | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbInCBLinker | 
*/ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, 0 /* | */], + /* cbInCBConsonant | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbExtPic | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], + /* cbZWJ | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */], + ] +]; + +if (args.Length != 1) +{ + Console.WriteLine( + """ + Usage: GraphemeTableGen + + You can download the latest ucd.nounihan.grouped.xml from: + https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip + """ + ); + Environment.Exit(1); +} + +var ucd = ExtractValuesFromUcd(args[0]); + +// Find the best trie configuration over the given block sizes (2^2 - 2^8) and stages (4). +// More stages = Less size. The trajectory roughly follows a+b*c^stages, where c < 1. +// 4 still gives ~30% savings over 3 stages and going beyond 5 gives diminishing returns (<10%). +var trie = BuildBestTrie(ucd.Values, 2, 8, 4); +// The joinRules above has 2 bits per value. This packs it into 32-bit integers to save space. +var rules = PrepareRulesTable(joinRules); +// Each rules item has the same length. Each item is 32 bits = 4 bytes. +var totalSize = trie.TotalSize + rules.Length * rules[0].Length * sizeof(TrieType); + +// Run a quick sanity check to ensure that the trie works as expected. 
+foreach (var (expected, cp) in ucd.Values.Select((v, i) => (v, i))) +{ + TrieType v = 0; + foreach (var s in trie.Stages) + { + v = s.Values[(int)v + ((cp >> s.Shift) & s.Mask)]; + } + + if (v != expected) + { + throw new Exception($"trie sanity check failed for {cp:X}"); + } +} + +// All the remaining code starting here simply generates the C++ output. +var buf = new StringBuilder(); +buf.Append("// Generated by GraphemeTableGen\n"); +buf.Append($"// on {DateTime.UtcNow.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ssK")}, from {ucd.Description}, {totalSize} bytes\n"); +buf.Append("// clang-format off\n"); + +foreach (var stage in trie.Stages) +{ + var fmt = $" 0x{{0:x{stage.Bits / 4}}},"; + var width = 16; + if (stage.Index != 0) + { + width = stage.Mask + 1; + } + + buf.Append($"static constexpr uint{stage.Bits}_t s_stage{stage.Index}[] = {{"); + foreach (var (value, j) in stage.Values.Select((v, j) => (v, j))) + { + if (j % width == 0) + { + buf.Append("\n "); + } + buf.AppendFormat(fmt, value); + } + buf.Append("\n};\n"); +} + +buf.Append($"static constexpr uint32_t s_joinRules[{rules.Length}][{rules[0].Length}] = {{\n"); +foreach (var table in rules) +{ + buf.Append(" {\n"); + foreach (var r in table) + { + buf.Append($" 0b{r:b32},\n"); + } + buf.Append(" },\n"); +} +buf.Append("};\n"); + +buf.Append($"constexpr uint{trie.Stages[^1].Bits}_t ucdLookup(const char32_t cp) noexcept\n"); +buf.Append("{\n"); +foreach (var stage in trie.Stages) +{ + buf.Append($" const auto s{stage.Index} = s_stage{stage.Index}["); + if (stage.Index == 0) + { + buf.Append($"cp >> {stage.Shift}"); + } + else + { + buf.Append($"s{stage.Index - 1} + ((cp >> {stage.Shift}) & {stage.Mask})"); + } + + buf.Append("];\n"); +} +buf.Append($" return s{trie.Stages.Count - 1};\n"); +buf.Append("}\n"); + +buf.Append("constexpr uint8_t ucdGraphemeJoins(const uint8_t state, const uint8_t lead, const uint8_t trail) noexcept\n"); +buf.Append("{\n"); +buf.Append(" const auto l = lead & 15;\n"); +buf.Append(" 
const auto t = trail & 15;\n");
+buf.Append($" return (s_joinRules[state][l] >> (t * {BitOperations.PopCount(Ω)})) & {Ω};\n");
+buf.Append("}\n");
+buf.Append("constexpr bool ucdGraphemeDone(const uint8_t state) noexcept\n");
+buf.Append("{\n");
+buf.Append($" return state == {Ω};\n");
+buf.Append("}\n");
+buf.Append("constexpr uint8_t ucdToCharacterWidth(const uint8_t val) noexcept\n");
+buf.Append("{\n");
+buf.Append(" return val >> 6;\n");
+buf.Append("}\n");
+buf.Append("// clang-format on\n");
+
+Console.Write(buf);
+
+// This reads the given ucd.nounihan.grouped.xml file and extracts the
+// CharacterWidth and ClusterBreak properties for all codepoints.
+//
+// Returns the description text of the UCD file and one packed TrieValue() per codepoint.
+// Throws if the file contains a GCB, InCB or ea value that this tool doesn't know about,
+// so that new Unicode versions can't silently produce wrong tables.
+static Ucd ExtractValuesFromUcd(string path)
+{
+    // One entry per codepoint in U+0000..U+10FFFF. Codepoints that
+    // aren't listed in the file remain at "Other" / "Narrow".
+    var values = new TrieType[1114112];
+    Array.Fill(values, TrieValue(ClusterBreak.Other, CharacterWidth.Narrow));
+
+    XNamespace ns = "http://www.unicode.org/ns/2003/ucd/1.0";
+    var doc = XDocument.Load(path);
+    var root = doc.Root!;
+    var description = root.Element(ns + "description")!.Value;
+
+    foreach (var group in root.Descendants(ns + "group"))
+    {
+        // In the grouped UCD format, attributes on the <group> element act as
+        // defaults for all children that don't specify the attribute themselves.
+        var groupGeneralCategory = group.Attribute("gc")?.Value;
+        var groupGraphemeClusterBreak = group.Attribute("GCB")?.Value;
+        var groupIndicConjunctBreak = group.Attribute("InCB")?.Value;
+        var groupExtendedPictographic = group.Attribute("ExtPict")?.Value;
+        var groupEastAsian = group.Attribute("ea")?.Value;
+
+        foreach (var ch in group.Elements())
+        {
+            // An element describes either a single codepoint ("cp")
+            // or an inclusive range ("first-cp" to "last-cp").
+            int firstCp;
+            int lastCp;
+            if (ch.Attribute("cp") is { } val)
+            {
+                var cp = Convert.ToInt32(val.Value, 16);
+                firstCp = cp;
+                lastCp = cp;
+            }
+            else
+            {
+                firstCp = Convert.ToInt32(ch.Attribute("first-cp")!.Value, 16);
+                lastCp = Convert.ToInt32(ch.Attribute("last-cp")!.Value, 16);
+            }
+
+            var generalCategory = ch.Attribute("gc")?.Value ?? groupGeneralCategory ?? "";
+            var graphemeClusterBreak = ch.Attribute("GCB")?.Value ?? groupGraphemeClusterBreak ?? "";
+            var indicConjunctBreak = ch.Attribute("InCB")?.Value ?? groupIndicConjunctBreak ?? "";
+            var extendedPictographic = ch.Attribute("ExtPict")?.Value ?? groupExtendedPictographic ?? "";
+            var eastAsian = ch.Attribute("ea")?.Value ?? groupEastAsian ?? "";
+
+            var cb = graphemeClusterBreak switch
+            {
+                "XX" => ClusterBreak.Other, // Anything else
+                // We ignore GB3 which demands that CR × LF do not break apart, because
+                // * these control characters won't normally reach our text storage
+                // * otherwise we're in a raw write mode and historically conhost stores them in separate cells
+                "CR" or "LF" or "CN" => ClusterBreak.Control, // Carriage Return, Line Feed, Control
+                "EX" or "SM" => ClusterBreak.Extend, // Extend, SpacingMark
+                "PP" => ClusterBreak.Prepend, // Prepend
+                "ZWJ" => ClusterBreak.ZWJ, // Zero Width Joiner
+                "RI" => ClusterBreak.RI, // Regional Indicator
+                "L" => ClusterBreak.HangulL, // Hangul Syllable Type L
+                "V" => ClusterBreak.HangulV, // Hangul Syllable Type V
+                "T" => ClusterBreak.HangulT, // Hangul Syllable Type T
+                "LV" => ClusterBreak.HangulLV, // Hangul Syllable Type LV
+                "LVT" => ClusterBreak.HangulLVT, // Hangul Syllable Type LVT
+                _ => throw new Exception($"Unrecognized GCB {graphemeClusterBreak} for {firstCp} to {lastCp}")
+            };
+
+            if (extendedPictographic == "Y")
+            {
+                // Currently every single Extended_Pictographic codepoint happens to be GCB=XX.
+                // This is fantastic for us because it means we can stuff it into the ClusterBreak enum
+                // and treat it as an alias of Other (GCB=XX), but with the special GB11 properties.
+                if (cb != ClusterBreak.Other)
+                {
+                    throw new Exception(
+                        $"Unexpected GCB {graphemeClusterBreak} with ExtPict=Y for {firstCp} to {lastCp}");
+                }
+
+                cb = ClusterBreak.ExtPic;
+            }
+
+            // InCB=Linker and InCB=Consonant replace whatever class was chosen above.
+            cb = indicConjunctBreak switch
+            {
+                "None" or "Extend" => cb,
+                "Linker" => ClusterBreak.InCBLinker,
+                "Consonant" => ClusterBreak.InCBConsonant,
+                _ => throw new Exception($"Unrecognized InCB {indicConjunctBreak} for {firstCp} to {lastCp}")
+            };
+
+            var width = eastAsian switch
+            {
+                "N" or "Na" or "H" => CharacterWidth.Narrow, // Neutral, Narrow, Half-width
+                "F" or "W" => CharacterWidth.Wide, // Full-width, Wide
+                "A" => CharacterWidth.Ambiguous, // Ambiguous
+                _ => throw new Exception($"Unrecognized ea {eastAsian} for {firstCp} to {lastCp}")
+            };
+
+            // There's no "ea" attribute for "zero width" so we need to do that ourselves. This matches:
+            // Mc: Mark, spacing combining
+            // Me: Mark, enclosing
+            // Mn: Mark, non-spacing
+            // Cf: Control, format
+            // The char overload of StartsWith is an ordinal comparison. General category
+            // codes are plain ASCII and must not be compared with culture-sensitive rules.
+            if (generalCategory.StartsWith('M') || generalCategory == "Cf")
+            {
+                width = CharacterWidth.ZeroWidth;
+            }
+
+            Fill(firstCp, lastCp, TrieValue(cb, width));
+        }
+    }
+
+    // Box-drawing and block elements are ambiguous according to their EastAsian attribute,
+    // but by convention terminals always consider them to be narrow.
+    Fill(0x2500, 0x259F, TrieValue(ClusterBreak.Other, CharacterWidth.Narrow));
+
+    return new Ucd
+    {
+        Description = description,
+        Values = values.ToList(),
+    };
+
+    // Assigns value to the inclusive codepoint range [first, last].
+    void Fill(int first, int last, TrieType value)
+    {
+        Array.Fill(values, value, first, last - first + 1);
+    }
+}
+
+// Packs the arguments into a single integer that's stored as-is in the final trie stage.
+// The ClusterBreak occupies the low 6 bits and the CharacterWidth the 2 bits above it.
+static TrieType TrieValue(ClusterBreak cb, CharacterWidth width)
+{
+    return (TrieType)((byte)(cb) | (byte)(width) << 6);
+}
+
+// Because each item in the list of 2D rule tables only uses 2 bits and not all 8 in each byte,
+// this function packs them into chunks of 32-bit integers to save space. 
+static uint[][] PrepareRulesTable(byte[][][] rules)
+{
+    // The output has one table per input table, and each table has a fixed size of 16 rows.
+    // Each row is a single uint that holds up to 16 entries of 2 bits each.
+    var compressed = new uint[rules.Length][];
+    for (var i = 0; i < compressed.Length; i++)
+    {
+        compressed[i] = new uint[16];
+    }
+
+    foreach (var (table, prevIndex) in rules.Select((v, i) => (v, i)))
+    {
+        // Fail with a descriptive error instead of an IndexOutOfRangeException further below.
+        if (table.Length > 16)
+        {
+            throw new Exception("Can't pack table into 16 rows");
+        }
+
+        foreach (var (row, lead) in table.Select((v, i) => (v, i)))
+        {
+            if (row.Length > 16)
+            {
+                throw new Exception("Can't pack row into 32 bits");
+            }
+
+            uint nextIndices = 0;
+            foreach (var (nextIndex, trail) in row.Select((v, i) => (v, i)))
+            {
+                if (nextIndex > Ω)
+                {
+                    throw new Exception("Can't pack table index into 2 bits");
+                }
+
+                // The entry for a given trail class is stored at bit offset trail * 2.
+                nextIndices |= (uint)(nextIndex << (trail * 2));
+            }
+
+            compressed[prevIndex][lead] = nextIndices;
+        }
+    }
+
+    return compressed;
+}
+
+// This tries all possible trie configurations and returns the one with the smallest size. It's brute force.
+// Every one of the (stages - 1) deduplicated stages gets assigned every shift in [minShift, maxShift].
+static Trie BuildBestTrie(List<TrieType> uncompressed, int minShift, int maxShift, int stages)
+{
+    var depth = stages - 1;
+    var delta = maxShift - minShift + 1;
+    // total = delta ^ depth, which is the number of shift combinations to try.
+    var total = 1;
+    for (var i = 0; i < depth; i++)
+    {
+        total *= delta;
+    }
+
+    var tasks = new int[total][];
+    for (var i = 0; i < total; i++)
+    {
+        // Given minShift=2, maxShift=3, depth=3 this generates
+        // [2 2 2]
+        // [3 2 2]
+        // [2 3 2]
+        // [3 3 2]
+        // [2 2 3]
+        // [3 2 3]
+        // [2 3 3]
+        // [3 3 3]
+        // In other words: i is interpreted as a number with `depth` digits in base `delta`.
+        var shifts = new int[depth];
+        for (int j = 0, index = i; j < depth; j++, index /= delta)
+        {
+            shifts[j] = minShift + index % delta;
+        }
+
+        tasks[i] = shifts;
+    }
+
+    // The candidates are independent of each other, so they're built in parallel.
+    return tasks.AsParallel().Select(shifts => BuildTrie(uncompressed, shifts)).MinBy(t => t.TotalSize)!;
+}
+
+// Compresses the given uncompressed data into a multi-level trie with shifts.Count+1 stages.
+// shifts defines the power-of-two sizes of the deduplicated chunks in each stage.
+// The final output receives no deduplication which is why this returns shifts.Count+1 stages. 
+static Trie BuildTrie(List<TrieType> uncompressed, Span<int> shifts)
+{
+    var cumulativeShift = 0;
+    var stages = new List<Stage>();
+
+    // Each iteration splits the current data into chunks of 1 << shift values, deduplicates them
+    // into `compressed` and continues with the list of chunk offsets as the input for the next stage.
+    for (int i = 0; i < shifts.Length; i++)
+    {
+        var shift = shifts[i];
+        var chunkSize = 1 << shift;
+        var cache = new Dictionary<ReadOnlyTrieTypeSpan, TrieType>();
+        var compressed = new List<TrieType>();
+        var offsets = new List<TrieType>();
+
+        for (var off = 0; off < uncompressed.Count; off += chunkSize)
+        {
+            // Wrap the chunk in a ReadOnlyTrieTypeSpan so that it can be hashed and used as a dictionary key.
+            // The last chunk may be shorter than chunkSize.
+            var key = new ReadOnlyTrieTypeSpan(uncompressed, off, Math.Min(chunkSize, uncompressed.Count - off));
+
+            if (!cache.TryGetValue(key, out var offset))
+            {
+                // For a 4-stage trie searching for existing occurrences of chunk in compressed yields a ~10%
+                // compression improvement. Checking for overlaps with the tail end of compressed yields another ~15%.
+                // FYI I tried to shuffle the order of compressed chunks but found that this has a negligible impact.
+                var haystack = CollectionsMarshal.AsSpan(compressed);
+                var needle = key.AsSpan();
+                var existing = FindExisting(haystack, needle);
+                if (existing >= 0)
+                {
+                    offset = (TrieType)existing;
+                }
+                else
+                {
+                    // Only append the part of the chunk that isn't already present at the end of compressed.
+                    // haystack must not be used anymore after AddRange() since the list may reallocate.
+                    var overlap = MeasureOverlap(haystack, needle);
+                    compressed.AddRange(needle[overlap..]);
+                    offset = (TrieType)(compressed.Count - needle.Length);
+                }
+
+                cache[key] = offset;
+            }
+
+            offsets.Add(offset);
+        }
+
+        // Stages are collected from last to first, hence the descending Index.
+        // Bits is a placeholder here and gets computed once all stages exist.
+        stages.Add(new Stage
+        {
+            Values = compressed,
+            Index = shifts.Length - i,
+            Shift = cumulativeShift,
+            Mask = chunkSize - 1,
+            Bits = 0,
+        });
+
+        uncompressed = offsets;
+        cumulativeShift += shift;
+    }
+
+    // The remaining list of offsets becomes the first stage. It is indexed
+    // by all remaining upper bits of the codepoint, so it needs no real mask.
+    stages.Add(new Stage
+    {
+        Values = uncompressed,
+        Index = 0,
+        Shift = cumulativeShift,
+        Mask = int.MaxValue,
+        Bits = 0,
+    });
+
+    stages.Reverse();
+
+    // Pick the smallest unsigned integer type that can hold the largest value of each stage.
+    foreach (var s in stages)
+    {
+        var m = s.Values.Max();
+        s.Bits = m switch
+        {
+            <= 0xff => 8,
+            <= 0xffff => 16,
+            _ => 32
+        };
+    }
+
+    return new Trie
+    {
+        Stages = stages,
+        TotalSize = stages.Sum(s => (s.Bits / 8) * s.Values.Count)
+    };
+}
+
+// 
Finds needle in haystack. Returns -1 if it couldn't be found.
+static int FindExisting(ReadOnlySpan<TrieType> haystack, ReadOnlySpan<TrieType> needle)
+{
+    return haystack.IndexOf(needle);
+}
+
+// Given two slices, this returns the amount by which `prev`s end overlaps with `next`s start.
+// That is, given [0,1,2,3,4] and [2,3,4,5] this returns 3 because [2,3,4] is the "overlap".
+// Returns 0 if there's no overlap at all, which includes the case that either slice is empty.
+static int MeasureOverlap(ReadOnlySpan<TrieType> prev, ReadOnlySpan<TrieType> next)
+{
+    // Try the longest possible overlap first, so that the first match is also the best one.
+    for (var overlap = Math.Min(prev.Length, next.Length); overlap > 0; overlap--)
+    {
+        if (prev[^overlap..].SequenceEqual(next[..overlap]))
+        {
+            return overlap;
+        }
+    }
+
+    return 0;
+}
+
+// The display width class of a codepoint.
+// TrieValue() stores it in 2 bits, so this enum can't have more than 4 members.
+enum CharacterWidth
+{
+    ZeroWidth,
+    Narrow,
+    Wide,
+    Ambiguous
+}
+
+// The grapheme cluster break class of a codepoint. The comments name the UAX #29 rules that use it.
+// The generated ucdGraphemeJoins() masks this value with 15, so this enum can't have more than 16 members.
+enum ClusterBreak
+{
+    Other, // GB999
+    Control, // GB3, GB4, GB5 -- includes CR, LF
+    Extend, // GB9, GB9a -- includes SpacingMark
+    RI, // GB12, GB13
+    Prepend, // GB9b
+    HangulL, // GB6, GB7, GB8
+    HangulV, // GB6, GB7, GB8
+    HangulT, // GB6, GB7, GB8
+    HangulLV, // GB6, GB7, GB8
+    HangulLVT, // GB6, GB7, GB8
+    InCBLinker, // GB9c
+    InCBConsonant, // GB9c
+    ExtPic, // GB11
+    ZWJ, // GB9, GB11
+}
+
+// The result of ExtractValuesFromUcd(): the description text
+// of the UCD file and one packed TrieValue() per codepoint.
+class Ucd
+{
+    public required string Description;
+    public required List<TrieType> Values;
+}
+
+// A single stage (lookup table) of the trie. An index into Values is formed by adding
+// ((codepoint >> Shift) & Mask) to the value found in the previous stage (Index - 1).
+// Bits is the width of the C++ integer type that Values gets stored as.
+class Stage
+{
+    public required List<TrieType> Values;
+    public required int Index;
+    public required int Shift;
+    public required int Mask;
+    public required int Bits;
+}
+
+// All stages of a trie, starting with stage 0, and their accumulated size in bytes.
+class Trie
+{
+    public required List<Stage> Stages;
+    public required int TotalSize;
+}
+
+// Because you can't put a Span into a Dictionary.
+// This works around that by simply keeping a reference to the List around.
+// It implements IEquatable so that dictionary lookups don't need to box the struct for comparisons.
+struct ReadOnlyTrieTypeSpan(List<TrieType> list, int start, int length) : IEquatable<ReadOnlyTrieTypeSpan>
+{
+    public ReadOnlySpan<TrieType> AsSpan() => CollectionsMarshal.AsSpan(list).Slice(start, length);
+
+    // Two instances are equal if the values they refer to are equal.
+    public bool Equals(ReadOnlyTrieTypeSpan other) => AsSpan().SequenceEqual(other.AsSpan());
+
+    public override bool Equals(object? 
obj)
+    {
+        return obj is ReadOnlyTrieTypeSpan other && AsSpan().SequenceEqual(other.AsSpan());
+    }
+
+    public override int GetHashCode()
+    {
+        HashCode hashCode = default;
+        hashCode.AddBytes(MemoryMarshal.AsBytes(AsSpan()));
+        return hashCode.ToHashCode();
+    }
+}
diff --git a/src/tools/GraphemeTestTableGen/GraphemeTestTableGen.csproj b/src/tools/GraphemeTestTableGen/GraphemeTestTableGen.csproj
new file mode 100644
index 00000000000..6f4623a81bd
--- /dev/null
+++ b/src/tools/GraphemeTestTableGen/GraphemeTestTableGen.csproj
@@ -0,0 +1,10 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+</Project>
diff --git a/src/tools/GraphemeTestTableGen/Program.cs b/src/tools/GraphemeTestTableGen/Program.cs
new file mode 100644
index 00000000000..2c45396dddc
--- /dev/null
+++ b/src/tools/GraphemeTestTableGen/Program.cs
@@ -0,0 +1,146 @@
+using System.Text;
+using System.Text.RegularExpressions;
+
+// Downloads the latest GraphemeBreakTest.txt from unicode.org and prints the tests
+// as a C++ table (s_graphemeBreakTests) to stdout.
+string data;
+using (var client = new HttpClient())
+{
+    var response = await client.GetAsync("https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt");
+    response.EnsureSuccessStatusCode();
+    data = await response.Content.ReadAsStringAsync();
+}
+
+var testString = new StringBuilder();
+var scanner = new StringReader(data);
+var firstLine = true;
+
+while (await scanner.ReadLineAsync() is { } line)
+{
+    // Each line has the form "<test> # <comment>" and either half may be empty.
+    var parts = line.Split('#');
+    var test = parts[0].Trim();
+    var comment = parts.Length > 1 ? parts[1].Trim() : "";
+
+    if (firstLine)
+    {
+        firstLine = false;
+
+        // The first line must be a comment with the file name, which contains the Unicode version.
+        var re = new Regex(@"^GraphemeBreakTest-(\d+\.\d+\.\d+)\.txt$");
+        var m = re.Match(comment);
+        if (!m.Success)
+        {
+            throw new Exception($"Failed to find version number, got: {comment}");
+        }
+
+        testString.Append(
+            $$"""
+            // Generated by GraphemeTestTableGen
+            // on {{DateTime.UtcNow.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ssK")}}, from Unicode {{m.Groups[1].Value}}
+            struct GraphemeBreakTest
+            {
+                const wchar_t* comment;
+                const wchar_t* graphemes[4];
+            };
+            static constexpr GraphemeBreakTest s_graphemeBreakTests[] = {
+
+            """
+        );
+    }
+
+    if (test == "" || comment == "")
+    {
+        continue;
+    }
+
+    // The test is split at every '÷', which yields one entry per expected grapheme cluster.
+    var graphemes = test.Split('÷');
+    for (var i = 0; i < graphemes.Length; i++)
+    {
+        graphemes[i] = graphemes[i].Trim();
+    }
+
+    testString.Append($" {{ L\"{comment}\"");
+
+    foreach (var g in graphemes)
+    {
+        if (string.IsNullOrEmpty(g))
+        {
+            continue;
+        }
+
+        testString.Append(", L\"");
+
+        // C++ hex escapes (\x...) have no maximum length: they swallow every hex digit that follows.
+        // If a \x escape is directly followed by a literal hex digit (e.g. U+0600 followed by 'b'),
+        // the string literal is split in two ("\x0600" L"b") so that the escape ends where intended.
+        var prevWasHexEscape = false;
+
+        // The codepoints of a single grapheme cluster are separated by '×'.
+        var codepoints = g.Split('×');
+        foreach (var c in codepoints)
+        {
+            var i = Convert.ToUInt32(c.Trim(), 16);
+            var isHexEscape = false;
+
+            switch (i)
+            {
+                case 0x07:
+                    testString.Append("\\a");
+                    break;
+                case 0x08:
+                    testString.Append("\\b");
+                    break;
+                case 0x09:
+                    testString.Append("\\t");
+                    break;
+                case 0x0A:
+                    testString.Append("\\n");
+                    break;
+                case 0x0B:
+                    testString.Append("\\v");
+                    break;
+                case 0x0C:
+                    testString.Append("\\f");
+                    break;
+                case 0x0D:
+                    testString.Append("\\r");
+                    break;
+                case 0x22 or 0x5c:
+                    // Quotes and backslashes would otherwise end or corrupt the string literal.
+                    testString.Append('\\').Append((char)i);
+                    break;
+                case >= 0x20 and <= 0x7e:
+                    if (prevWasHexEscape && char.IsAsciiHexDigit((char)i))
+                    {
+                        testString.Append("\" L\"");
+                    }
+                    testString.Append((char)i);
+                    break;
+                case <= 0xff:
+                    testString.Append($"\\x{i:X2}");
+                    isHexEscape = true;
+                    break;
+                case <= 0xffff:
+                    testString.Append($"\\x{i:X4}");
+                    isHexEscape = true;
+                    break;
+                default:
+                    testString.Append($"\\U{i:X8}");
+                    break;
+            }
+
+            prevWasHexEscape = isHexEscape;
+        }
+
+        testString.Append("\"");
+    }
+
+    testString.Append(" },\n");
+}
+
+testString.Append("};\n");
+
+Console.OutputEncoding = System.Text.Encoding.UTF8;
+Console.Write(testString);
diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp
index 8cf96475fcf..ff8aa1df47e 
100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -4,334 +4,15 @@ #include "precomp.h" #include "inc/CodepointWidthDetector.hpp" -// Due to the Feature_Graphemes::IsEnabled() feature flagging, some code may be disabled. -#pragma warning(disable : 4702) // unreachable code - // I was trying to minimize dependencies in this code so that it's easier to port to other terminal applications. -// Also, it has to be fast / have minimal overhead, since it potentially parses every single input character. +// That's why it doesn't use any of the GSL helpers and makes minimal use of the STL. #pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). #pragma warning(disable : 26472) // Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1). -#pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). -namespace -{ - // used to store range data in CodepointWidthDetector's internal map - struct UnicodeRange final - { - char32_t lowerBound; - char32_t upperBound : 31; - char32_t isAmbiguous : 1; - }; - - static bool operator<(const UnicodeRange& range, const unsigned int searchTerm) noexcept - { - return range.upperBound < searchTerm; - } - - // Generated by Generate-CodepointWidthsFromUCD.ps1 -Pack:True -Full: -NoOverrides:False - // on 2024-03-20 12:57:23Z from Unicode 15.1.0. - // 321154 (0x4E682) codepoints covered. - // 240 (0xF0) codepoints overridden. 
- // Override path: .\src\types\unicode_width_overrides.xml - static constexpr std::array s_wideAndAmbiguousTable{ - UnicodeRange{ 0xa1, 0xa1, 1 }, - UnicodeRange{ 0xa4, 0xa4, 1 }, - UnicodeRange{ 0xa7, 0xa8, 1 }, - UnicodeRange{ 0xaa, 0xaa, 1 }, - UnicodeRange{ 0xad, 0xae, 1 }, - UnicodeRange{ 0xb0, 0xb4, 1 }, - UnicodeRange{ 0xb6, 0xba, 1 }, - UnicodeRange{ 0xbc, 0xbf, 1 }, - UnicodeRange{ 0xc6, 0xc6, 1 }, - UnicodeRange{ 0xd0, 0xd0, 1 }, - UnicodeRange{ 0xd7, 0xd8, 1 }, - UnicodeRange{ 0xde, 0xe1, 1 }, - UnicodeRange{ 0xe6, 0xe6, 1 }, - UnicodeRange{ 0xe8, 0xea, 1 }, - UnicodeRange{ 0xec, 0xed, 1 }, - UnicodeRange{ 0xf0, 0xf0, 1 }, - UnicodeRange{ 0xf2, 0xf3, 1 }, - UnicodeRange{ 0xf7, 0xfa, 1 }, - UnicodeRange{ 0xfc, 0xfc, 1 }, - UnicodeRange{ 0xfe, 0xfe, 1 }, - UnicodeRange{ 0x101, 0x101, 1 }, - UnicodeRange{ 0x111, 0x111, 1 }, - UnicodeRange{ 0x113, 0x113, 1 }, - UnicodeRange{ 0x11b, 0x11b, 1 }, - UnicodeRange{ 0x126, 0x127, 1 }, - UnicodeRange{ 0x12b, 0x12b, 1 }, - UnicodeRange{ 0x131, 0x133, 1 }, - UnicodeRange{ 0x138, 0x138, 1 }, - UnicodeRange{ 0x13f, 0x142, 1 }, - UnicodeRange{ 0x144, 0x144, 1 }, - UnicodeRange{ 0x148, 0x14b, 1 }, - UnicodeRange{ 0x14d, 0x14d, 1 }, - UnicodeRange{ 0x152, 0x153, 1 }, - UnicodeRange{ 0x166, 0x167, 1 }, - UnicodeRange{ 0x16b, 0x16b, 1 }, - UnicodeRange{ 0x1ce, 0x1ce, 1 }, - UnicodeRange{ 0x1d0, 0x1d0, 1 }, - UnicodeRange{ 0x1d2, 0x1d2, 1 }, - UnicodeRange{ 0x1d4, 0x1d4, 1 }, - UnicodeRange{ 0x1d6, 0x1d6, 1 }, - UnicodeRange{ 0x1d8, 0x1d8, 1 }, - UnicodeRange{ 0x1da, 0x1da, 1 }, - UnicodeRange{ 0x1dc, 0x1dc, 1 }, - UnicodeRange{ 0x251, 0x251, 1 }, - UnicodeRange{ 0x261, 0x261, 1 }, - UnicodeRange{ 0x2c4, 0x2c4, 1 }, - UnicodeRange{ 0x2c7, 0x2c7, 1 }, - UnicodeRange{ 0x2c9, 0x2cb, 1 }, - UnicodeRange{ 0x2cd, 0x2cd, 1 }, - UnicodeRange{ 0x2d0, 0x2d0, 1 }, - UnicodeRange{ 0x2d8, 0x2db, 1 }, - UnicodeRange{ 0x2dd, 0x2dd, 1 }, - UnicodeRange{ 0x2df, 0x2df, 1 }, - UnicodeRange{ 0x300, 0x36f, 1 }, - UnicodeRange{ 0x391, 0x3a1, 1 }, 
- UnicodeRange{ 0x3a3, 0x3a9, 1 }, - UnicodeRange{ 0x3b1, 0x3c1, 1 }, - UnicodeRange{ 0x3c3, 0x3c9, 1 }, - UnicodeRange{ 0x401, 0x401, 1 }, - UnicodeRange{ 0x410, 0x44f, 1 }, - UnicodeRange{ 0x451, 0x451, 1 }, - UnicodeRange{ 0x1100, 0x115f, 0 }, - UnicodeRange{ 0x2010, 0x2010, 1 }, - UnicodeRange{ 0x2013, 0x2016, 1 }, - UnicodeRange{ 0x2018, 0x2019, 1 }, - UnicodeRange{ 0x201c, 0x201d, 1 }, - UnicodeRange{ 0x2020, 0x2022, 1 }, - UnicodeRange{ 0x2024, 0x2027, 1 }, - UnicodeRange{ 0x2030, 0x2030, 1 }, - UnicodeRange{ 0x2032, 0x2033, 1 }, - UnicodeRange{ 0x2035, 0x2035, 1 }, - UnicodeRange{ 0x203b, 0x203b, 1 }, - UnicodeRange{ 0x203e, 0x203e, 1 }, - UnicodeRange{ 0x2074, 0x2074, 1 }, - UnicodeRange{ 0x207f, 0x207f, 1 }, - UnicodeRange{ 0x2081, 0x2084, 1 }, - UnicodeRange{ 0x20ac, 0x20ac, 1 }, - UnicodeRange{ 0x2103, 0x2103, 1 }, - UnicodeRange{ 0x2105, 0x2105, 1 }, - UnicodeRange{ 0x2109, 0x2109, 1 }, - UnicodeRange{ 0x2113, 0x2113, 1 }, - UnicodeRange{ 0x2116, 0x2116, 1 }, - UnicodeRange{ 0x2121, 0x2122, 1 }, - UnicodeRange{ 0x2126, 0x2126, 1 }, - UnicodeRange{ 0x212b, 0x212b, 1 }, - UnicodeRange{ 0x2153, 0x2154, 1 }, - UnicodeRange{ 0x215b, 0x215e, 1 }, - UnicodeRange{ 0x2160, 0x216b, 1 }, - UnicodeRange{ 0x2170, 0x2179, 1 }, - UnicodeRange{ 0x2189, 0x2189, 1 }, - UnicodeRange{ 0x2190, 0x2199, 1 }, - UnicodeRange{ 0x21b8, 0x21b9, 1 }, - UnicodeRange{ 0x21d2, 0x21d2, 1 }, - UnicodeRange{ 0x21d4, 0x21d4, 1 }, - UnicodeRange{ 0x21e7, 0x21e7, 1 }, - UnicodeRange{ 0x2200, 0x2200, 1 }, - UnicodeRange{ 0x2202, 0x2203, 1 }, - UnicodeRange{ 0x2207, 0x2208, 1 }, - UnicodeRange{ 0x220b, 0x220b, 1 }, - UnicodeRange{ 0x220f, 0x220f, 1 }, - UnicodeRange{ 0x2211, 0x2211, 1 }, - UnicodeRange{ 0x2215, 0x2215, 1 }, - UnicodeRange{ 0x221a, 0x221a, 1 }, - UnicodeRange{ 0x221d, 0x2220, 1 }, - UnicodeRange{ 0x2223, 0x2223, 1 }, - UnicodeRange{ 0x2225, 0x2225, 1 }, - UnicodeRange{ 0x2227, 0x222c, 1 }, - UnicodeRange{ 0x222e, 0x222e, 1 }, - UnicodeRange{ 0x2234, 0x2237, 1 }, - 
UnicodeRange{ 0x223c, 0x223d, 1 }, - UnicodeRange{ 0x2248, 0x2248, 1 }, - UnicodeRange{ 0x224c, 0x224c, 1 }, - UnicodeRange{ 0x2252, 0x2252, 1 }, - UnicodeRange{ 0x2260, 0x2261, 1 }, - UnicodeRange{ 0x2264, 0x2267, 1 }, - UnicodeRange{ 0x226a, 0x226b, 1 }, - UnicodeRange{ 0x226e, 0x226f, 1 }, - UnicodeRange{ 0x2282, 0x2283, 1 }, - UnicodeRange{ 0x2286, 0x2287, 1 }, - UnicodeRange{ 0x2295, 0x2295, 1 }, - UnicodeRange{ 0x2299, 0x2299, 1 }, - UnicodeRange{ 0x22a5, 0x22a5, 1 }, - UnicodeRange{ 0x22bf, 0x22bf, 1 }, - UnicodeRange{ 0x2312, 0x2312, 1 }, - UnicodeRange{ 0x231a, 0x231b, 0 }, - UnicodeRange{ 0x2329, 0x232a, 0 }, - UnicodeRange{ 0x23e9, 0x23ec, 0 }, - UnicodeRange{ 0x23f0, 0x23f0, 0 }, - UnicodeRange{ 0x23f3, 0x23f3, 0 }, - UnicodeRange{ 0x2460, 0x24e9, 1 }, - UnicodeRange{ 0x24eb, 0x24ff, 1 }, - UnicodeRange{ 0x25a0, 0x25a1, 1 }, - UnicodeRange{ 0x25a3, 0x25a9, 1 }, - UnicodeRange{ 0x25b2, 0x25b3, 1 }, - UnicodeRange{ 0x25b6, 0x25b7, 1 }, - UnicodeRange{ 0x25bc, 0x25bd, 1 }, - UnicodeRange{ 0x25c0, 0x25c1, 1 }, - UnicodeRange{ 0x25c6, 0x25c8, 1 }, - UnicodeRange{ 0x25cb, 0x25cb, 1 }, - UnicodeRange{ 0x25ce, 0x25d1, 1 }, - UnicodeRange{ 0x25e2, 0x25e5, 1 }, - UnicodeRange{ 0x25ef, 0x25ef, 1 }, - UnicodeRange{ 0x25fd, 0x25fe, 0 }, - UnicodeRange{ 0x2605, 0x2606, 1 }, - UnicodeRange{ 0x2609, 0x2609, 1 }, - UnicodeRange{ 0x260e, 0x260f, 1 }, - UnicodeRange{ 0x2614, 0x2615, 0 }, - UnicodeRange{ 0x261c, 0x261c, 1 }, - UnicodeRange{ 0x261e, 0x261e, 1 }, - UnicodeRange{ 0x2640, 0x2640, 1 }, - UnicodeRange{ 0x2642, 0x2642, 1 }, - UnicodeRange{ 0x2648, 0x2653, 0 }, - UnicodeRange{ 0x2660, 0x2661, 1 }, - UnicodeRange{ 0x2663, 0x2665, 1 }, - UnicodeRange{ 0x2667, 0x266a, 1 }, - UnicodeRange{ 0x266c, 0x266d, 1 }, - UnicodeRange{ 0x266f, 0x266f, 1 }, - UnicodeRange{ 0x267f, 0x267f, 0 }, - UnicodeRange{ 0x2693, 0x2693, 0 }, - UnicodeRange{ 0x269e, 0x269f, 1 }, - UnicodeRange{ 0x26a1, 0x26a1, 0 }, - UnicodeRange{ 0x26aa, 0x26ab, 0 }, - UnicodeRange{ 0x26bd, 0x26be, 0 }, - 
UnicodeRange{ 0x26bf, 0x26bf, 1 }, - UnicodeRange{ 0x26c4, 0x26c5, 0 }, - UnicodeRange{ 0x26c6, 0x26cd, 1 }, - UnicodeRange{ 0x26ce, 0x26ce, 0 }, - UnicodeRange{ 0x26cf, 0x26d3, 1 }, - UnicodeRange{ 0x26d4, 0x26d4, 0 }, - UnicodeRange{ 0x26d5, 0x26e1, 1 }, - UnicodeRange{ 0x26e3, 0x26e3, 1 }, - UnicodeRange{ 0x26e8, 0x26e9, 1 }, - UnicodeRange{ 0x26ea, 0x26ea, 0 }, - UnicodeRange{ 0x26eb, 0x26f1, 1 }, - UnicodeRange{ 0x26f2, 0x26f3, 0 }, - UnicodeRange{ 0x26f4, 0x26f4, 1 }, - UnicodeRange{ 0x26f5, 0x26f5, 0 }, - UnicodeRange{ 0x26f6, 0x26f9, 1 }, - UnicodeRange{ 0x26fa, 0x26fa, 0 }, - UnicodeRange{ 0x26fb, 0x26fc, 1 }, - UnicodeRange{ 0x26fd, 0x26fd, 0 }, - UnicodeRange{ 0x26fe, 0x26ff, 1 }, - UnicodeRange{ 0x2705, 0x2705, 0 }, - UnicodeRange{ 0x270a, 0x270b, 0 }, - UnicodeRange{ 0x2728, 0x2728, 0 }, - UnicodeRange{ 0x273d, 0x273d, 1 }, - UnicodeRange{ 0x274c, 0x274c, 0 }, - UnicodeRange{ 0x274e, 0x274e, 0 }, - UnicodeRange{ 0x2753, 0x2755, 0 }, - UnicodeRange{ 0x2757, 0x2757, 0 }, - UnicodeRange{ 0x2776, 0x277f, 1 }, - UnicodeRange{ 0x2795, 0x2797, 0 }, - UnicodeRange{ 0x27b0, 0x27b0, 0 }, - UnicodeRange{ 0x27bf, 0x27bf, 0 }, - UnicodeRange{ 0x2b1b, 0x2b1c, 0 }, - UnicodeRange{ 0x2b50, 0x2b50, 0 }, - UnicodeRange{ 0x2b55, 0x2b55, 0 }, - UnicodeRange{ 0x2b56, 0x2b59, 1 }, - UnicodeRange{ 0x2e80, 0x2e99, 0 }, - UnicodeRange{ 0x2e9b, 0x2ef3, 0 }, - UnicodeRange{ 0x2f00, 0x2fd5, 0 }, - UnicodeRange{ 0x2ff0, 0x303e, 0 }, - UnicodeRange{ 0x3041, 0x3096, 0 }, - UnicodeRange{ 0x3099, 0x30ff, 0 }, - UnicodeRange{ 0x3105, 0x312f, 0 }, - UnicodeRange{ 0x3131, 0x318e, 0 }, - UnicodeRange{ 0x3190, 0x31e3, 0 }, - UnicodeRange{ 0x31ef, 0x321e, 0 }, - UnicodeRange{ 0x3220, 0x3247, 0 }, - UnicodeRange{ 0x3248, 0x324f, 1 }, - UnicodeRange{ 0x3250, 0x4dbf, 0 }, - UnicodeRange{ 0x4e00, 0xa48c, 0 }, - UnicodeRange{ 0xa490, 0xa4c6, 0 }, - UnicodeRange{ 0xa960, 0xa97c, 0 }, - UnicodeRange{ 0xac00, 0xd7a3, 0 }, - UnicodeRange{ 0xe000, 0xf8ff, 1 }, - UnicodeRange{ 0xf900, 0xfaff, 0 }, - 
UnicodeRange{ 0xfe00, 0xfe0f, 1 }, - UnicodeRange{ 0xfe10, 0xfe19, 0 }, - UnicodeRange{ 0xfe30, 0xfe52, 0 }, - UnicodeRange{ 0xfe54, 0xfe66, 0 }, - UnicodeRange{ 0xfe68, 0xfe6b, 0 }, - UnicodeRange{ 0xff01, 0xff60, 0 }, - UnicodeRange{ 0xffe0, 0xffe6, 0 }, - UnicodeRange{ 0xfffd, 0xfffd, 1 }, - UnicodeRange{ 0x16fe0, 0x16fe4, 0 }, - UnicodeRange{ 0x16ff0, 0x16ff1, 0 }, - UnicodeRange{ 0x17000, 0x187f7, 0 }, - UnicodeRange{ 0x18800, 0x18cd5, 0 }, - UnicodeRange{ 0x18d00, 0x18d08, 0 }, - UnicodeRange{ 0x1aff0, 0x1aff3, 0 }, - UnicodeRange{ 0x1aff5, 0x1affb, 0 }, - UnicodeRange{ 0x1affd, 0x1affe, 0 }, - UnicodeRange{ 0x1b000, 0x1b122, 0 }, - UnicodeRange{ 0x1b132, 0x1b132, 0 }, - UnicodeRange{ 0x1b150, 0x1b152, 0 }, - UnicodeRange{ 0x1b155, 0x1b155, 0 }, - UnicodeRange{ 0x1b164, 0x1b167, 0 }, - UnicodeRange{ 0x1b170, 0x1b2fb, 0 }, - UnicodeRange{ 0x1f004, 0x1f004, 0 }, - UnicodeRange{ 0x1f0cf, 0x1f0cf, 0 }, - UnicodeRange{ 0x1f100, 0x1f10a, 1 }, - UnicodeRange{ 0x1f110, 0x1f12d, 1 }, - UnicodeRange{ 0x1f130, 0x1f169, 1 }, - UnicodeRange{ 0x1f170, 0x1f18d, 1 }, - UnicodeRange{ 0x1f18e, 0x1f18e, 0 }, - UnicodeRange{ 0x1f18f, 0x1f190, 1 }, - UnicodeRange{ 0x1f191, 0x1f19a, 0 }, - UnicodeRange{ 0x1f19b, 0x1f1ac, 1 }, - UnicodeRange{ 0x1f1e6, 0x1f202, 0 }, - UnicodeRange{ 0x1f210, 0x1f23b, 0 }, - UnicodeRange{ 0x1f240, 0x1f248, 0 }, - UnicodeRange{ 0x1f250, 0x1f251, 0 }, - UnicodeRange{ 0x1f260, 0x1f265, 0 }, - UnicodeRange{ 0x1f300, 0x1f320, 0 }, - UnicodeRange{ 0x1f32d, 0x1f335, 0 }, - UnicodeRange{ 0x1f337, 0x1f37c, 0 }, - UnicodeRange{ 0x1f37e, 0x1f393, 0 }, - UnicodeRange{ 0x1f3a0, 0x1f3ca, 0 }, - UnicodeRange{ 0x1f3cf, 0x1f3d3, 0 }, - UnicodeRange{ 0x1f3e0, 0x1f3f0, 0 }, - UnicodeRange{ 0x1f3f4, 0x1f3f4, 0 }, - UnicodeRange{ 0x1f3f8, 0x1f43e, 0 }, - UnicodeRange{ 0x1f440, 0x1f440, 0 }, - UnicodeRange{ 0x1f442, 0x1f4fc, 0 }, - UnicodeRange{ 0x1f4ff, 0x1f53d, 0 }, - UnicodeRange{ 0x1f54b, 0x1f54e, 0 }, - UnicodeRange{ 0x1f550, 0x1f567, 0 }, - UnicodeRange{ 0x1f57a, 
0x1f57a, 0 }, - UnicodeRange{ 0x1f595, 0x1f596, 0 }, - UnicodeRange{ 0x1f5a4, 0x1f5a4, 0 }, - UnicodeRange{ 0x1f5fb, 0x1f64f, 0 }, - UnicodeRange{ 0x1f680, 0x1f6c5, 0 }, - UnicodeRange{ 0x1f6cc, 0x1f6cc, 0 }, - UnicodeRange{ 0x1f6d0, 0x1f6d2, 0 }, - UnicodeRange{ 0x1f6d5, 0x1f6d7, 0 }, - UnicodeRange{ 0x1f6dc, 0x1f6df, 0 }, - UnicodeRange{ 0x1f6eb, 0x1f6ec, 0 }, - UnicodeRange{ 0x1f6f4, 0x1f6fc, 0 }, - UnicodeRange{ 0x1f7e0, 0x1f7eb, 0 }, - UnicodeRange{ 0x1f7f0, 0x1f7f0, 0 }, - UnicodeRange{ 0x1f90c, 0x1f93a, 0 }, - UnicodeRange{ 0x1f93c, 0x1f945, 0 }, - UnicodeRange{ 0x1f947, 0x1f9ff, 0 }, - UnicodeRange{ 0x1fa70, 0x1fa7c, 0 }, - UnicodeRange{ 0x1fa80, 0x1fa88, 0 }, - UnicodeRange{ 0x1fa90, 0x1fabd, 0 }, - UnicodeRange{ 0x1fabf, 0x1fac5, 0 }, - UnicodeRange{ 0x1face, 0x1fadb, 0 }, - UnicodeRange{ 0x1fae0, 0x1fae8, 0 }, - UnicodeRange{ 0x1faf0, 0x1faf8, 0 }, - UnicodeRange{ 0x20000, 0x2fffd, 0 }, - UnicodeRange{ 0x30000, 0x3fffd, 0 }, - UnicodeRange{ 0xe0100, 0xe01ef, 1 }, - UnicodeRange{ 0xf0000, 0xffffd, 1 }, - UnicodeRange{ 0x100000, 0x10fffd, 1 }, - }; -} +// On top of that, this code is optimized for processing input as fast as possible, so it's a little low-level. +#pragma warning(disable : 26438) // Avoid 'goto' (es.76). +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). +#pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). // s_stage1/2/3/4 represents a multi-stage table, aka trie. // The highest bits of the codepoint are an index into s_stage1, which selects a row in s_stage2. 
@@ -361,10 +42,10 @@ namespace // This is a great reference for the s_joinRules table: // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html -// Generated by CodepointWidthDetector_gen.go -// on 2024-03-22T16:37:43Z, from Unicode 15.1.0, 8277 bytes +// Generated by GraphemeTableGen +// on 2024-05-31T21:13:48Z, from Unicode 15.1.0, 8479 bytes // clang-format off -static constexpr uint16_t s_stage1[] = { +static constexpr uint16_t s_stage0[] = { 0x0000, 0x0020, 0x0040, 0x0060, 0x0080, 0x009f, 0x00bf, 0x00ca, 0x00ca, 0x00d3, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00ca, 0x00eb, 0x010b, 0x011d, 0x0121, 0x011e, 0x011b, 0x0126, 0x0146, 0x0166, 0x0166, 0x0166, 0x0182, 0x01a2, 0x01ba, 0x01da, 0x01fa, 0x0146, 0x0146, 0x0218, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x022d, 0x00ca, 0x00ca, @@ -400,41 +81,41 @@ static constexpr uint16_t s_stage1[] = { 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x03de, }; -static constexpr uint16_t s_stage2[] = { +static constexpr uint16_t s_stage1[] = { 0x0000, 0x0004, 0x000c, 0x0014, 0x001c, 0x0024, 0x002a, 0x0031, 0x002a, 0x0037, 0x002a, 0x003f, 0x0047, 0x0049, 0x004f, 0x0057, 0x005f, 0x0065, 0x006d, 0x002a, 0x002a, 0x002a, 0x0073, 0x007b, 0x0083, 0x008a, 0x002a, 0x0091, 0x0098, 0x009f, 0x00a3, 0x00aa, - 0x00b2, 0x00b8, 0x00be, 0x00c5, 0x00cd, 0x00d5, 0x00dd, 0x00e5, 0x00dd, 0x00ed, 0x00dd, 0x00f5, 0x00dd, 0x00fd, 0x0105, 0x010d, 0x0115, 0x011d, 0x00dd, 0x0125, 0x012d, 0x0135, 0x013d, 0x0144, 0x014b, 0x0153, 0x0155, 0x015d, 0x0162, 0x006f, 0x016a, 0x0172, - 0x0175, 0x017d, 0x0185, 0x002a, 0x018d, 0x0191, 0x0195, 0x019a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x01a2, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 
0x002a, 0x01a8, 0x01af, 0x01b6, 0x01bd, - 0x01c4, 0x002a, 0x01cc, 0x002a, 0x01d2, 0x002a, 0x002a, 0x002a, 0x01da, 0x01e0, 0x01e8, 0x01ef, 0x01f7, 0x01ff, 0x0207, 0x020d, 0x0214, 0x002a, 0x002a, 0x021b, 0x002a, 0x002a, 0x002a, 0x0047, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, - 0x0223, 0x022b, 0x0233, 0x0239, 0x0241, 0x0249, 0x0251, 0x0259, 0x0261, 0x0269, 0x0271, 0x002a, 0x0279, 0x002a, 0x0280, 0x0287, 0x002a, 0x028f, 0x0293, 0x029b, 0x002a, 0x002a, 0x02a3, 0x02ab, 0x02b3, 0x02bb, 0x02c3, 0x02cb, 0x02d3, 0x02db, 0x02e3, 0x002a, - 0x002a, 0x002a, 0x002a, 0x02eb, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x02f3, 0x02f9, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x02fd, 0x002a, 0x0304, 0x002a, 0x0043, 0x002a, 0x002a, 0x030c, 0x0310, 0x0318, 0x0318, 0x0318, 0x031e, 0x0324, - 0x032c, 0x0332, 0x0318, 0x033a, 0x0318, 0x0341, 0x0345, 0x034b, 0x0352, 0x0358, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, - 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x002a, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x035f, 0x0367, 0x002a, - 0x002a, 0x002a, 0x002a, 0x002a, 0x036a, 0x0372, 0x015f, 0x002a, 0x002a, 0x002a, 0x002a, 0x037a, 0x002a, 0x0382, 0x038a, 0x0392, 0x039a, 0x03a2, 0x03aa, 0x03af, 0x03b7, 0x03bf, 0x03c7, 0x002a, 0x002a, 0x002a, 0x03ce, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, - 0x03db, 0x03dc, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03d6, 0x03d7, 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03d6, 0x03d7, - 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03e3, 0x03eb, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 
0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, - 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, - 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x03f3, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x03fb, 0x0401, 0x002a, 0x0407, 0x032c, 0x040f, - 0x0414, 0x0418, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0420, 0x002a, 0x002a, 0x002a, 0x0428, 0x002a, 0x042d, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, - 0x002a, 0x002a, 0x0435, 0x002a, 0x002a, 0x01c8, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x043d, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0442, 0x0448, 0x002a, 0x00a7, 0x0450, 0x002a, 0x0458, 0x0460, 0x0468, 0x0470, 0x0478, 0x0480, - 0x0488, 0x0490, 0x0493, 0x049b, 0x002a, 0x04a0, 0x012d, 0x04a8, 0x002a, 0x002a, 0x04af, 0x04b7, 0x01e8, 0x04bf, 0x002a, 0x002a, 0x04c2, 0x04ca, 0x01e8, 0x04d2, 0x04d5, 0x002a, 0x04dc, 0x002a, 0x002a, 0x002a, 0x04e2, 0x002a, 0x002a, 0x002a, 0x04ea, 0x04f2, - 0x002a, 0x04f8, 0x0500, 0x0508, 0x0510, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0514, 0x002a, 0x051c, 0x002a, 0x0523, 0x052b, 0x0532, 0x002a, 0x002a, 0x002a, 0x002a, 0x0535, 0x053d, 0x0545, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, - 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0547, 0x054f, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x01f1, 0x0552, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, - 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0559, 
0x0560, 0x0564, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, - 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x056c, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, - 0x0574, 0x057c, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, - 0x002a, 0x057e, 0x0318, 0x0318, 0x0318, 0x0318, 0x0586, 0x058d, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0593, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, - 0x002a, 0x002a, 0x059b, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05a3, - 0x05ab, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05af, 0x05b7, 0x002a, 0x002a, 0x05bf, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, - 0x002a, 0x05c7, 0x05cf, 0x05d7, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05df, 0x002a, 0x05e6, 0x002a, 0x0552, 0x002a, 0x002a, - 0x002a, 0x002a, 0x002a, 0x05e9, 0x05ef, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05ef, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05f5, 0x002a, 0x05fd, 0x002a, 0x002a, 0x002a, 0x002a, - 0x002a, 0x002a, 0x002a, 
0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0605, 0x0606, 0x0606, 0x060d, 0x0615, 0x061b, 0x0623, 0x0629, 0x0631, 0x0639, - 0x0606, 0x0606, 0x0641, 0x0648, 0x0650, 0x0657, 0x065f, 0x0667, 0x0668, 0x0669, 0x0671, 0x0679, 0x0681, 0x0686, 0x0668, 0x068e, 0x0668, 0x0696, 0x002a, 0x069e, 0x002a, 0x06a6, 0x06ae, 0x06b5, 0x06bc, 0x0606, 0x06c4, 0x06cc, 0x0668, 0x0668, 0x0606, 0x06d4, - 0x06dc, 0x06e4, 0x002a, 0x002a, 0x002a, 0x002a, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x06ec, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, - 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0318, 0x0319, 0x06f4, 0x0047, 0x06fc, 0x06fc, 0x0047, 0x0047, 0x0047, 0x0704, 0x06fc, 0x06fc, - 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x06fc, 0x0293, 0x0293, - 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x0293, 0x070c, + 0x00b2, 0x00b8, 0x00be, 0x00c5, 0x00cd, 0x00d5, 0x00dd, 0x00e5, 0x00ed, 0x00f5, 0x00fd, 0x0105, 0x00fd, 0x010d, 0x0115, 0x011d, 0x0125, 0x012d, 0x00ed, 0x0135, 0x013d, 0x0145, 0x014d, 0x0154, 0x015b, 0x0163, 0x0165, 0x016d, 0x0172, 0x006f, 0x017a, 0x0182, + 0x0185, 0x018d, 0x0195, 0x002a, 0x019d, 0x01a1, 0x01a5, 0x01aa, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x01b2, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x01b8, 
0x01bf, 0x01c6, 0x01cd, + 0x01d4, 0x002a, 0x01dc, 0x002a, 0x01e2, 0x002a, 0x002a, 0x002a, 0x01ea, 0x01f0, 0x01f8, 0x01ff, 0x0207, 0x020f, 0x0217, 0x021d, 0x0224, 0x002a, 0x002a, 0x022b, 0x002a, 0x002a, 0x002a, 0x0047, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x0233, 0x023b, 0x0243, 0x0249, 0x0251, 0x0259, 0x0261, 0x0269, 0x0271, 0x0279, 0x0281, 0x002a, 0x0289, 0x002a, 0x0290, 0x0297, 0x002a, 0x029f, 0x02a3, 0x02ab, 0x002a, 0x002a, 0x02b3, 0x02bb, 0x02c3, 0x02cb, 0x02d3, 0x02db, 0x02e3, 0x02eb, 0x02f3, 0x002a, + 0x002a, 0x002a, 0x002a, 0x02fb, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0303, 0x0309, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x030d, 0x002a, 0x0314, 0x002a, 0x0043, 0x002a, 0x002a, 0x031c, 0x0320, 0x0328, 0x0328, 0x0328, 0x032e, 0x0334, + 0x033c, 0x0342, 0x0328, 0x034a, 0x0328, 0x0351, 0x0355, 0x035b, 0x0362, 0x0368, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, + 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x002a, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x036f, 0x0377, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x037a, 0x0382, 0x016f, 0x002a, 0x002a, 0x002a, 0x002a, 0x038a, 0x002a, 0x0392, 0x039a, 0x03a2, 0x03aa, 0x03b2, 0x03ba, 0x03bf, 0x03c7, 0x03cf, 0x03d7, 0x002a, 0x002a, 0x002a, 0x03de, 0x03e6, 0x03e7, 0x03e8, 0x03e9, 0x03ea, + 0x03eb, 0x03ec, 0x03e6, 0x03e7, 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03e6, 0x03e7, 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03e6, 0x03e7, 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03e6, 0x03e7, 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03e6, 0x03e7, + 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03f3, 0x03fb, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 
0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, + 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0403, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x040b, 0x0411, 0x002a, 0x0417, 0x033c, 0x041f, + 0x0424, 0x0428, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0430, 0x002a, 0x002a, 0x002a, 0x0438, 0x002a, 0x043d, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x0445, 0x002a, 0x002a, 0x01d8, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x044d, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0452, 0x0458, 0x002a, 0x00a7, 0x0460, 0x002a, 0x0468, 0x0470, 0x0478, 0x0480, 0x0488, 0x0490, + 0x0498, 0x04a0, 0x04a3, 0x04ab, 0x002a, 0x04b0, 0x04b8, 0x04c0, 0x002a, 0x002a, 0x04c7, 0x04cf, 0x01f8, 0x04d7, 0x002a, 0x002a, 0x04da, 0x04e2, 0x01f8, 0x04ea, 0x04ed, 0x002a, 0x04f4, 0x002a, 0x002a, 0x002a, 0x04fa, 0x002a, 0x002a, 0x002a, 0x0502, 0x050a, + 0x002a, 0x0510, 0x0518, 0x0520, 0x0528, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x052c, 0x002a, 0x0534, 0x002a, 0x053b, 0x0543, 0x054a, 0x002a, 0x002a, 0x002a, 0x002a, 0x054d, 0x0555, 0x055d, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x055f, 0x0567, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0201, 0x056a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0571, 0x0578, 0x057c, 
0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, + 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0584, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, + 0x058c, 0x0594, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x0596, 0x0328, 0x0328, 0x0328, 0x0328, 0x059e, 0x05a5, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x05ab, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x05b3, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05bb, + 0x05c3, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05c7, 0x05cf, 0x002a, 0x002a, 0x05d7, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x05df, 0x05e7, 0x05ef, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x05f7, 0x002a, 0x05fe, 0x002a, 0x056a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x0601, 0x0607, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x0607, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x060d, 0x002a, 0x0615, 0x002a, 0x002a, 0x002a, 0x002a, + 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 
0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x002a, 0x061d, 0x061e, 0x061e, 0x0625, 0x062d, 0x0633, 0x063b, 0x0641, 0x0649, 0x0651, + 0x061e, 0x061e, 0x0659, 0x0660, 0x0668, 0x066f, 0x0677, 0x067f, 0x0680, 0x0681, 0x0689, 0x0691, 0x0699, 0x069e, 0x0680, 0x06a6, 0x0680, 0x06ae, 0x002a, 0x06b6, 0x002a, 0x06be, 0x06c6, 0x06cd, 0x06d4, 0x061e, 0x06dc, 0x06e4, 0x0680, 0x0680, 0x061e, 0x06ec, + 0x06f4, 0x06fc, 0x002a, 0x002a, 0x002a, 0x002a, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x061e, 0x0704, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, + 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0328, 0x0329, 0x070c, 0x0047, 0x0714, 0x0714, 0x0047, 0x0047, 0x0047, 0x071c, 0x0714, 0x0714, + 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x0714, 0x02a3, 0x02a3, + 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x0724, }; -static constexpr uint16_t s_stage3[] = { +static constexpr uint16_t s_stage2[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0011, 0x0018, 0x0020, 0x0022, 0x002a, 0x0008, 0x0029, 0x0030, @@ -460,214 +141,217 @@ static constexpr uint16_t s_stage3[] = { 0x00a4, 0x00f1, 0x0008, 0x0008, 0x00a8, 0x00f9, 0x00fd, 0x00a2, 0x0008, 0x0008, 0x0008, 0x0101, 0x0008, 0x0008, 0x0008, 0x0008, 0x0109, 0x0089, 0x0008, 0x0008, 
0x0008, 0x0008, 0x00ac, 0x0089, - 0x0089, 0x0111, 0x0089, 0x0089, 0x0089, 0x00a4, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0119, 0x0089, 0x011f, 0x00ad, - 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x0101, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x012b, 0x00dc, 0x0132, 0x00ce, - 0x0008, 0x0127, 0x0008, 0x0008, 0x0138, 0x0140, 0x0147, 0x00cc, - 0x0008, 0x0008, 0x0008, 0x014b, 0x0008, 0x00af, 0x0153, 0x0008, - 0x0008, 0x0127, 0x0008, 0x0008, 0x00ac, 0x00dc, 0x0132, 0x00a9, - 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x00cb, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x00a8, 0x015b, 0x00e3, 0x00ce, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a3, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x012b, 0x00b0, 0x0162, 0x0168, - 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x00b0, 0x00e3, 0x0168, - 0x0008, 0x0127, 0x0008, 0x00ca, 0x0008, 0x00a4, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x015e, 0x00b0, 0x016e, 0x00ce, - 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x0101, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0175, 0x017c, 0x0089, - 0x0008, 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0181, 0x00a5, 0x00ce, 0x008a, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0181, 0x00a3, 0x0008, 0x008a, 0x0008, + 0x0089, 0x0111, 0x0089, 0x0089, 0x0089, 0x00a4, 0x0008, 0x0119, + 0x011e, 0x011e, 0x011e, 0x011e, 0x0124, 0x0089, 0x012a, 0x00ad, + 0x011e, 0x0132, 0x0008, 0x0008, 0x011e, 0x0101, 0x0008, 0x0119, + 0x011e, 0x011e, 0x013a, 0x0141, 0x0147, 0x00dc, 0x014e, 0x00ce, + 0x0154, 0x0132, 0x0008, 0x015b, 0x015d, 0x0101, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x015f, 0x0165, 0x016c, 0x00cc, + 0x0008, 0x0008, 0x0008, 0x0170, 0x0008, 0x0101, 0x0008, 0x0119, + 0x011e, 0x011e, 0x013a, 0x0178, 0x0147, 0x00af, 0x0180, 0x0008, + 0x0008, 0x0132, 0x0008, 0x0008, 0x0187, 0x00dc, 0x014e, 0x00a9, + 0x0154, 0x0132, 0x0008, 0x018f, 0x0008, 0x00cb, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00a8, 0x0197, 
0x00e3, 0x00ce, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a3, 0x0008, 0x0119, + 0x011e, 0x011e, 0x013a, 0x011e, 0x0147, 0x00b0, 0x019e, 0x01a4, + 0x01ac, 0x0132, 0x0008, 0x0008, 0x0008, 0x00b0, 0x00e3, 0x01a4, + 0x0008, 0x0132, 0x0008, 0x00ca, 0x0008, 0x00a4, 0x0008, 0x0119, + 0x011e, 0x011e, 0x011e, 0x011e, 0x017d, 0x00b0, 0x01b4, 0x00ce, + 0x0008, 0x0132, 0x0008, 0x0008, 0x0008, 0x0101, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x01bb, 0x01c2, 0x0089, + 0x0008, 0x0008, 0x0132, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x01c7, 0x00a5, 0x00ce, 0x008a, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x01c7, 0x00a3, 0x0008, 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a6, 0x0008, 0x0008, - 0x0139, 0x0176, 0x00b0, 0x00a9, 0x0089, 0x00ad, 0x0089, 0x0089, - 0x0089, 0x00a3, 0x0138, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0189, 0x0089, 0x018d, 0x0008, 0x0008, 0x00a8, - 0x0192, 0x0199, 0x01a0, 0x00e4, 0x0008, 0x01a6, 0x01ad, 0x0008, - 0x01b5, 0x0008, 0x0008, 0x0008, 0x0008, 0x01bd, 0x01bd, 0x01bd, - 0x01bd, 0x01bd, 0x01bd, 0x01bd, 0x01bd, 0x01c5, 0x01c5, 0x01c5, - 0x01c5, 0x01c5, 0x01cd, 0x01cd, 0x01cd, 0x01cd, 0x01cd, 0x01cd, - 0x01cd, 0x01cd, 0x0008, 0x0008, 0x0008, 0x00a9, 0x0008, 0x0008, - 0x0008, 0x0008, 0x01d5, 0x0008, 0x0008, 0x0008, 0x0148, 0x0008, - 0x0008, 0x0127, 0x0008, 0x0008, 0x0008, 0x0127, 0x0008, 0x0008, + 0x015e, 0x01bc, 0x00b0, 0x00a9, 0x0089, 0x00ad, 0x0089, 0x0089, + 0x0089, 0x00a3, 0x015d, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x01cf, 0x0089, 0x01d3, 0x0008, 0x0008, 0x00a8, + 0x01d8, 0x01df, 0x01e6, 0x00e4, 0x0008, 0x01ec, 0x01f3, 0x0008, + 0x01fb, 0x0008, 0x0008, 0x0008, 0x0008, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x020b, 0x020b, 0x020b, + 0x020b, 0x020b, 0x0213, 0x0213, 0x0213, 0x0213, 0x0213, 0x0213, + 0x0213, 0x0213, 0x0008, 0x0008, 0x0008, 0x00a9, 0x0008, 0x0008, + 0x0008, 0x0008, 0x021b, 0x0008, 0x0008, 0x0008, 0x016d, 0x0008, + 
0x0008, 0x0132, 0x0008, 0x0008, 0x0008, 0x0132, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x0089, 0x0089, 0x00a4, - 0x00f1, 0x0008, 0x0008, 0x0008, 0x0008, 0x01db, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0168, 0x0008, 0x0008, 0x0008, + 0x00f1, 0x0008, 0x0008, 0x0008, 0x0008, 0x0221, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x01a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x00cc, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, 0x00a4, 0x0089, 0x00a4, 0x0008, 0x0008, 0x00ce, 0x00a4, 0x0008, 0x0008, - 0x0008, 0x0008, 0x00a9, 0x008a, 0x01e2, 0x0089, 0x0089, 0x00dc, + 0x0008, 0x0008, 0x00a9, 0x008a, 0x0228, 0x0089, 0x0089, 0x00dc, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a3, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x0089, 0x00a3, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x00a4, 0x0008, 0x00a5, - 0x0008, 0x0008, 0x0008, 0x00ad, 0x018e, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00ad, 0x01d4, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a8, 0x0089, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, - 0x00aa, 0x0089, 0x0089, 0x0008, 0x0008, 0x011b, 0x0089, 0x0089, - 0x0142, 0x00f2, 0x00a6, 0x0008, 0x01ea, 0x01f2, 0x01f7, 0x0022, - 0x01ff, 0x0207, 0x020d, 0x0008, 0x0214, 0x0008, 0x0008, 0x021c, - 0x0222, 0x002c, 0x007a, 0x0025, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00aa, 0x0089, 0x0089, 0x0008, 0x0008, 0x0126, 0x0089, 0x0089, + 0x0167, 0x00f2, 0x00a6, 0x0008, 0x0230, 0x0238, 0x023d, 0x0022, + 0x0245, 0x024d, 0x0253, 0x0008, 0x025a, 0x0008, 0x0008, 0x0262, + 0x0268, 0x002c, 0x007a, 0x0025, 0x0008, 0x0008, 0x0008, 0x0008, 0x002c, 0x0008, 0x0008, 0x0089, 0x0089, 0x0089, 0x0089, 0x00cd, - 0x0008, 0x022a, 0x004c, 0x0073, 0x0008, 0x0231, 0x002d, 0x0008, - 0x0214, 0x0008, 0x0008, 0x0033, 0x0060, 0x0092, 0x0026, 0x0092, - 0x0028, 0x0008, 0x004c, 0x0239, 0x023f, 0x0008, 0x0246, 0x0008, - 0x0028, 0x0008, 0x0008, 0x024c, 0x0008, 0x007a, 0x0008, 0x0008, - 0x0008, 0x0040, 0x0254, 0x024f, 
0x0259, 0x0068, 0x025e, 0x007d, - 0x0032, 0x0008, 0x0264, 0x002e, 0x0008, 0x026c, 0x026a, 0x0008, - 0x0008, 0x026a, 0x0008, 0x002b, 0x004c, 0x002b, 0x0008, 0x0008, - 0x007a, 0x0008, 0x0008, 0x002e, 0x0274, 0x0008, 0x027c, 0x0008, - 0x0008, 0x0284, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0285, 0x0008, 0x0008, 0x0008, 0x028d, 0x0295, 0x029d, 0x0008, + 0x0008, 0x0270, 0x004c, 0x0073, 0x0008, 0x0277, 0x002d, 0x0008, + 0x025a, 0x0008, 0x0008, 0x0033, 0x0060, 0x0092, 0x0026, 0x0092, + 0x0028, 0x0008, 0x004c, 0x027f, 0x0285, 0x0008, 0x028c, 0x0008, + 0x0028, 0x0008, 0x0008, 0x0292, 0x0008, 0x007a, 0x0008, 0x0008, + 0x0008, 0x0040, 0x029a, 0x0295, 0x029f, 0x0068, 0x02a4, 0x007d, + 0x0032, 0x0008, 0x02aa, 0x002e, 0x0008, 0x02b2, 0x02b0, 0x0008, + 0x0008, 0x02b0, 0x0008, 0x002b, 0x004c, 0x002b, 0x0008, 0x0008, + 0x007a, 0x0008, 0x0008, 0x002e, 0x02ba, 0x0008, 0x02c2, 0x0008, + 0x0008, 0x02ca, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x02cb, 0x0008, 0x0008, 0x0008, 0x02d3, 0x02db, 0x02e3, 0x0008, 0x0008, 0x0008, 0x0008, 0x0092, 0x0092, 0x0092, 0x0092, 0x0092, - 0x0092, 0x0092, 0x0092, 0x02a5, 0x0092, 0x0092, 0x0092, 0x0092, + 0x0092, 0x0092, 0x0092, 0x02eb, 0x0092, 0x0092, 0x0092, 0x0092, 0x0098, 0x0092, 0x0092, 0x0008, 0x0008, 0x0008, 0x0008, 0x0098, - 0x02ab, 0x02b1, 0x0032, 0x02b7, 0x02be, 0x0028, 0x0008, 0x01f3, - 0x007a, 0x0008, 0x02c6, 0x02ce, 0x02d5, 0x02dd, 0x02e3, 0x02ea, - 0x02ea, 0x02ea, 0x02ea, 0x02e7, 0x02f2, 0x02f6, 0x02ea, 0x02fe, - 0x0301, 0x02ea, 0x02eb, 0x0309, 0x0008, 0x0311, 0x0315, 0x0313, - 0x031d, 0x02ea, 0x0321, 0x0322, 0x0328, 0x032a, 0x032f, 0x0305, - 0x032c, 0x0335, 0x033b, 0x0343, 0x031d, 0x034b, 0x027f, 0x0214, - 0x0353, 0x0244, 0x002b, 0x0357, 0x035f, 0x0366, 0x0008, 0x036e, - 0x0008, 0x004e, 0x0092, 0x0008, 0x0008, 0x0376, 0x0008, 0x0214, - 0x0008, 0x0353, 0x037e, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0243, 0x0008, 0x0386, 0x0008, 0x0008, 0x038e, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0392, 0x0028, 0x0008, 0x0008, 
0x0008, + 0x02f1, 0x02f7, 0x0032, 0x02fd, 0x0304, 0x0028, 0x0008, 0x0239, + 0x007a, 0x0008, 0x030c, 0x0314, 0x031b, 0x0323, 0x0329, 0x0330, + 0x0330, 0x0330, 0x0330, 0x032d, 0x0338, 0x033c, 0x0330, 0x0344, + 0x0347, 0x0330, 0x0331, 0x034f, 0x0008, 0x0357, 0x035b, 0x0359, + 0x0363, 0x0330, 0x0367, 0x0368, 0x036e, 0x0370, 0x0375, 0x034b, + 0x0372, 0x037b, 0x0381, 0x0389, 0x0363, 0x0391, 0x02c5, 0x025a, + 0x0399, 0x028a, 0x002b, 0x039d, 0x03a5, 0x03ac, 0x0008, 0x03b4, + 0x0008, 0x004e, 0x0092, 0x0008, 0x0008, 0x03bc, 0x0008, 0x025a, + 0x0008, 0x0399, 0x03c4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0289, 0x0008, 0x03cc, 0x0008, 0x0008, 0x03d4, 0x0008, + 0x0008, 0x0008, 0x0008, 0x03d8, 0x0028, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ce, 0x00a6, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x00ce, 0x039a, 0x039a, 0x039a, 0x03a0, - 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x03a4, 0x0008, - 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, - 0x03ac, 0x0008, 0x0008, 0x0008, 0x039a, 0x039a, 0x039a, 0x039a, - 0x039a, 0x03b4, 0x03bc, 0x03bf, 0x03c6, 0x039a, 0x039a, 0x039a, - 0x039a, 0x039a, 0x039a, 0x039a, 0x039b, 0x03ce, 0x039a, 0x039a, - 0x039a, 0x039a, 0x03d6, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, - 0x03c6, 0x039a, 0x039b, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, - 0x039a, 0x03a4, 0x03de, 0x039a, 0x039a, 0x039a, 0x039b, 0x039a, - 0x039a, 0x039a, 0x039a, 0x0092, 0x039a, 0x039a, 0x039a, 0x039a, - 0x039a, 0x039a, 0x03bd, 0x03e5, 0x039a, 0x039a, 0x039a, 0x039a, - 0x03a3, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, 0x039b, + 0x0008, 0x0008, 0x0008, 0x00ce, 0x03e0, 0x03e0, 0x03e0, 0x03e6, + 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03ea, 0x0008, + 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, + 0x03f2, 0x0008, 0x0008, 0x0008, 0x03e0, 0x03e0, 0x03e0, 0x03e0, + 0x03e0, 0x03fa, 0x0402, 0x0405, 0x040c, 0x03e0, 0x03e0, 0x03e0, + 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e1, 0x0414, 0x03e0, 0x03e0, + 0x03e0, 
0x03e0, 0x041c, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, + 0x040c, 0x03e0, 0x03e1, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, + 0x03e0, 0x03ea, 0x0424, 0x03e0, 0x03e0, 0x03e0, 0x03e1, 0x03e0, + 0x03e0, 0x03e0, 0x03e0, 0x0092, 0x03e0, 0x03e0, 0x03e0, 0x03e0, + 0x03e0, 0x03e0, 0x0403, 0x042b, 0x03e0, 0x03e0, 0x03e0, 0x03e0, + 0x03e9, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e1, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ce, - 0x011b, 0x018e, 0x0008, 0x0008, 0x0008, 0x00a8, 0x0008, 0x0008, - 0x0008, 0x0008, 0x03ed, 0x00ca, 0x0008, 0x0008, 0x00a0, 0x03f4, + 0x0126, 0x01d4, 0x0008, 0x0008, 0x0008, 0x00a8, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0433, 0x00ca, 0x0008, 0x0008, 0x00a0, 0x043a, 0x0008, 0x0008, 0x00a6, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x00aa, 0x0089, 0x018e, 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, - 0x00a6, 0x00ce, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a8, 0x018e, - 0x0008, 0x0008, 0x00ce, 0x0089, 0x00a4, 0x0008, 0x01bd, 0x01bd, - 0x01bd, 0x03fc, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00aa, 0x0089, 0x01d4, 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, + 0x00a6, 0x00ce, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a8, 0x01d4, + 0x0008, 0x0008, 0x00ce, 0x0089, 0x00a4, 0x0008, 0x0203, 0x0203, + 0x0203, 0x0442, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x0089, 0x00cd, 0x0008, 0x0008, 0x0008, 0x00f1, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ad, 0x008a, 0x0008, 0x00ca, - 0x0401, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0407, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x040f, 0x0416, 0x00cc, - 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x0168, 0x0008, 0x0008, - 0x0008, 0x0008, 0x00a0, 0x041c, 0x0008, 0x0008, 0x0424, 0x0425, - 0x0425, 0x0429, 0x0425, 0x0425, 0x0425, 0x0424, 0x0425, 0x0425, - 0x0429, 0x0425, 0x0425, 0x0425, 0x0424, 0x0425, 0x0425, 0x042e, - 0x0008, 0x01c5, 0x01c5, 0x0436, 0x043d, 0x01cd, 0x01cd, 0x01cd, - 0x01cd, 0x01cd, 0x0441, 0x0008, 0x0008, 0x0008, 0x0138, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0089, 
0x0449, 0x039a, 0x03dc, 0x0089, - 0x0089, 0x039a, 0x039a, 0x039f, 0x039a, 0x039b, 0x03a4, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0451, 0x039a, - 0x039a, 0x039a, 0x039a, 0x03dd, 0x0008, 0x0008, 0x0008, 0x0459, - 0x0008, 0x0008, 0x0008, 0x0008, 0x039b, 0x0008, 0x0000, 0x0461, + 0x0447, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x044d, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0455, 0x045c, 0x00cc, + 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x01a4, 0x0008, 0x0008, + 0x0008, 0x0008, 0x00a0, 0x0462, 0x0008, 0x0008, 0x046a, 0x046b, + 0x046b, 0x046f, 0x046b, 0x046b, 0x046b, 0x046a, 0x046b, 0x046b, + 0x046f, 0x046b, 0x046b, 0x046b, 0x046a, 0x046b, 0x046b, 0x0474, + 0x0008, 0x020b, 0x020b, 0x047c, 0x0483, 0x0213, 0x0213, 0x0213, + 0x0213, 0x0213, 0x0487, 0x0008, 0x0008, 0x0008, 0x015d, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, 0x03e0, 0x0422, 0x0089, + 0x0089, 0x03e0, 0x03e0, 0x03e5, 0x03e0, 0x03e1, 0x03ea, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x048b, 0x03e0, + 0x03e0, 0x03e0, 0x03e0, 0x0423, 0x0008, 0x0008, 0x0008, 0x0493, + 0x0008, 0x0008, 0x0008, 0x0008, 0x03e1, 0x0008, 0x0000, 0x049b, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00f1, 0x0008, 0x0008, 0x0008, 0x0008, 0x00cd, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x00a8, 0x00a5, 0x041b, 0x00aa, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0140, 0x0008, 0x0008, 0x0008, - 0x0008, 0x00aa, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0402, + 0x0008, 0x0008, 0x0008, 0x00a8, 0x00a5, 0x0461, 0x00aa, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0165, 0x0008, 0x0008, 0x0008, + 0x0008, 0x00aa, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0448, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a9, - 0x01d5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x021b, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, - 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0126, 
0x00ce, - 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, 0x0469, + 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0131, 0x00ce, + 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, 0x04a3, 0x00cb, 0x00eb, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a5, 0x0008, 0x0008, 0x0008, 0x00ce, 0x0089, 0x00a3, 0x0008, - 0x0168, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ca, 0x0008, + 0x01a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ca, 0x0008, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x0089, - 0x0471, 0x0478, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x00aa, 0x0089, 0x0138, 0x00cc, 0x0008, 0x0008, 0x0008, 0x0008, + 0x04ab, 0x04b2, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x00aa, 0x0089, 0x015d, 0x00cc, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ce, 0x0089, 0x00a5, 0x0008, 0x0008, - 0x00dc, 0x0147, 0x00ce, 0x0008, 0x0480, 0x00a3, 0x00a3, 0x0008, + 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x019a, + 0x00dc, 0x016c, 0x00ce, 0x0008, 0x04ba, 0x00a3, 0x00a3, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a9, 0x0089, 0x008a, - 0x0008, 0x0008, 0x0138, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a4, + 0x0008, 0x0008, 0x015d, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ce, - 0x018e, 0x0089, 0x00cd, 0x0008, 0x0008, 0x0401, 0x0008, 0x0008, + 0x01d4, 0x0089, 0x00cd, 0x0008, 0x0008, 0x0447, 0x0008, 0x0008, 0x0008, 0x0008, 0x00cd, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x0089, 0x0008, 0x0008, 0x0008, 0x00a9, - 0x018c, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, + 0x01d2, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x0089, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x00af, 0x0483, 0x0489, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x00ad, 0x00ac, 0x03ef, 0x0008, 0x0008, 0x0008, - 0x00ad, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x0491, + 0x00af, 0x04bd, 0x04c3, 
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x00ad, 0x00ac, 0x0435, 0x0008, 0x0008, 0x0008, + 0x00ad, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a0, 0x04cb, 0x00ce, 0x0008, 0x00ad, 0x00a4, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0498, 0x049e, 0x0089, 0x00a6, 0x0008, 0x0008, 0x0008, 0x0008, + 0x04d2, 0x04d8, 0x0089, 0x00a6, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00ce, 0x008a, 0x0089, 0x0008, 0x0008, 0x00ac, 0x0089, 0x0089, 0x00ad, 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x04a6, 0x04ad, 0x04b4, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x04e0, 0x04e7, 0x04ee, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00a1, 0x00f9, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0476, 0x0008, 0x04b8, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x00aa, 0x015b, 0x00a5, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0222, 0x0222, 0x00a7, - 0x0089, 0x018e, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x04b0, 0x0008, 0x04f2, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00aa, 0x0197, 0x00a5, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0268, 0x0268, 0x00a7, + 0x0089, 0x01d4, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x008a, 0x0008, 0x00ce, 0x00ad, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x00ce, 0x00a5, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x04c0, 0x0008, 0x00a6, 0x0008, 0x039a, 0x039a, 0x039a, 0x039a, - 0x039a, 0x039a, 0x039a, 0x0008, 0x039a, 0x039a, 0x03ac, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x039a, 0x03dd, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x039e, 0x04c8, 0x039a, 0x039a, - 0x039a, 0x039a, 0x03d3, 0x0008, 0x04cf, 0x0008, 0x0008, 0x04d7, - 0x0008, 0x03a8, 0x0008, 0x039a, 0x039a, 0x039a, 0x039a, 0x039a, - 0x039a, 0x039a, 0x03a4, 0x0008, 0x0008, 0x0008, 0x0168, 0x04df, + 0x04fa, 0x0008, 0x00a6, 0x0008, 0x03e0, 0x03e0, 0x03e0, 0x03e0, + 0x03e0, 0x03e0, 0x03e0, 0x0008, 0x03e0, 0x03e0, 0x03f2, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x03e0, 0x0423, 0x0008, 
0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x03e4, 0x0502, 0x03e0, 0x03e0, + 0x03e0, 0x03e0, 0x0419, 0x0008, 0x0509, 0x0008, 0x0008, 0x0511, + 0x0008, 0x03ee, 0x0008, 0x03e0, 0x03e0, 0x03e0, 0x03e0, 0x03e0, + 0x03e0, 0x03e0, 0x03ea, 0x0008, 0x0008, 0x0008, 0x01a4, 0x0519, 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, - 0x018e, 0x0089, 0x0089, 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x00a9, 0x01d9, 0x04e7, 0x04ec, 0x0116, - 0x00a4, 0x0008, 0x0008, 0x0008, 0x01d5, 0x0008, 0x0008, 0x0148, + 0x01d4, 0x0089, 0x0089, 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x00a9, 0x021f, 0x0521, 0x0526, 0x052b, + 0x00a4, 0x0008, 0x0008, 0x0008, 0x021b, 0x0008, 0x0008, 0x016d, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x008a, 0x00a0, 0x0089, - 0x0089, 0x0089, 0x0089, 0x0089, 0x00a3, 0x00f1, 0x0008, 0x03f4, + 0x0089, 0x0089, 0x0089, 0x0089, 0x00a3, 0x00f1, 0x0008, 0x043a, 0x0008, 0x0008, 0x00a0, 0x00ad, 0x0089, 0x0008, 0x0008, 0x008a, - 0x0089, 0x0089, 0x04f3, 0x00b3, 0x00a5, 0x0008, 0x0008, 0x00ce, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0138, 0x0008, + 0x0089, 0x0089, 0x052d, 0x00b3, 0x00a5, 0x0008, 0x0008, 0x00ce, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x015d, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x0008, 0x0008, 0x008a, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x00aa, 0x00a5, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x04fb, 0x02ea, 0x02ea, - 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02eb, 0x02ea, - 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x0092, 0x0503, 0x0092, - 0x0092, 0x0092, 0x050b, 0x0092, 0x0092, 0x0092, 0x0092, 0x0092, - 0x0513, 0x051b, 0x051d, 0x0092, 0x0525, 0x052c, 0x0531, 0x0092, - 0x0534, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x0539, 0x053f, 0x053f, - 0x053f, 0x0547, 0x02ea, 0x039a, 0x054f, 0x039a, 0x03bd, 0x0555, - 0x055a, 0x039a, 0x055d, 0x0565, 0x02ea, 0x02f4, 0x02ea, 0x02ea, - 0x02ea, 0x02f2, 
0x02f2, 0x02f2, 0x02f2, 0x0566, 0x02ed, 0x056e, - 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x056f, - 0x02f2, 0x02f2, 0x02f6, 0x02ea, 0x02f2, 0x02f2, 0x02f2, 0x02f2, - 0x0575, 0x02f6, 0x02ea, 0x02f2, 0x02f2, 0x057c, 0x0584, 0x02f2, - 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f3, 0x058c, - 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, - 0x058f, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02f2, - 0x0596, 0x0241, 0x059e, 0x02f2, 0x02f2, 0x02f2, 0x02ea, 0x02ea, - 0x0312, 0x02ea, 0x02ea, 0x0560, 0x02ea, 0x04fb, 0x02ea, 0x02ea, - 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ea, 0x02ef, 0x02f2, 0x02f2, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x02f4, 0x04fb, - 0x0591, 0x02ee, 0x02ea, 0x0562, 0x02ee, 0x02f5, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x05a6, 0x02ea, 0x0008, 0x0008, - 0x0386, 0x02ea, 0x02f2, 0x02f6, 0x0566, 0x02ea, 0x0008, 0x05a6, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x02ea, 0x0008, - 0x05a8, 0x0008, 0x0008, 0x0008, 0x0008, 0x02ea, 0x0008, 0x0008, - 0x0008, 0x0241, 0x02ea, 0x02ea, 0x0008, 0x05b0, 0x02f2, 0x02f2, - 0x02f2, 0x02f2, 0x02f2, 0x05b5, 0x05b9, 0x02f2, 0x02f2, 0x02f2, - 0x02f2, 0x02f2, 0x02f2, 0x02f2, 0x02ea, 0x02ea, 0x02ea, 0x02ea, - 0x02ea, 0x02ea, 0x02f2, 0x02f5, 0x02f2, 0x0566, 0x02f2, 0x02f2, - 0x02f2, 0x02f2, 0x02f2, 0x056e, 0x02f4, 0x02ec, 0x02f2, 0x02f6, - 0x02f2, 0x0566, 0x02f2, 0x0566, 0x02ea, 0x02ea, 0x02ea, 0x02ea, - 0x02ea, 0x02ea, 0x02ea, 0x0309, 0x05c1, 0x0000, 0x0000, 0x0000, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0535, 0x0330, 0x0330, + 0x0330, 0x0330, 0x0330, 0x0330, 0x0330, 0x0330, 0x0331, 0x0330, + 0x0330, 0x0330, 0x0330, 0x0330, 0x0330, 0x0092, 0x053d, 0x0092, + 0x0092, 0x0092, 0x0545, 0x0092, 0x0092, 0x0092, 0x0092, 0x0092, + 0x054d, 0x0555, 0x0557, 0x0092, 0x055f, 0x0566, 0x056b, 0x0092, + 0x056e, 0x0330, 0x0330, 0x0330, 0x0330, 0x0573, 0x0579, 0x0579, + 0x0579, 0x0581, 0x0330, 0x03e0, 0x0589, 0x03e0, 0x0403, 0x058f, + 0x0594, 0x03e0, 0x0597, 0x059f, 
0x0330, 0x033a, 0x0330, 0x0330, + 0x0330, 0x0338, 0x0338, 0x0338, 0x0338, 0x05a0, 0x0333, 0x05a8, + 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x05a9, + 0x0338, 0x0338, 0x033c, 0x0330, 0x0338, 0x0338, 0x0338, 0x0338, + 0x05af, 0x033c, 0x0330, 0x0338, 0x0338, 0x05b6, 0x05be, 0x0338, + 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0339, 0x05c6, + 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, + 0x05c9, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, 0x0338, + 0x05d0, 0x0287, 0x05d8, 0x0338, 0x0338, 0x0338, 0x0330, 0x0330, + 0x0358, 0x0330, 0x0330, 0x059a, 0x0330, 0x0535, 0x0330, 0x0330, + 0x0330, 0x0330, 0x0330, 0x0330, 0x0330, 0x0335, 0x0338, 0x0338, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x033a, 0x0535, + 0x05cb, 0x0334, 0x0330, 0x059c, 0x0334, 0x033b, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x05e0, 0x0330, 0x0008, 0x0008, + 0x03cc, 0x0330, 0x0338, 0x033c, 0x05a0, 0x0330, 0x0008, 0x05e0, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0330, 0x0008, + 0x05e2, 0x0008, 0x0008, 0x0008, 0x0008, 0x0330, 0x0008, 0x0008, + 0x0008, 0x0287, 0x0330, 0x0330, 0x0008, 0x05ea, 0x0338, 0x0338, + 0x0338, 0x0338, 0x0338, 0x05ef, 0x05f3, 0x0338, 0x0338, 0x0338, + 0x0338, 0x0338, 0x0338, 0x0338, 0x0330, 0x0330, 0x0330, 0x0330, + 0x0330, 0x0330, 0x0338, 0x033b, 0x0338, 0x05a0, 0x0338, 0x0338, + 0x0338, 0x0338, 0x0338, 0x05a8, 0x033a, 0x0332, 0x0338, 0x033c, + 0x0338, 0x05a0, 0x0338, 0x05a0, 0x0330, 0x0330, 0x0330, 0x0330, + 0x0330, 0x0330, 0x0330, 0x034f, 0x05fb, 0x0000, 0x0000, 0x0000, 0x0089, 0x0089, 0x0089, 0x0089, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0000, 0x0000, 0x0092, 0x0092, 0x0092, 0x0092, - 0x0092, 0x0092, 0x0092, 0x05c9, + 0x0092, 0x0092, 0x0092, 0x0603, }; -static constexpr uint8_t s_stage4[] = { +static constexpr uint8_t s_stage3[] = { 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x41, 
0x40, 0xc0, 0x40, 0x40, 0xc0, 0x40, 0x40, - 0xc0, 0x4b, 0xc0, 0x40, 0x40, 0x01, 0xcb, 0x40, + 0xc0, 0x4c, 0xc0, 0x40, 0x40, 0x01, 0xcc, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, @@ -681,215 +365,251 @@ static constexpr uint8_t s_stage4[] = { 0x40, 0xc0, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0x40, - 0xc0, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, - 0x40, 0x40, 0x40, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x40, 0x07, 0x07, - 0x40, 0x07, 0x07, 0x40, 0x07, 0x09, 0x09, 0x09, - 0x09, 0x09, 0x09, 0x40, 0x40, 0x07, 0x07, 0x07, - 0x40, 0x01, 0x40, 0x40, 0x40, 0x07, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x07, - 0x07, 0x07, 0x09, 0x40, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x40, 0x40, 0x07, 0x40, 0x07, 0x07, 0x07, - 0x07, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - 0x09, 0x40, 0x40, 0x40, 0x40, 0x40, 0x07, 0x40, - 0x40, 0x07, 0x07, 0x40, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, - 0x40, 0x09, 0x09, 0x40, 0x40, 0x40, 0x40, 0x40, - 0x40, 0x07, 0x07, 0x09, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x40, 0x40, 0x07, 0x07, 0x07, 0x40, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x0a, 0x07, 0x07, 0x40, - 0x40, 0x07, 0x07, 0x40, 0x40, 0x40, 0x40, 0x07, - 0x40, 0x07, 0x07, 0x40, 0x40, 0x07, 0x07, 0x0a, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x07, 0x40, - 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, 0x40, 0x07, - 0x40, 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, - 0x07, 0x40, 0x40, 0x07, 0x07, 0x40, 0x07, 0x07, - 0x0a, 0x40, 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, - 0x40, 0x07, 0x07, 0x40, 0x07, 0x07, 0x07, 0x0a, - 0x40, 
0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x40, - 0x07, 0x07, 0x07, 0x0a, 0x49, 0x40, 0x40, 0x07, - 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x40, 0x07, 0x40, 0x47, 0x07, 0x07, 0x07, - 0x07, 0x40, 0x40, 0x40, 0x00, 0x00, 0x07, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, 0x40, - 0x07, 0x07, 0x40, 0x00, 0x00, 0x00, 0x40, 0x40, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, - 0x07, 0x00, 0x07, 0x07, 0x07, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x07, 0x40, 0x00, 0x40, 0x40, 0x00, - 0x00, 0x00, 0x07, 0x40, 0x40, 0x82, 0x82, 0x82, - 0x82, 0x82, 0x82, 0x82, 0x82, 0x43, 0x43, 0x43, - 0x43, 0x43, 0x43, 0x43, 0x43, 0x44, 0x44, 0x44, - 0x44, 0x44, 0x44, 0x44, 0x44, 0x40, 0x40, 0x07, - 0x07, 0x07, 0x07, 0x40, 0x40, 0x40, 0x07, 0x07, - 0x07, 0x01, 0x07, 0x00, 0x07, 0x00, 0x00, 0x07, - 0x07, 0x07, 0x40, 0x40, 0x40, 0x01, 0x07, 0x08, - 0x01, 0x01, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0xc0, - 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0x41, - 0x41, 0x01, 0x01, 0x01, 0x01, 0x01, 0x40, 0xc0, - 0x40, 0xc0, 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, - 0xc0, 0x4b, 0x40, 0xc0, 0x40, 0x4b, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x41, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x40, 0x40, 0x40, 0xc0, 0x40, 0xc0, - 0x40, 0x40, 0xc0, 0xcb, 0x40, 0x40, 0x40, 0xc0, - 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0xcb, 0xcb, 0xcb, - 0xcb, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x4b, - 0x4b, 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, - 0xc0, 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, - 0x40, 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0xc0, - 0xc0, 0xc0, 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, - 0xc0, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, - 0xc0, 0xc0, 0xc0, 0xc0, 0x40, 0x40, 0x8b, 0x8b, - 0x40, 0x40, 0x40, 0x40, 0x4b, 0x80, 0x80, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x4b, 0x40, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x4b, 0x40, 0x8b, 0x8b, - 0x8b, 0x8b, 0x4b, 0x4b, 0x4b, 0x8b, 0x4b, 0x4b, - 0x8b, 0x40, 0x40, 0x40, 0x40, 0x4b, 0x4b, 0x4b, - 0x40, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xcb, - 0xc0, 
0xc0, 0xc0, 0xc0, 0xc0, 0x4b, 0x4b, 0x40, - 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0xcb, - 0xc0, 0x40, 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, - 0x40, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, - 0x40, 0x4b, 0x4b, 0x8b, 0x8b, 0x40, 0x4b, 0x4b, - 0x4b, 0x4b, 0x4b, 0xcb, 0xc0, 0x4b, 0xcb, 0x4b, - 0x4b, 0x4b, 0x4b, 0xcb, 0xcb, 0x4b, 0x4b, 0x4b, - 0x40, 0x8b, 0x8b, 0x4b, 0x4b, 0x4b, 0x4b, 0xcb, - 0x4b, 0xcb, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, - 0x4b, 0x4b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, - 0x8b, 0x8b, 0x4b, 0x4b, 0x4b, 0x4b, 0xcb, 0xcb, - 0x4b, 0xcb, 0xcb, 0xcb, 0x4b, 0xcb, 0xcb, 0x4b, - 0xcb, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x40, - 0x40, 0x4b, 0x4b, 0x4b, 0x8b, 0x4b, 0x4b, 0x4b, - 0x4b, 0x4b, 0x4b, 0xcb, 0xcb, 0x4b, 0x4b, 0x8b, - 0x8b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, 0x8b, - 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0x8b, 0xcb, - 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0x8b, - 0x8b, 0xcb, 0x8b, 0xcb, 0xcb, 0x8b, 0xcb, 0xcb, - 0x8b, 0xcb, 0xcb, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, - 0x8b, 0x40, 0x40, 0x4b, 0x4b, 0x4b, 0x40, 0x4b, - 0x40, 0x4b, 0x40, 0x8b, 0x40, 0x40, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x4b, 0x40, 0x40, 0x4b, 0x40, - 0x40, 0x40, 0x40, 0x8b, 0x40, 0x8b, 0x40, 0x40, - 0x40, 0x8b, 0x8b, 0x8b, 0x40, 0x8b, 0x40, 0x40, - 0x40, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x8b, 0x8b, 0x8b, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x8b, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x4b, 0x4b, 0x4b, 0x40, 0x40, - 0x40, 0x8b, 0x8b, 0x40, 0x40, 0x40, 0x40, 0x8b, - 0xc0, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x40, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x40, 0x40, 0x40, 0x40, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x40, 0x40, 0x80, 0x80, 0x07, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x8b, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x8b, 0x80, 0x40, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x07, - 0x07, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x80, 0x80, 0x80, 0x40, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x40, 0x80, 0x8b, 0x80, - 0x80, 
0x80, 0x80, 0x80, 0x80, 0x40, 0x40, 0x07, - 0x40, 0x40, 0x40, 0x07, 0x40, 0x40, 0x40, 0x40, - 0x07, 0x40, 0x40, 0x40, 0x82, 0x82, 0x82, 0x82, - 0x82, 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x40, - 0x40, 0x40, 0x00, 0x07, 0x00, 0x40, 0x40, 0x07, - 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, 0x07, 0x40, - 0x40, 0x40, 0x40, 0x40, 0x07, 0x07, 0x07, 0x40, - 0x07, 0x07, 0x40, 0x40, 0x85, 0x86, 0x86, 0x86, - 0x86, 0x86, 0x86, 0x86, 0x86, 0x85, 0x86, 0x86, - 0x86, 0x86, 0x40, 0x40, 0x40, 0x40, 0x43, 0x43, - 0x43, 0x43, 0x43, 0x43, 0x43, 0x40, 0x40, 0x40, - 0x44, 0x44, 0x44, 0x44, 0x44, 0x40, 0x40, 0x40, - 0x40, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x87, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - 0x01, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x47, - 0x47, 0x41, 0x01, 0x01, 0x01, 0x40, 0xc0, 0x40, - 0x40, 0x07, 0x07, 0x07, 0x40, 0x40, 0x09, 0x40, - 0x40, 0x07, 0x40, 0x49, 0x49, 0x40, 0x40, 0x40, - 0x40, 0x07, 0x07, 0x07, 0x07, 0x40, 0x07, 0x07, - 0x40, 0x40, 0x07, 0x07, 0x40, 0x40, 0x07, 0x07, - 0x07, 0x07, 0x49, 0x07, 0x07, 0x40, 0x40, 0x40, - 0x40, 0x07, 0x07, 0x49, 0x07, 0x07, 0x07, 0x07, - 0x40, 0x40, 0x40, 0x40, 0x49, 0x49, 0x49, 0x49, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x40, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x40, 0x40, 0x07, - 0x40, 0x07, 0x07, 0x40, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x07, 0x49, 0x07, 0x40, 0x40, 0x40, 0x40, - 0x80, 0x80, 0x80, 0x80, 0x07, 0x40, 0x40, 0x40, - 0x80, 0x80, 0x80, 0x80, 0x40, 0x80, 0x80, 0x40, - 0x40, 0x80, 0x40, 0x40, 0x40, 0x40, 0x40, 0x80, - 0x80, 0x80, 0x40, 0x40, 0x80, 0x40, 0x40, 0x01, - 0x01, 0x01, 0x01, 0x40, 0x40, 0x40, 0x40, 0x07, - 0x07, 0x07, 0x01, 0x01, 0x01, 0x01, 0x01, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x40, 0x40, 0x07, 0x07, - 0x07, 0x07, 0x07, 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, - 0x4b, 0x4b, 0x4b, 0xc0, 0xc0, 0xc0, 0x40, 0x40, - 0x4b, 0x4b, 0x4b, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, - 0xc0, 0x40, 0x4b, 0xc0, 0xc0, 0x40, 0x40, 0x4b, - 0x4b, 0x4b, 0x4b, 0xcb, 0xcb, 0xc0, 0xc0, 0xc0, - 0xc0, 0xc0, 0xc0, 0xcb, 0xcb, 0xc0, 0xc0, 0xc0, - 0xc0, 
0xc0, 0xc0, 0x8b, 0xc0, 0x8b, 0x8b, 0x8b, - 0x8b, 0x8b, 0x8b, 0x8b, 0xc0, 0xc0, 0xc0, 0xc0, - 0xc0, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4c, - 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x80, - 0x8b, 0x8b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x80, - 0x80, 0x8b, 0x80, 0x80, 0x80, 0x80, 0x80, 0x8b, - 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x80, 0x4b, 0x4b, - 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, 0x8b, 0x4b, - 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, 0x8b, - 0x8b, 0x8b, 0x8b, 0x8b, 0x4b, 0x8b, 0x8b, 0x8b, - 0x4b, 0x4b, 0x4b, 0x4b, 0x8b, 0x4b, 0x4b, 0x4b, - 0x8b, 0x4b, 0x4b, 0x4b, 0x8b, 0x8b, 0x8b, 0x87, - 0x87, 0x87, 0x87, 0x87, 0x8b, 0x4b, 0x8b, 0x8b, - 0x8b, 0x8b, 0x8b, 0x8b, 0x4b, 0x4b, 0x8b, 0x8b, - 0x8b, 0x8b, 0x8b, 0x8b, 0x40, 0x40, 0x4b, 0x4b, - 0x4b, 0x8b, 0x8b, 0x8b, 0x8b, 0x4b, 0x40, 0x40, - 0x40, 0x40, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, - 0x40, 0x40, 0x40, 0x40, 0x8b, 0x8b, 0x8b, 0x8b, - 0x40, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x40, - 0x8b, 0x41, 0x01, 0x41, 0x41, 0x41, 0x41, 0x41, - 0x41, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, - 0x40, + 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, 0x02, 0x02, + 0x40, 0x02, 0x02, 0x40, 0x02, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x40, 0x40, 0x02, 0x02, 0x02, + 0x40, 0x01, 0x40, 0x40, 0x40, 0x02, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x04, 0x40, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x40, 0x02, 0x40, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x04, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, 0x40, + 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, 0x40, + 0x40, 0x04, 0x04, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x02, 0x02, 0x04, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x40, 0x40, 0x40, 0x40, 0x4b, 0x4b, + 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x02, 0x02, + 0x02, 0x40, 0x02, 0x02, 0x02, 0x02, 0x02, 0x0a, + 0x02, 0x02, 0x40, 0x40, 0x02, 0x02, 0x40, 0x40, 
+ 0x40, 0x40, 0x4b, 0x40, 0x4b, 0x4b, 0x4b, 0x4b, + 0x4b, 0x4b, 0x40, 0x4b, 0x40, 0x40, 0x40, 0x4b, + 0x4b, 0x40, 0x40, 0x02, 0x40, 0x02, 0x02, 0x40, + 0x40, 0x02, 0x02, 0x0a, 0x40, 0x40, 0x40, 0x40, + 0x4b, 0x4b, 0x40, 0x4b, 0x4b, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x02, 0x40, 0x02, 0x02, 0x02, + 0x40, 0x40, 0x40, 0x40, 0x02, 0x40, 0x40, 0x02, + 0x02, 0x02, 0x40, 0x40, 0x40, 0x02, 0x40, 0x40, + 0x4b, 0x40, 0x4b, 0x4b, 0x40, 0x4b, 0x4b, 0x4b, + 0x02, 0x02, 0x40, 0x02, 0x02, 0x0a, 0x40, 0x40, + 0x4b, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, + 0x4b, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x02, + 0x02, 0x02, 0x40, 0x40, 0x40, 0x02, 0x02, 0x40, + 0x02, 0x02, 0x02, 0x0a, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x02, 0x02, 0x40, 0x4b, 0x4b, 0x4b, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x02, 0x40, 0x02, 0x02, + 0x02, 0x0a, 0x44, 0x40, 0x40, 0x02, 0x40, 0x40, + 0x40, 0x40, 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, + 0x02, 0x40, 0x42, 0x02, 0x02, 0x02, 0x02, 0x40, + 0x40, 0x40, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x40, 0x40, 0x40, 0x40, 0x02, 0x02, + 0x40, 0x00, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, 0x02, 0x00, + 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x40, 0x40, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x40, 0x85, 0x85, 0x85, 0x85, 0x85, + 0x85, 0x85, 0x85, 0x46, 0x46, 0x46, 0x46, 0x46, + 0x46, 0x46, 0x46, 0x47, 0x47, 0x47, 0x47, 0x47, + 0x47, 0x47, 0x47, 0x40, 0x40, 0x02, 0x02, 0x02, + 0x02, 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x01, + 0x02, 0x00, 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, + 0x40, 0x40, 0x40, 0x01, 0x02, 0x0d, 0x01, 0x01, + 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, 0x40, + 0x40, 0xc0, 0xc0, 0x40, 0x40, 0x41, 0x41, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x40, 0xc0, 0x40, 0xc0, + 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, 0xc0, 0x4c, + 0x40, 0xc0, 0x40, 0x4c, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x01, 0x01, 0x01, 0x01, 0x01, 0x41, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x40, 0x40, 0x40, 0xc0, 0x40, 0xc0, 0x40, 0x40, 
+ 0xc0, 0xcc, 0x40, 0x40, 0x40, 0xc0, 0x40, 0xc0, + 0xc0, 0xc0, 0xc0, 0xcc, 0xcc, 0xcc, 0xcc, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x4c, 0x4c, 0x40, + 0x40, 0x40, 0x40, 0x40, 0xc0, 0x40, 0xc0, 0x40, + 0x40, 0x40, 0xc0, 0x40, 0x40, 0xc0, 0x40, 0x40, + 0x40, 0xc0, 0x40, 0x40, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0x40, 0xc0, 0x40, 0x40, 0x40, 0xc0, 0x40, + 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0xc0, 0xc0, + 0xc0, 0xc0, 0x40, 0x40, 0x8c, 0x8c, 0x40, 0x40, + 0x40, 0x40, 0x4c, 0x80, 0x80, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x4c, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x4c, 0x40, 0x8c, 0x8c, 0x8c, 0x8c, + 0x4c, 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, 0x8c, 0x40, + 0x40, 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0x40, 0x40, + 0x40, 0x40, 0x40, 0xc0, 0xc0, 0xcc, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0x4c, 0x4c, 0x40, 0x40, 0x40, + 0x40, 0xc0, 0xc0, 0x40, 0x40, 0xcc, 0xc0, 0x40, + 0x40, 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0xc0, + 0x40, 0x40, 0xc0, 0xc0, 0x40, 0x40, 0x40, 0x4c, + 0x4c, 0x8c, 0x8c, 0x40, 0x4c, 0x4c, 0x4c, 0x4c, + 0x4c, 0xcc, 0xc0, 0x4c, 0xcc, 0x4c, 0x4c, 0x4c, + 0x4c, 0xcc, 0xcc, 0x4c, 0x4c, 0x4c, 0x40, 0x8c, + 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0xcc, 0x4c, 0xcc, + 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, + 0x4c, 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, 0x4c, 0xcc, + 0xcc, 0xcc, 0x4c, 0xcc, 0xcc, 0x4c, 0xcc, 0x4c, + 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x40, 0x40, 0x4c, + 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, + 0x4c, 0xcc, 0xcc, 0x4c, 0x4c, 0x8c, 0x8c, 0x4c, + 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0x8c, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x8c, 0x8c, 0xcc, + 0x8c, 0xcc, 0xcc, 0x8c, 0xcc, 0xcc, 0x8c, 0xcc, + 0xcc, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x40, + 0x40, 0x4c, 0x4c, 0x4c, 0x40, 0x4c, 0x40, 0x4c, + 0x40, 0x8c, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x4c, 0x40, 0x40, 0x4c, 0x40, 0x40, 0x40, + 0x40, 0x8c, 0x40, 0x8c, 0x40, 0x40, 0x40, 0x8c, + 0x8c, 0x8c, 0x40, 0x8c, 0x40, 0x40, 0x40, 0x4c, 
+ 0x4c, 0x4c, 0x4c, 0x4c, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x8c, 0x8c, 0x8c, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x8c, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x4c, 0x4c, 0x4c, 0x40, 0x40, 0x40, 0x8c, + 0x8c, 0x40, 0x40, 0x40, 0x40, 0x8c, 0xc0, 0xc0, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x40, 0x80, 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, + 0x40, 0x40, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x40, 0x40, 0x80, 0x80, 0x02, 0x02, 0x02, 0x02, + 0x02, 0x02, 0x8c, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x8c, 0x80, 0x40, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x40, 0x02, 0x02, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x80, 0x80, 0x80, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x80, 0x8c, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x40, 0x40, 0x02, 0x40, 0x40, + 0x40, 0x02, 0x40, 0x40, 0x40, 0x40, 0x02, 0x40, + 0x40, 0x40, 0x85, 0x85, 0x85, 0x85, 0x85, 0x40, + 0x40, 0x40, 0x40, 0x02, 0x02, 0x40, 0x40, 0x40, + 0x00, 0x02, 0x00, 0x40, 0x40, 0x02, 0x40, 0x02, + 0x02, 0x02, 0x40, 0x40, 0x02, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, 0x02, 0x02, + 0x40, 0x40, 0x88, 0x89, 0x89, 0x89, 0x89, 0x89, + 0x89, 0x89, 0x89, 0x88, 0x89, 0x89, 0x89, 0x89, + 0x40, 0x40, 0x40, 0x40, 0x46, 0x46, 0x46, 0x46, + 0x46, 0x46, 0x46, 0x40, 0x40, 0x40, 0x47, 0x47, + 0x47, 0x47, 0x47, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x01, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x42, 0x42, 0x41, 0x01, 0x01, 0x01, 0x40, + 0xc0, 0x40, 0x40, 0x02, 0x02, 0x02, 0x40, 0x40, + 0x04, 0x40, 0x40, 0x02, 0x40, 0x44, 0x44, 0x40, + 0x40, 0x40, 0x40, 0x02, 0x02, 0x02, 0x02, 0x40, + 0x02, 0x02, 0x40, 0x40, 0x02, 0x02, 0x40, 0x40, + 0x02, 0x02, 0x02, 0x02, 0x44, 0x02, 0x02, 0x40, + 0x40, 0x40, 0x40, 0x02, 0x02, 0x44, 0x02, 0x02, + 0x02, 0x02, 0x40, 0x40, 0x40, 0x40, 0x44, 0x44, + 0x44, 0x44, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x40, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, + 0x40, 0x02, 0x40, 0x02, 0x02, 0x40, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x44, 0x02, 0x40, 0x40, 
+ 0x40, 0x40, 0x80, 0x80, 0x80, 0x80, 0x02, 0x40, + 0x40, 0x40, 0x80, 0x80, 0x80, 0x80, 0x40, 0x80, + 0x80, 0x40, 0x40, 0x80, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x80, 0x80, 0x80, 0x40, 0x40, 0x80, 0x40, + 0x40, 0x01, 0x01, 0x01, 0x01, 0x40, 0x40, 0x40, + 0x40, 0x02, 0x02, 0x02, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x40, 0x40, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x4c, 0x4c, 0x4c, + 0x4c, 0x8c, 0x4c, 0x4c, 0x4c, 0xc0, 0xc0, 0xc0, + 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0xc0, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0x40, 0x4c, 0xc0, 0xc0, 0x40, + 0x40, 0x4c, 0x4c, 0x4c, 0x4c, 0xcc, 0xcc, 0xc0, + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xcc, 0xcc, 0xc0, + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x8c, 0xc0, 0x8c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, + 0x4c, 0x43, 0x43, 0x43, 0x43, 0x43, 0x43, 0x43, + 0x43, 0x80, 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, + 0x4c, 0x80, 0x80, 0x8c, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x80, + 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, + 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, 0x8c, + 0x8c, 0x8c, 0x4c, 0x4c, 0x4c, 0x4c, 0x8c, 0x4c, + 0x4c, 0x4c, 0x8c, 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, + 0x8c, 0x82, 0x82, 0x82, 0x82, 0x82, 0x8c, 0x4c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, 0x4c, + 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x40, 0x40, + 0x4c, 0x4c, 0x4c, 0x8c, 0x8c, 0x8c, 0x8c, 0x4c, + 0x40, 0x40, 0x40, 0x40, 0x4c, 0x4c, 0x4c, 0x4c, + 0x4c, 0x4c, 0x40, 0x40, 0x40, 0x40, 0x8c, 0x8c, + 0x8c, 0x8c, 0x40, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, + 0x8c, 0x40, 0x8c, 0x41, 0x01, 0x41, 0x41, 0x41, + 0x41, 0x41, 0x41, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0x40, 0x40, }; -static constexpr uint16_t s_joinRules[] = { - 0b0000010110000000, - 0b0000000000000000, - 0b0000010111101100, - 0b0000010110011000, - 0b0000010110010000, - 0b0000010110011000, - 0b0000010110010000, - 0b0000010110000000, - 0b0000110110000000, - 0b0001111111111101, - 
0b0001111111111101, - 0b0000010110000000, - 0b0001010110000000, - 0b0000000000000000, - 0b0000000000000000, - 0b0000000000000000, +static constexpr uint32_t s_joinRules[2][16] = { + { + 0b00000011110011111111111111001111, + 0b00001111111111111111111111111111, + 0b00000011110011111111111111001111, + 0b00000011110011111111111101001111, + 0b00000000000000000000000000001100, + 0b00000011110000001100001111001111, + 0b00000011110011110000111111001111, + 0b00000011110011110011111111001111, + 0b00000011110011110000111111001111, + 0b00000011110011110011111111001111, + 0b00000011000011111111111111001111, + 0b00000011110011111111111111001111, + 0b00000011110011111111111111001111, + 0b00000000110011111111111111001111, + 0b00000000000000000000000000000000, + 0b00000000000000000000000000000000, + }, + { + 0b00000011110011111111111111001111, + 0b00001111111111111111111111111111, + 0b00000011110011111111111111001111, + 0b00000011110011111111111111001111, + 0b00000000000000000000000000001100, + 0b00000011110000001100001111001111, + 0b00000011110011110000111111001111, + 0b00000011110011110011111111001111, + 0b00000011110011110000111111001111, + 0b00000011110011110011111111001111, + 0b00000011000011111111111111001111, + 0b00000011110011111111111111001111, + 0b00000011110011111111111111001111, + 0b00000000110011111111111111001111, + 0b00000000000000000000000000000000, + 0b00000000000000000000000000000000, + }, }; constexpr uint8_t ucdLookup(const char32_t cp) noexcept { - const auto s1 = s_stage1[cp >> 11]; - const auto s2 = s_stage2[s1 + ((cp >> 6) & 31)]; - const auto s3 = s_stage3[s2 + ((cp >> 3) & 7)]; - const auto s4 = s_stage4[s3 + ((cp >> 0) & 7)]; - return s4; + const auto s0 = s_stage0[cp >> 11]; + const auto s1 = s_stage1[s0 + ((cp >> 6) & 31)]; + const auto s2 = s_stage2[s1 + ((cp >> 3) & 7)]; + const auto s3 = s_stage3[s2 + ((cp >> 0) & 7)]; + return s3; } -constexpr bool ucdGraphemeJoins(const uint8_t lead, const uint8_t trail) noexcept +constexpr uint8_t 
ucdGraphemeJoins(const uint8_t state, const uint8_t lead, const uint8_t trail) noexcept { - return s_joinRules[lead & 15] & (1 << (trail & 15)); + const auto l = lead & 15; + const auto t = trail & 15; + return (s_joinRules[state][l] >> (t * 2)) & 3; } -constexpr int ucdToCharacterWidth(const uint8_t val) noexcept +constexpr bool ucdGraphemeDone(const uint8_t state) noexcept +{ + return state == 3; +} +constexpr uint8_t ucdToCharacterWidth(const uint8_t val) noexcept { return val >> 6; } // clang-format on -[[msvc::forceinline]] constexpr std::wstring_view::iterator utf16NextOrFFFD(std::wstring_view::iterator it, const std::wstring_view::iterator& end, char32_t& out) +[[msvc::forceinline]] constexpr const wchar_t* utf16NextOrFFFD(const wchar_t* it, const wchar_t* end, char32_t& out) { + __assume(it != nullptr); + __assume(end != nullptr); + char32_t c = *it++; // Is any surrogate? @@ -915,8 +635,11 @@ constexpr int ucdToCharacterWidth(const uint8_t val) noexcept return it; } -[[msvc::forceinline]] constexpr std::wstring_view::iterator utf16PrevOrFFFD(std::wstring_view::iterator it, const std::wstring_view::iterator& beg, char32_t& out) +[[msvc::forceinline]] constexpr const wchar_t* utf16PrevOrFFFD(const wchar_t* it, const wchar_t* beg, char32_t& out) { + __assume(it != nullptr); + __assume(beg != nullptr); + char32_t c = *--it; // Is any surrogate? @@ -949,190 +672,318 @@ CodepointWidthDetector& CodepointWidthDetector::Singleton() noexcept return s_codepointWidthDetector; } -size_t CodepointWidthDetector::GraphemeNext(const std::wstring_view& str, size_t offset, int* width) noexcept +bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_view& str) noexcept { - if constexpr (!Feature_Graphemes::IsEnabled()) + const auto beg = str.data(); + const auto end = beg + str.size(); + auto clusterBeg = s.beg + s.len; + + // If it's a new string argument, we'll restart at the new string's beginning. 
+ if (clusterBeg < beg || clusterBeg > end) { - return _graphemeNextOld(str, offset, width); + clusterBeg = beg; } - int widthIgnored; - if (!width) + if (clusterBeg >= end) { - width = &widthIgnored; + return false; } - const auto beg = str.begin(); - const auto end = str.end(); - auto it = beg + std::min(offset, str.size()); - - if (it == end) + if (_mode != TextMeasurementMode::Graphemes) { - *width = 1; - return offset; + return _graphemeNextWcswidth(s, end, clusterBeg); } + auto clusterEnd = clusterBeg; + uint8_t state = s._state; + uint8_t totalWidth = 0; + uint8_t lead; char32_t cp; - it = utf16NextOrFFFD(it, end, cp); - auto lead = ucdLookup(cp); - int totalWidth = 0; + // The _state is stored ~flipped, so that we can differentiate + // between it being unset (0) and it being set to 0 (~0 = 255). + if (state) + { + state = ~state; + lead = s._last; + totalWidth = s._totalWidth; + goto fetchNext; + } + + clusterEnd = utf16NextOrFFFD(clusterEnd, end, cp); + lead = ucdLookup(cp); for (;;) { - auto w = ucdToCharacterWidth(lead); - if (w == 3) { - w = _checkFallbackViaCache(cp); - } + auto w = ucdToCharacterWidth(lead); + if (w == 3) + { + w = _ambiguousWidth; + } - totalWidth += w; + // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. + // By convention, this turns them from being ambiguous width (= narrow) into wide ones. + // We achieve this here by explicitly giving this codepoint a wide width. + // Later down below we'll clamp totalWidth back to <= 2. 
+ if (cp == 0xFE0F) + { + w = 2; + } + + totalWidth += w; + } - if (it == end) + if (clusterEnd >= end) { break; } - const auto it2 = utf16NextOrFFFD(it, end, cp); + fetchNext: + const auto clusterEndNext = utf16NextOrFFFD(clusterEnd, end, cp); const auto trail = ucdLookup(cp); + state = ucdGraphemeJoins(state, lead, trail); - if (!ucdGraphemeJoins(lead, trail)) + if (ucdGraphemeDone(state)) { + state = 255; + lead = 0; break; } - it = it2; + clusterEnd = clusterEndNext; lead = trail; } - *width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); - return it - beg; + state = ~state; + totalWidth = totalWidth > 2 ? 2 : totalWidth; + + s.beg = clusterBeg; + s.len = static_cast(clusterEnd - clusterBeg); + s.width = totalWidth < 1 ? 1 : totalWidth; + s._state = state; + s._last = lead; + s._totalWidth = totalWidth; + return state != 0; } // This code is identical to GraphemeNext() but with the order of operations reversed since we're iterating backwards. -size_t CodepointWidthDetector::GraphemePrev(const std::wstring_view& str, size_t offset, int* width) noexcept +bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_view& str) noexcept { - if constexpr (!Feature_Graphemes::IsEnabled()) + const auto beg = str.data(); + const auto end = beg + str.size(); + auto clusterEnd = s.beg; + + // If it's a new string argument, we'll restart at the new string's end. 
+ if (clusterEnd < beg || clusterEnd > end) { - return _graphemePrevOld(str, offset, width); + clusterEnd = end; } - int widthIgnored; - if (!width) + if (clusterEnd <= beg) { - width = &widthIgnored; + return false; } - const auto beg = str.begin(); - auto it = beg + std::min(offset, str.size()); - - if (it == beg) + if (_mode != TextMeasurementMode::Graphemes) { - *width = 1; - return 0; + return _graphemePrevWcswidth(s, beg, clusterEnd); } + auto clusterBeg = clusterEnd; + uint8_t state = s._state; + uint8_t totalWidth = 0; + uint8_t trail; char32_t cp; - it = utf16PrevOrFFFD(it, beg, cp); - auto trail = ucdLookup(cp); - int totalWidth = 0; + // The _state is stored ~flipped, so that we can differentiate + // between it being unset (0) and it being set to 0 (~0 = 255). + if (state) + { + state = ~state; + trail = s._last; + totalWidth = s._totalWidth; + goto fetchNext; + } + + clusterBeg = utf16PrevOrFFFD(clusterBeg, beg, cp); + trail = ucdLookup(cp); for (;;) { - auto w = ucdToCharacterWidth(trail); - if (w == 3) { - w = _checkFallbackViaCache(cp); - } + auto w = ucdToCharacterWidth(trail); + if (w == 3) + { + w = _ambiguousWidth; + } - totalWidth += w; + // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. + // By convention, this turns them from being ambiguous width (= narrow) into wide ones. + // We achieve this here by explicitly giving this codepoint a wide width. + // Later down below we'll clamp totalWidth back to <= 2. 
+ if (cp == 0xFE0F) + { + w = 2; + } - if (it == beg) + totalWidth += w; + } + + if (clusterBeg <= beg) { break; } - const auto it2 = utf16PrevOrFFFD(it, beg, cp); + fetchNext: + const auto clusterBegNext = utf16PrevOrFFFD(clusterBeg, beg, cp); const auto lead = ucdLookup(cp); + state = ucdGraphemeJoins(state, lead, trail); - if (!ucdGraphemeJoins(lead, trail)) + if (ucdGraphemeDone(state)) { + state = 255; + trail = 0; break; } - it = it2; + clusterBeg = clusterBegNext; trail = lead; } - *width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); - return it - beg; + state = ~state; + totalWidth = totalWidth > 2 ? 2 : totalWidth; + + s.beg = clusterBeg; + s.len = static_cast(clusterEnd - clusterBeg); + s.width = totalWidth < 1 ? 1 : totalWidth; + s._state = state; + s._last = trail; + s._totalWidth = totalWidth; + return state != 0; } -__declspec(noinline) size_t CodepointWidthDetector::_graphemeNextOld(const std::wstring_view& str, size_t offset, int* width) noexcept +__declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept { - const auto beg = str.begin(); - const auto end = str.end(); - auto it = beg + std::min(offset, str.size()); - - if (it == end) + if (_mode != TextMeasurementMode::Wcswidth) { - return 0; + return _graphemeNextConsole(s, end, clusterBeg); } - char32_t cp; - it = utf16NextOrFFFD(it, end, cp); + auto clusterEnd = clusterBeg; + uint8_t state = s._state; + uint8_t totalWidth = 0; - if (width) + for (;;) { - *width = _getWidthOld(cp); + char32_t cp; + const auto it2 = utf16NextOrFFFD(clusterEnd, end, cp); + const auto val = ucdLookup(cp); + + auto w = ucdToCharacterWidth(val); + if (w == 3) + { + w = _ambiguousWidth; + } + + if (state != 0 && w != 0) + { + state = 0; + break; + } + + state = 1; + totalWidth += w; + clusterEnd = it2; + + if (clusterEnd >= end) + { + break; + } } - return it - beg; + s.beg = clusterBeg; + s.len = static_cast(clusterEnd - 
clusterBeg); + s.width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); + s._state = state; + return state != 0; } -__declspec(noinline) size_t CodepointWidthDetector::_graphemePrevOld(const std::wstring_view& str, size_t offset, int* width) noexcept +__declspec(noinline) bool CodepointWidthDetector::_graphemePrevWcswidth(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept { - const auto beg = str.begin(); - auto it = beg + std::min(offset, str.size()); + if (_mode != TextMeasurementMode::Wcswidth) + { + return _graphemePrevConsole(s, beg, clusterEnd); + } + + auto clusterBeg = clusterEnd; + int totalWidth = 0; - if (it == beg) + for (;;) { - return 0; + char32_t cp; + clusterBeg = utf16PrevOrFFFD(clusterBeg, beg, cp); + const auto val = ucdLookup(cp); + + auto w = ucdToCharacterWidth(val); + if (w == 3) + { + w = _ambiguousWidth; + } + + totalWidth += w; + + if (w != 0 || clusterBeg <= beg) + { + break; + } } + s.beg = clusterBeg; + s.len = static_cast(clusterEnd - clusterBeg); + s.width = totalWidth; + return totalWidth != 0; +} + +bool CodepointWidthDetector::_graphemeNextConsole(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept +{ char32_t cp; - it = utf16PrevOrFFFD(it, beg, cp); + const auto clusterEnd = utf16NextOrFFFD(clusterBeg, end, cp); - if (width) + const auto val = ucdLookup(cp); + auto width = ucdToCharacterWidth(val); + if (width == 3) { - *width = _getWidthOld(cp); + width = _checkFallbackViaCache(cp); } - return it - beg; + s.beg = clusterBeg; + s.len = static_cast(clusterEnd - clusterBeg); + s.width = width; + return clusterEnd < end; } -int CodepointWidthDetector::_getWidthOld(const char32_t codepoint) noexcept +bool CodepointWidthDetector::_graphemePrevConsole(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept { -#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'lower_bound<...>()' which may throw exceptions (f.6).
- const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint); - int width = 1; + char32_t cp; + const auto clusterBeg = utf16PrevOrFFFD(clusterEnd, beg, cp); - if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound) + const auto val = ucdLookup(cp); + auto width = ucdToCharacterWidth(val); + if (width == 3) { - width = 2; - if (it->isAmbiguous) - { - width = _checkFallbackViaCache(codepoint); - } + width = _checkFallbackViaCache(cp); } - return width; + s.beg = clusterBeg; + s.len = static_cast(clusterEnd - clusterBeg); + s.width = width; + return clusterBeg > beg; } -// Call the function specified via SetFallbackMethod() to turn CodepointWidth::Ambiguous into Narrow/Wide. -// Caches the results in _fallbackCache. This is _lookupGlyphWidth's even-slower-path. -__declspec(noinline) int CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint) noexcept +// Call the function specified via SetFallbackMethod() to turn ambiguous (width = 3) into narrow/wide. +// Caches the results in _fallbackCache. +__declspec(noinline) uint8_t CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint) noexcept try { // Ambiguous glyphs are considered narrow by default. See microsoft/terminal#2066 for more info. @@ -1147,7 +998,7 @@ try } wchar_t buf[2]; - size_t len = 0; + size_t len; if (codepoint <= 0xffff) { buf[0] = static_cast(codepoint); @@ -1170,6 +1021,11 @@ catch (...) 
return 1; } +TextMeasurementMode CodepointWidthDetector::GetMode() const noexcept +{ + return _mode; +} + // Method Description: // - Sets a function that should be used as the fallback mechanism for // determining a particular glyph's width, should the glyph be an ambiguous @@ -1185,16 +1041,8 @@ void CodepointWidthDetector::SetFallbackMethod(std::function -// Return Value: -// - -void CodepointWidthDetector::ClearFallbackCache() noexcept +void CodepointWidthDetector::Reset(const TextMeasurementMode mode) noexcept { -#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'clear()' which may throw exceptions (f.6). + _mode = mode; _fallbackCache.clear(); } diff --git a/src/types/CodepointWidthDetector_gen.go b/src/types/CodepointWidthDetector_gen.go deleted file mode 100644 index ad58a5a39c2..00000000000 --- a/src/types/CodepointWidthDetector_gen.go +++ /dev/null @@ -1,557 +0,0 @@ -package main - -import ( - "bytes" - "encoding/xml" - "fmt" - "math" - "os" - "slices" - "strconv" - "strings" - "time" - "unsafe" -) - -type CharacterWidth int - -const ( - cwZeroWidth CharacterWidth = iota - cwNarrow - cwWide - cwAmbiguous -) - -type ClusterBreak int - -const ( - cbOther ClusterBreak = iota // GB999 - cbControl // GB3, GB4, GB5 - cbHangulL // GB6, GB7, GB8 - cbHangulV // GB6, GB7, GB8 - cbHangulT // GB6, GB7, GB8 - cbHangulLV // GB6, GB7, GB8 - cbHangulLVT // GB6, GB7, GB8 - cbExtend // GB9, GB9a - cbZeroWidthJoiner // GB9, GB11 - cbPrepend // GB9b - cbConjunctLinker // GB9c - cbExtendedPictographic // GB11 - cbRegionalIndicator // GB12, GB13 - - cbCount -) - -type HexInt int - -func (h *HexInt) UnmarshalXMLAttr(attr xml.Attr) error { - v, err := strconv.ParseUint(attr.Value, 16, 32) - if err != nil { - return err - } - *h = HexInt(v) - return nil -} - -type UCD struct { - Description string `xml:"description"` - Repertoire struct { - Group []struct { - GeneralCategory string `xml:"gc,attr"` - GraphemeClusterBreak string 
`xml:"GCB,attr"` - IndicConjunctBreak string `xml:"InCB,attr"` - ExtendedPictographic string `xml:"ExtPict,attr"` - EastAsian string `xml:"ea,attr"` - - // This maps the following tags: - // , , , - Char []struct { - Codepoint HexInt `xml:"cp,attr"` - FirstCodepoint HexInt `xml:"first-cp,attr"` - LastCodepoint HexInt `xml:"last-cp,attr"` - - GeneralCategory string `xml:"gc,attr"` - GraphemeClusterBreak string `xml:"GCB,attr"` - IndicConjunctBreak string `xml:"InCB,attr"` - ExtendedPictographic string `xml:"ExtPict,attr"` - EastAsian string `xml:"ea,attr"` - } `xml:",any"` - } `xml:"group"` - } `xml:"repertoire"` -} - -func main() { - if err := run(); err != nil { - fmt.Println(err) - os.Exit(1) - } -} - -func run() error { - if len(os.Args) <= 1 { - fmt.Println(`Usage: - go run CodepointWidthDetector_gen.go - -You can download the latest ucd.nounihan.grouped.xml from: - https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip`) - os.Exit(1) - } - - data, err := os.ReadFile(os.Args[1]) - if err != nil { - return fmt.Errorf("failed to read XML: %w", err) - } - - ucd := &UCD{} - err = xml.Unmarshal(data, ucd) - if err != nil { - return fmt.Errorf("failed to parse XML: %w", err) - } - - values, err := extractValuesFromUCD(ucd) - if err != nil { - return err - } - - // More stages = Less size. The trajectory roughly follows a+b*c^stages, where c < 1. - // 4 still gives ~30% savings over 3 stages and going beyond 5 gives diminishing returns (<10%). 
- trie := buildBestTrie(values, 2, 8, 4) - rules := buildJoinRules() - totalSize := trie.TotalSize + int(unsafe.Sizeof(rules)) - - for cp, expected := range values { - var v TrieType - for _, s := range trie.Stages { - v = s.Values[int(v)+((cp>>s.Shift)&s.Mask)] - } - if v != expected { - return fmt.Errorf("trie sanity check failed for %U", cp) - } - } - - buf := &strings.Builder{} - - _, _ = fmt.Fprintf(buf, "// Generated by CodepointWidthDetector_gen.go\n") - _, _ = fmt.Fprintf(buf, "// on %s, from %s, %d bytes\n", time.Now().UTC().Format(time.RFC3339), ucd.Description, totalSize) - _, _ = fmt.Fprintf(buf, "// clang-format off\n") - - for i, s := range trie.Stages { - width := 16 - if i != 0 { - width = s.Mask + 1 - } - _, _ = fmt.Fprintf(buf, "static constexpr uint%d_t s_stage%d[] = {", s.Bits, i+1) - for j, value := range s.Values { - if j%width == 0 { - buf.WriteString("\n ") - } - _, _ = fmt.Fprintf(buf, " %#0*x,", s.Bits/4, value) - } - buf.WriteString("\n};\n") - } - - buf.WriteString("static constexpr uint16_t s_joinRules[] = {\n") - for _, r := range rules { - _, _ = fmt.Fprintf(buf, " %#016b,\n", r) - } - buf.WriteString("};\n") - - _, _ = fmt.Fprintf(buf, "constexpr uint%d_t ucdLookup(const char32_t cp) noexcept\n", trie.Stages[len(trie.Stages)-1].Bits) - buf.WriteString("{\n") - for i, s := range trie.Stages { - _, _ = fmt.Fprintf(buf, " const auto s%d = s_stage%d[", i+1, i+1) - if i == 0 { - _, _ = fmt.Fprintf(buf, "cp >> %d", s.Shift) - } else { - _, _ = fmt.Fprintf(buf, "s%d + ((cp >> %d) & %d)", i, s.Shift, s.Mask) - } - buf.WriteString("];\n") - } - _, _ = fmt.Fprintf(buf, " return s%d;\n", len(trie.Stages)) - buf.WriteString("}\n") - - buf.WriteString(`constexpr bool ucdGraphemeJoins(const uint8_t lead, const uint8_t trail) noexcept -{ - return s_joinRules[lead & 15] & (1 << (trail & 15)); -} -constexpr int ucdToCharacterWidth(const uint8_t val) noexcept -{ - return val >> 6; -} -// clang-format on -`) - - _, _ = 
os.Stdout.WriteString(buf.String()) - return nil -} - -type TrieType uint32 - -func extractValuesFromUCD(ucd *UCD) ([]TrieType, error) { - values := make([]TrieType, 1114112) - fillRange(values, trieValue(cbOther, cwNarrow)) - - for _, group := range ucd.Repertoire.Group { - for _, char := range group.Char { - generalCategory := coalesce(char.GeneralCategory, group.GeneralCategory) - graphemeClusterBreak := coalesce(char.GraphemeClusterBreak, group.GraphemeClusterBreak) - indicConjunctBreak := coalesce(char.IndicConjunctBreak, group.IndicConjunctBreak) - extendedPictographic := coalesce(char.ExtendedPictographic, group.ExtendedPictographic) - eastAsian := coalesce(char.EastAsian, group.EastAsian) - - firstCp, lastCp := int(char.FirstCodepoint), int(char.LastCodepoint) - if char.Codepoint != 0 { - firstCp, lastCp = int(char.Codepoint), int(char.Codepoint) - } - - var cb ClusterBreak - switch graphemeClusterBreak { - case "XX": // Anything else - cb = cbOther - case "CR", "LF", "CN": // Carriage Return, Line Feed, Control - // We ignore GB3 which demands that CR × LF do not break apart, because - // a) these control characters won't normally reach our text storage - // b) otherwise we're in a raw write mode and historically conhost stores them in separate cells - cb = cbControl - case "EX", "SM": // Extend, SpacingMark - cb = cbExtend - case "PP": // Prepend - cb = cbPrepend - case "ZWJ": // Zero Width Joiner - cb = cbZeroWidthJoiner - case "RI": // Regional Indicator - cb = cbRegionalIndicator - case "L": // Hangul Syllable Type L - cb = cbHangulL - case "V": // Hangul Syllable Type V - cb = cbHangulV - case "T": // Hangul Syllable Type T - cb = cbHangulT - case "LV": // Hangul Syllable Type LV - cb = cbHangulLV - case "LVT": // Hangul Syllable Type LVT - cb = cbHangulLVT - default: - return nil, fmt.Errorf("unrecognized GCB %s for %U to %U", graphemeClusterBreak, firstCp, lastCp) - } - if extendedPictographic == "Y" { - // Currently every single 
Extended_Pictographic codepoint happens to be GCB=XX. - // This is fantastic for us because it means we can stuff it into the ClusterBreak enum - // and treat it as an alias of EXTEND, but with the special GB11 properties. - if cb != cbOther { - return nil, fmt.Errorf("unexpected GCB %s with ExtPict=Y for %U to %U", graphemeClusterBreak, firstCp, lastCp) - } - cb = cbExtendedPictographic - } - if indicConjunctBreak == "Linker" { - // Similarly here, we can treat it as an alias for EXTEND, but with the GB9c properties. - if cb != cbExtend { - return nil, fmt.Errorf("unexpected GCB %s with InCB=Linker for %U to %U", graphemeClusterBreak, firstCp, lastCp) - } - cb = cbConjunctLinker - } - - var width CharacterWidth - switch eastAsian { - case "N", "Na", "H": // neutral, narrow, half-width - width = cwNarrow - case "F", "W": // full-width, wide - width = cwWide - case "A": // ambiguous - width = cwAmbiguous - default: - return nil, fmt.Errorf("unrecognized ea %s for %U to %U", eastAsian, firstCp, lastCp) - } - // There's no "ea" attribute for "zero width" so we need to do that ourselves. This matches: - // Mc: Mark, spacing combining - // Me: Mark, enclosing - // Mn: Mark, non-spacing - // Cf: Control, format - if strings.HasPrefix(generalCategory, "M") || generalCategory == "Cf" { - width = cwZeroWidth - } - - fillRange(values[firstCp:lastCp+1], trieValue(cb, width)) - } - } - - // Box-drawing and block elements are ambiguous according to their EastAsian attribute, - // but by convention terminals always consider them to be narrow. - fillRange(values[0x2500:0x259F+1], trieValue(cbOther, cwNarrow)) - // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. - // By convention, this also turns them from being ambiguous, = narrow by default, into wide ones. 
- fillRange(values[0xFE0F:0xFE0F+1], trieValue(cbExtend, cwWide)) - - return values, nil -} - -func trieValue(cb ClusterBreak, width CharacterWidth) TrieType { - return TrieType(byte(cb) | byte(width)<<6) -} - -func coalesce(a, b string) string { - if a != "" { - return a - } - return b -} - -type Stage struct { - Values []TrieType - Shift int - Mask int - Bits int -} - -type Trie struct { - Stages []*Stage - TotalSize int -} - -func buildBestTrie(uncompressed []TrieType, minShift, maxShift, stages int) *Trie { - delta := maxShift - minShift + 1 - results := make(chan *Trie) - bestTrie := &Trie{TotalSize: math.MaxInt} - - iters := 1 - for i := 1; i < stages; i++ { - iters *= delta - } - - for i := 0; i < iters; i++ { - go func(i int) { - // Given minShift=2, maxShift=3, depth=3 this generates - // [2 2 2] - // [3 2 2] - // [2 3 2] - // [3 3 2] - // [2 2 3] - // [3 2 3] - // [2 3 3] - // [3 3 3] - shifts := make([]int, stages-1) - for j := range shifts { - shifts[j] = minShift + i%delta - i /= delta - } - results <- buildTrie(uncompressed, shifts) - }(i) - } - - for i := 0; i < iters; i++ { - t := <-results - if bestTrie.TotalSize > t.TotalSize { - bestTrie = t - } - } - return bestTrie -} - -func buildTrie(uncompressed []TrieType, shifts []int) *Trie { - var cumulativeShift int - var stages []*Stage - - for _, shift := range shifts { - chunkSize := 1 << shift - cache := map[string]TrieType{} - compressed := make([]TrieType, 0, len(uncompressed)/8) - offsets := make([]TrieType, 0, len(uncompressed)/chunkSize) - - for i := 0; i < len(uncompressed); i += chunkSize { - chunk := uncompressed[i:min(len(uncompressed), i+chunkSize)] - // Cast the integer slice to a string so that it can be hashed. - key := unsafe.String((*byte)(unsafe.Pointer(&chunk[0])), len(chunk)*int(unsafe.Sizeof(chunk[0]))) - offset, exists := cache[key] - - if !exists { - // For a 4-stage trie searching for existing occurrences of chunk in compressed yields a ~10% - // compression improvement. 
Checking for overlaps with the tail end of compressed yields another ~15%. - // FYI I tried to shuffle the order of compressed chunks but found that this has a negligible impact. - if existing := findExisting(compressed, chunk); existing != -1 { - offset = TrieType(existing) - cache[key] = offset - } else { - overlap := measureOverlap(compressed, chunk) - compressed = append(compressed, chunk[overlap:]...) - offset = TrieType(len(compressed) - len(chunk)) - cache[key] = offset - } - } - - offsets = append(offsets, offset) - } - - stages = append(stages, &Stage{ - Values: compressed, - Shift: cumulativeShift, - Mask: chunkSize - 1, - }) - - uncompressed = offsets - cumulativeShift += shift - } - - stages = append(stages, &Stage{ - Values: uncompressed, - Shift: cumulativeShift, - Mask: math.MaxInt32, - }) - slices.Reverse(stages) - - for _, s := range stages { - m := slices.Max(s.Values) - if m <= 0xff { - s.Bits = 8 - } else if m <= 0xffff { - s.Bits = 16 - } else { - s.Bits = 32 - } - } - - totalSize := 0 - for _, s := range stages { - totalSize += (s.Bits / 8) * len(s.Values) - } - - return &Trie{ - Stages: stages, - TotalSize: totalSize, - } -} - -// Finds needle in haystack. Returns -1 if it couldn't be found. -func findExisting(haystack, needle []TrieType) int { - if len(haystack) == 0 || len(needle) == 0 { - return -1 - } - - s := int(unsafe.Sizeof(TrieType(0))) - h := unsafe.Slice((*byte)(unsafe.Pointer(&haystack[0])), len(haystack)*s) - n := unsafe.Slice((*byte)(unsafe.Pointer(&needle[0])), len(needle)*s) - i := 0 - - for { - i = bytes.Index(h[i:], n) - if i == -1 { - return -1 - } - if i%s == 0 { - return i / s - } - } -} - -// Given two slices, this returns the amount by which prev's end overlaps with next's start. -// That is, given [0,1,2,3,4] and [2,3,4,5] this returns 3 because [2,3,4] is the "overlap". 
-func measureOverlap(prev, next []TrieType) int { - for overlap := min(len(prev), len(next)); overlap >= 0; overlap-- { - if slices.Equal(prev[len(prev)-overlap:], next[:overlap]) { - return overlap - } - } - return 0 -} - -func buildJoinRules() [16]uint16 { - // UAX #29 states: - // > Note: Testing two adjacent characters is insufficient for determining a boundary. - // - // I completely agree, but I really hate it. So this code trades off correctness for simplicity - // by using a simple lookup table anyway. Under most circumstances users won't notice, - // because as far as I can see this only behaves different for degenerate ("invalid") Unicode. - // It reduces our code complexity significantly and is way *way* faster. - // - // This is a great reference for the resulting table: - // https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html - - // NOTE: We build the table in reverse, because rules with lower numbers take priority. - // (This is primarily relevant for GB9b vs. GB4.) - - // Otherwise, break everywhere. - // GB999: Any ÷ Any - var rules [16]uint16 - - // Do not break within emoji flag sequences. That is, do not break between regional indicator - // (RI) symbols if there is an odd number of RI characters before the break point. - // GB13: [^RI] (RI RI)* RI × RI - // GB12: sot (RI RI)* RI × RI - // - // We cheat here by not checking that the number of RIs is even. Meh! - rules[cbRegionalIndicator] |= 1 << cbRegionalIndicator - - // Do not break within emoji modifier sequences or emoji zwj sequences. - // GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic} - // - // We cheat here by not checking that the ZWJ is preceded by an ExtPic. Meh! - rules[cbZeroWidthJoiner] |= 1 << cbExtendedPictographic - - // Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker. 
- // GB9c: \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker} [\p{InCB=Extend}\p{InCB=Linker}]* × \p{InCB=Consonant} - // - // I'm sure GB9c is great for these languages, but honestly the definition is complete whack. - // Just look at that chonker! This isn't a "cheat" like the others above, this is a reinvention: - // We treat it as having both ClusterBreak.PREPEND and ClusterBreak.EXTEND properties. - rules[cbConjunctLinker] = math.MaxUint16 - for i := range rules { - rules[i] |= 1 << cbConjunctLinker - } - - // Do not break before SpacingMarks, or after Prepend characters. - // GB9b: Prepend × - rules[cbPrepend] = math.MaxUint16 - - // Do not break before SpacingMarks, or after Prepend characters. - // GB9a: × SpacingMark - // Do not break before extending characters or ZWJ. - // GB9: × (Extend | ZWJ) - for i := range rules { - // CodepointWidthDetector_gen.py sets SpacingMarks to ClusterBreak.EXTEND as well, - // since they're entirely identical to GB9's Extend. - rules[i] |= (1 << cbExtend) | (1 << cbZeroWidthJoiner) - } - - // Do not break Hangul syllable sequences. - // GB8: (LVT | T) x T - rules[cbHangulLVT] |= 1 << cbHangulT - rules[cbHangulT] |= 1 << cbHangulT - // GB7: (LV | V) x (V | T) - rules[cbHangulLV] |= 1 << cbHangulT - rules[cbHangulLV] |= 1 << cbHangulV - rules[cbHangulV] |= 1 << cbHangulV - rules[cbHangulV] |= 1 << cbHangulT - // GB6: L x (L | V | LV | LVT) - rules[cbHangulL] |= 1 << cbHangulL - rules[cbHangulL] |= 1 << cbHangulV - rules[cbHangulL] |= 1 << cbHangulLV - rules[cbHangulL] |= 1 << cbHangulLVT - - // Do not break between a CR and LF. Otherwise, break before and after controls. 
- // GB5: ÷ (Control | CR | LF) - for i := range rules { - rules[i] &= ^(uint16(1) << cbControl) - } - // GB4: (Control | CR | LF) ÷ - rules[cbControl] = 0 - - // We ignore GB3 which demands that CR × LF do not break apart, because - // a) these control characters won't normally reach our text storage - // b) otherwise we're in a raw write mode and historically conhost stores them in separate cells - - // We also ignore GB1 and GB2 which demand breaks at the start and end, - // because that's not part of the loops in GraphemeNext/Prev and not this table. - - // Set any bits to 0 which are outside the valid [cbOther,cbCount) range. - for i := range rules { - rules[i] &= 1< pfnFallback) noexcept; - void ClearFallbackCache() noexcept; + void Reset(TextMeasurementMode mode) noexcept; private: - __declspec(noinline) int _checkFallbackViaCache(char32_t codepoint) noexcept; - - size_t _graphemeNextOld(const std::wstring_view& str, size_t offset, int* width) noexcept; - size_t _graphemePrevOld(const std::wstring_view& str, size_t offset, int* width) noexcept; - int _getWidthOld(char32_t cp) noexcept; + __declspec(noinline) bool _graphemeNextWcswidth(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept; + __declspec(noinline) bool _graphemePrevWcswidth(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept; + __declspec(noinline) bool _graphemeNextConsole(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept; + __declspec(noinline) bool _graphemePrevConsole(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept; + __declspec(noinline) uint8_t _checkFallbackViaCache(char32_t codepoint) noexcept; std::unordered_map _fallbackCache; std::function _pfnFallbackMethod; + TextMeasurementMode _mode = TextMeasurementMode::Graphemes; + uint8_t _ambiguousWidth = 1; }; diff --git a/src/types/ut_types/CodepointWidthDetectorTests.cpp b/src/types/ut_types/CodepointWidthDetectorTests.cpp index 
7a1f986fdf4..ab5a62a088a 100644 --- a/src/types/ut_types/CodepointWidthDetectorTests.cpp +++ b/src/types/ut_types/CodepointWidthDetectorTests.cpp @@ -28,8 +28,8 @@ // aka: RIs should have an even number. // Same thing here. Any RI joins with any preceding RI. -// Generated by CodepointWidthDetector_gen.go -// on 2024-03-22T00:17:11Z, from Unicode 15.1.0 +// Generated by GraphemeTestTableGen +// on 2024-05-27T21:06:21Z, from Unicode 15.1.0 struct GraphemeBreakTest { const wchar_t* comment; @@ -1044,7 +1044,7 @@ static constexpr GraphemeBreakTest s_graphemeBreakTests[] = { { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x093C\x0308\x200D" }, { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x093C", L"\x0378" }, { L"÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x093C\x0308", L"\x0378" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x094D", L" " }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x094D", L" " }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x094D\x0308", L" " }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x094D", L"\r" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]", L"\x094D\x0308", L"\r" }, @@ -1054,33 +1054,33 @@ static constexpr GraphemeBreakTest s_graphemeBreakTests[] = { { L"÷ 
[0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]", L"\x094D\x0308", L"\x01" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x094D\x034F" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]", L"\x094D\x0308\x034F" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x094D", L"\U0001F1E6" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x094D", L"\U0001F1E6" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]", L"\x094D\x0308", L"\U0001F1E6" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x094D", L"\x0600" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x094D", L"\x0600" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]", L"\x094D\x0308", L"\x0600" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x094D\x0A03" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING 
DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]", L"\x094D\x0308\x0A03" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x094D", L"\x1100" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x094D", L"\x1100" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]", L"\x094D\x0308", L"\x1100" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x094D", L"\x1160" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x094D", L"\x1160" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]", L"\x094D\x0308", L"\x1160" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x094D", L"\x11A8" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x094D", L"\x11A8" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]", L"\x094D\x0308", L"\x11A8" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x094D", L"\xAC00" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA 
(Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x094D", L"\xAC00" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]", L"\x094D\x0308", L"\xAC00" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x094D", L"\xAC01" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x094D", L"\xAC01" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]", L"\x094D\x0308", L"\xAC01" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0900" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0308\x0900" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0903" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0308\x0903" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x094D", L"\x0904" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA 
(Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x094D", L"\x0904" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0308", L"\x0904" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D", L"\x0D4E" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D", L"\x0D4E" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]", L"\x094D\x0308", L"\x0D4E" }, //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x094D", L"\x0915" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x094D\x0308", L"\x0915" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x094D", L"\x231A" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x094D", L"\x231A" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]", L"\x094D\x0308", 
L"\x231A" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x094D\x0300" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]", L"\x094D\x0308\x0300" }, @@ -1090,7 +1090,7 @@ static constexpr GraphemeBreakTest s_graphemeBreakTests[] = { { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]", L"\x094D\x0308\x094D" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x094D\x200D" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"\x094D\x0308\x200D" }, - //{ L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x094D", L"\x0378" }, + { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x094D", L"\x0378" }, { L"÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]", L"\x094D\x0308", L"\x0378" }, { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x200D", L" " }, { L"÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]", L"\x200D\x0308", L" " }, @@ -1199,7 +1199,7 @@ static constexpr GraphemeBreakTest s_graphemeBreakTests[] = { //{ L"÷ [0.2] LATIN SMALL LETTER A 
(Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\U0001F1E7", L"\U0001F1E8", L"b" }, { L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\U0001F1E7\x200D", L"\U0001F1E8", L"b" }, { L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\x200D", L"\U0001F1E7\U0001F1E8", L"b" }, - //{ L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\U0001F1E7", L"\U0001F1E8\U0001F1E9", L"b" }, + { L"÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a", L"\U0001F1E6\U0001F1E7", L"\U0001F1E8\U0001F1E9", L"b" }, { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]", L"a\x200D" }, { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a\x0308", L"b" }, { L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA 
(SpacingMark_ConjunctLinkingScripts) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]", L"a\x0903", L"b" }, @@ -1219,10 +1219,14 @@ static constexpr GraphemeBreakTest s_graphemeBreakTests[] = { { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x093C\x200D\x094D\x0924" }, //{ L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x093C\x094D\x200D\x0924" }, { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER YA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x094D\x0924\x094D\x092F" }, - //{ L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER A (Other) ÷ [0.3]", L"\x0915\x094D", L"a" }, + { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER A (Other) ÷ [0.3]", L"\x0915\x094D", L"a" }, //{ L"÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] DEVANAGARI SIGN VIRAMA 
(Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"a\x094D", L"\x0924" }, //{ L"÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"?\x094D", L"\x0924" }, { L"÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]", L"\x0915\x094D\x094D\x0924" }, + + // These are additional cases which the official break tests don't cover: + { L"multiple combining marks", L"a\u0363", L"e\u0364\u0364", L"i\u0365" }, + { L"multiple US flag regional indicators", L"\U0001F1FA\U0001F1F8", L"\U0001F1FA\U0001F1F8" }, }; class CodepointWidthDetectorTests diff --git a/src/types/ut_types/CodepointWidthDetectorTests_gen.go b/src/types/ut_types/CodepointWidthDetectorTests_gen.go deleted file mode 100644 index 6b4a6087a38..00000000000 --- a/src/types/ut_types/CodepointWidthDetectorTests_gen.go +++ /dev/null @@ -1,144 +0,0 @@ -package main - -import ( - "bufio" - "bytes" - "fmt" - "io" - "net/http" - "os" - "regexp" - "strconv" - "strings" - "time" -) - -func main() { - if err := run(); err != nil { - fmt.Println(err) - os.Exit(1) - } -} - -func run() error { - data, err := fetch(`https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt`) - if err != nil { - return err - } - - testString := strings.Builder{} - - scanner := bufio.NewScanner(bytes.NewReader(data)) - firstLine := true - - for scanner.Scan() { - line := scanner.Text() - test, comment, _ := strings.Cut(line, "#") - test = strings.TrimSpace(test) - comment = strings.TrimSpace(comment) - - if firstLine { - 
firstLine = false - - re, err := regexp.Compile(`^GraphemeBreakTest-(\d+\.\d+\.\d+)\.txt$`) - if err != nil { - return err - } - - m := re.FindStringSubmatch(comment) - if len(m) == 0 { - return fmt.Errorf("failed to find version number, got: %s", comment) - } - - _, _ = fmt.Fprintf( - &testString, - `// Generated by CodepointWidthDetector_gen.go -// on %s, from Unicode %s -struct GraphemeBreakTest -{ - const wchar_t* comment; - const wchar_t* graphemes[4]; -}; -static constexpr GraphemeBreakTest s_graphemeBreakTests[] = { -`, - time.Now().UTC().Format(time.RFC3339), - m[1], - ) - } - // # GraphemeBreakTest-15.1.0.txt - - if len(test) == 0 || len(comment) == 0 { - continue - } - - graphemes := strings.Split(test, "÷") - for i, g := range graphemes { - graphemes[i] = strings.TrimSpace(g) - } - - testString.WriteString("") - _, _ = fmt.Fprintf(&testString, ` { L"%s"`, comment) - - for _, g := range graphemes { - if len(g) == 0 { - continue - } - - testString.WriteString(`, L"`) - - codepoints := strings.Split(g, "×") - for _, c := range codepoints { - i, err := strconv.ParseUint(strings.TrimSpace(c), 16, 32) - if err != nil { - return err - } - if i == 0x07 { - testString.WriteString(`\a`) - } else if i == 0x08 { - testString.WriteString(`\b`) - } else if i == 0x09 { - testString.WriteString(`\t`) - } else if i == 0x0A { - testString.WriteString(`\n`) - } else if i == 0x0B { - testString.WriteString(`\v`) - } else if i == 0x0C { - testString.WriteString(`\f`) - } else if i == 0x0D { - testString.WriteString(`\r`) - } else if i >= 0x20 && i <= 0x7e { - testString.WriteRune(rune(i)) - } else if i <= 0xff { - _, _ = fmt.Fprintf(&testString, `\x%02X`, i) - } else if i <= 0xffff { - _, _ = fmt.Fprintf(&testString, `\x%04X`, i) - } else { - _, _ = fmt.Fprintf(&testString, `\U%08X`, i) - } - } - - testString.WriteString(`"`) - } - - testString.WriteString(" },\n") - } - - testString.WriteString("};\n") - _, _ = os.Stdout.WriteString(testString.String()) - return nil -} - 
-func fetch(url string) ([]byte, error) { - res, err := http.Get(url) - if err != nil { - return nil, err - } - defer res.Body.Close() - - body, err := io.ReadAll(res.Body) - if err != nil { - return nil, err - } - - return body, nil -} diff --git a/src/winconpty/winconpty.cpp b/src/winconpty/winconpty.cpp index 16bb278b7da..4bbc6b910ea 100644 --- a/src/winconpty/winconpty.cpp +++ b/src/winconpty/winconpty.cpp @@ -134,19 +134,38 @@ HRESULT _CreatePseudoConsole(const HANDLE hToken, // GH4061: Ensure that the path to executable in the format is escaped so C:\Program.exe cannot collide with C:\Program Files - // This is plenty of space to hold the formatted string - wchar_t cmd[MAX_PATH]{}; + // The command line is formatted further below into a wil::unique_process_heap_string rather than a fixed MAX_PATH buffer. const BOOL bInheritCursor = (dwFlags & PSEUDOCONSOLE_INHERIT_CURSOR) == PSEUDOCONSOLE_INHERIT_CURSOR; const BOOL bResizeQuirk = (dwFlags & PSEUDOCONSOLE_RESIZE_QUIRK) == PSEUDOCONSOLE_RESIZE_QUIRK; - swprintf_s(cmd, - MAX_PATH, - L"\"%s\" --headless %s%s--width %hd --height %hd --signal 0x%tx --server 0x%tx", - _ConsoleHostPath(), - bInheritCursor ? L"--inheritcursor " : L"", - bResizeQuirk ? L"--resizeQuirk " : L"", - size.X, - size.Y, - std::bit_cast(signalPipeConhostSide.get()), - std::bit_cast(serverHandle.get())); + + const wchar_t* textMeasurement; + switch (dwFlags & PSEUDOCONSOLE_GLYPH_WIDTH__MASK) + { + case PSEUDOCONSOLE_GLYPH_WIDTH_GRAPHEMES: + textMeasurement = L"--textMeasurement graphemes "; + break; + case PSEUDOCONSOLE_GLYPH_WIDTH_WCSWIDTH: + textMeasurement = L"--textMeasurement wcswidth "; + break; + case PSEUDOCONSOLE_GLYPH_WIDTH_CONSOLE: + textMeasurement = L"--textMeasurement console "; + break; + default: + textMeasurement = L""; + break; + } + + wil::unique_process_heap_string cmd; + RETURN_IF_FAILED(wil::str_printf_nothrow( + cmd, + L"\"%s\" --headless %s%s%s--width %hd --height %hd --signal 0x%tx --server 0x%tx", + _ConsoleHostPath(), + bInheritCursor ? L"--inheritcursor " : L"", + bResizeQuirk ?
L"--resizeQuirk " : L"", + textMeasurement, + size.X, + size.Y, + std::bit_cast(signalPipeConhostSide.get()), + std::bit_cast(serverHandle.get()))); STARTUPINFOEXW siEx{ 0 }; siEx.StartupInfo.cb = sizeof(STARTUPINFOEXW); @@ -202,7 +221,7 @@ HRESULT _CreatePseudoConsole(const HANDLE hToken, { // Call create process RETURN_IF_WIN32_BOOL_FALSE(CreateProcessW(_ConsoleHostPath(), - cmd, + cmd.get(), nullptr, nullptr, TRUE, @@ -217,7 +236,7 @@ HRESULT _CreatePseudoConsole(const HANDLE hToken, // Call create process RETURN_IF_WIN32_BOOL_FALSE(CreateProcessAsUserW(hToken, _ConsoleHostPath(), - cmd, + cmd.get(), nullptr, nullptr, TRUE, diff --git a/src/winconpty/winconpty.h b/src/winconpty/winconpty.h index a38fdef8594..bd1a6cd1909 100644 --- a/src/winconpty/winconpty.h +++ b/src/winconpty/winconpty.h @@ -55,8 +55,11 @@ typedef struct _PseudoConsole #ifndef PSEUDOCONSOLE_RESIZE_QUIRK #define PSEUDOCONSOLE_RESIZE_QUIRK (0x2) #endif -#ifndef PSEUDOCONSOLE_WIN32_INPUT_MODE -#define PSEUDOCONSOLE_WIN32_INPUT_MODE (0x4) +#ifndef PSEUDOCONSOLE_GLYPH_WIDTH__MASK +#define PSEUDOCONSOLE_GLYPH_WIDTH__MASK 0x18 +#define PSEUDOCONSOLE_GLYPH_WIDTH_GRAPHEMES 0x08 +#define PSEUDOCONSOLE_GLYPH_WIDTH_WCSWIDTH 0x10 +#define PSEUDOCONSOLE_GLYPH_WIDTH_CONSOLE 0x18 #endif // Implementations of the various PseudoConsole functions. 
From 07efae4beebdf34cea15f6ea28b4f74e7f6c95af Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 31 May 2024 23:30:34 +0200 Subject: [PATCH 06/14] AuditMode fix --- src/buffer/out/textBuffer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index f881ff67e32..f2cef88273a 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -409,6 +409,7 @@ void TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute size_t TextBuffer::GraphemeNext(const std::wstring_view& chars, size_t position) noexcept { auto& cwd = CodepointWidthDetector::Singleton(); +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). GraphemeState state{ .beg = chars.data() + position }; cwd.GraphemeNext(state, chars); return position + state.len; @@ -418,6 +419,7 @@ size_t TextBuffer::GraphemeNext(const std::wstring_view& chars, size_t position) size_t TextBuffer::GraphemePrev(const std::wstring_view& chars, size_t position) noexcept { auto& cwd = CodepointWidthDetector::Singleton(); +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). GraphemeState state{ .beg = chars.data() + position }; cwd.GraphemePrev(state, chars); return position - state.len; @@ -469,6 +471,7 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord col--; } +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). 
GraphemeState state{ .beg = chars.data() + dist }; while (dist < len) From 23894780f8115054d836715af4eea8536038f0bb Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Sun, 2 Jun 2024 03:39:42 +0200 Subject: [PATCH 07/14] Fixed tests, Some simplifications --- src/features.xml | 10 -- src/tools/GraphemeTableGen/Program.cs | 6 +- src/types/CodepointWidthDetector.cpp | 42 ++++---- src/types/inc/CodepointWidthDetector.hpp | 31 +++--- .../ut_types/CodepointWidthDetectorTests.cpp | 98 +++++++++++-------- 5 files changed, 99 insertions(+), 88 deletions(-) diff --git a/src/features.xml b/src/features.xml index e1b2024cf6e..c2af5ed009a 100644 --- a/src/features.xml +++ b/src/features.xml @@ -155,14 +155,4 @@ - - Feature_Graphemes - Enables support for grapheme clusters - AlwaysDisabled - - Dev - Canary - - - diff --git a/src/tools/GraphemeTableGen/Program.cs b/src/tools/GraphemeTableGen/Program.cs index fd838bc8e85..da4fd42525c 100644 --- a/src/tools/GraphemeTableGen/Program.cs +++ b/src/tools/GraphemeTableGen/Program.cs @@ -184,17 +184,17 @@ buf.Append($" return s{trie.Stages.Count - 1};\n"); buf.Append("}\n"); -buf.Append("constexpr uint8_t ucdGraphemeJoins(const uint8_t state, const uint8_t lead, const uint8_t trail) noexcept\n"); +buf.Append("constexpr int ucdGraphemeJoins(const int state, const int lead, const int trail) noexcept\n"); buf.Append("{\n"); buf.Append(" const auto l = lead & 15;\n"); buf.Append(" const auto t = trail & 15;\n"); buf.Append($" return (s_joinRules[state][l] >> (t * {BitOperations.PopCount(Ω)})) & {Ω};\n"); buf.Append("}\n"); -buf.Append("constexpr bool ucdGraphemeDone(const uint8_t state) noexcept\n"); +buf.Append("constexpr bool ucdGraphemeDone(const int state) noexcept\n"); buf.Append("{\n"); buf.Append($" return state == {Ω};\n"); buf.Append("}\n"); -buf.Append("constexpr uint8_t ucdToCharacterWidth(const uint8_t val) noexcept\n"); +buf.Append("constexpr int ucdToCharacterWidth(const int val) noexcept\n"); buf.Append("{\n"); buf.Append(" 
return val >> 6;\n"); buf.Append("}\n"); diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index ff8aa1df47e..7be83e7de95 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -581,7 +581,7 @@ static constexpr uint32_t s_joinRules[2][16] = { 0b00000000000000000000000000000000, }, }; -constexpr uint8_t ucdLookup(const char32_t cp) noexcept +constexpr int ucdLookup(const char32_t cp) noexcept { const auto s0 = s_stage0[cp >> 11]; const auto s1 = s_stage1[s0 + ((cp >> 6) & 31)]; @@ -589,17 +589,17 @@ constexpr uint8_t ucdLookup(const char32_t cp) noexcept const auto s3 = s_stage3[s2 + ((cp >> 0) & 7)]; return s3; } -constexpr uint8_t ucdGraphemeJoins(const uint8_t state, const uint8_t lead, const uint8_t trail) noexcept +constexpr int ucdGraphemeJoins(const int state, const int lead, const int trail) noexcept { const auto l = lead & 15; const auto t = trail & 15; return (s_joinRules[state][l] >> (t * 2)) & 3; } -constexpr bool ucdGraphemeDone(const uint8_t state) noexcept +constexpr bool ucdGraphemeDone(const int state) noexcept { return state == 3; } -constexpr uint8_t ucdToCharacterWidth(const uint8_t val) noexcept +constexpr int ucdToCharacterWidth(const int val) noexcept { return val >> 6; } @@ -695,9 +695,9 @@ bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_v } auto clusterEnd = clusterBeg; - uint8_t state = s._state; - uint8_t totalWidth = 0; - uint8_t lead; + int state = s._state; + int totalWidth = 0; + int lead; char32_t cp; // The _state is stored ~flipped, so that we can differentiate @@ -746,7 +746,8 @@ bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_v if (ucdGraphemeDone(state)) { - state = 255; + // We'll later do a `state = ~state` which will result in `state == 0`. 
+ state = ~0; lead = 0; break; } @@ -764,7 +765,7 @@ bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_v s._state = state; s._last = lead; s._totalWidth = totalWidth; - return state != 0; + return clusterEnd < end; } // This code is identical to GraphemeNext() but with the order of operations reversed since we're iterating backwards. @@ -791,9 +792,9 @@ bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_v } auto clusterBeg = clusterEnd; - uint8_t state = s._state; - uint8_t totalWidth = 0; - uint8_t trail; + int state = s._state; + int totalWidth = 0; + int trail; char32_t cp; // The _state is stored ~flipped, so that we can differentiate @@ -842,7 +843,8 @@ bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_v if (ucdGraphemeDone(state)) { - state = 255; + // We'll later do a `state = ~state` which will result in `state == 0`. + state = ~0; trail = 0; break; } @@ -860,7 +862,7 @@ bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_v s._state = state; s._last = trail; s._totalWidth = totalWidth; - return state != 0; + return clusterBeg > beg; } __declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept @@ -871,8 +873,8 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(Grapheme } auto clusterEnd = clusterBeg; - uint8_t state = s._state; - uint8_t totalWidth = 0; + int state = s._state; + int totalWidth = 0; for (;;) { @@ -906,7 +908,7 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(Grapheme s.len = static_cast(clusterEnd - clusterBeg); s.width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 
2 : totalWidth); s._state = state; - return state != 0; + return clusterEnd < end; } __declspec(noinline) bool CodepointWidthDetector::_graphemePrevWcswidth(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept @@ -942,7 +944,7 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemePrevWcswidth(Grapheme s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); s.width = totalWidth; - return totalWidth != 0; + return clusterBeg > beg; } bool CodepointWidthDetector::_graphemeNextConsole(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept @@ -983,7 +985,7 @@ bool CodepointWidthDetector::_graphemePrevConsole(GraphemeState& s, const wchar_ // Call the function specified via SetFallbackMethod() to turn ambiguous (width = 3) into narrow/wide. // Caches the results in _fallbackCache. -__declspec(noinline) uint8_t CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint) noexcept +__declspec(noinline) int CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint) noexcept try { // Ambiguous glyphs are considered narrow by default. See microsoft/terminal#2066 for more info. @@ -1011,7 +1013,7 @@ try len = 2; } - const uint8_t width = _pfnFallbackMethod({ &buf[0], len }) ? 2 : 1; + const int width = _pfnFallbackMethod({ &buf[0], len }) ? 2 : 1; _fallbackCache.insert_or_assign(codepoint, width); return width; } diff --git a/src/types/inc/CodepointWidthDetector.hpp b/src/types/inc/CodepointWidthDetector.hpp index 3ebb98db202..cf1e70aa9ac 100644 --- a/src/types/inc/CodepointWidthDetector.hpp +++ b/src/types/inc/CodepointWidthDetector.hpp @@ -23,26 +23,29 @@ struct GraphemeState { // These are the [out] parameters for GraphemeNext/Prev. 
// - // If a previous call to GraphemeNext/Prev returned false (= reached the end of the string), then on the first call - // with the next string argument, beg/len will contain the parts of the grapheme cluster that are found in that + // If a previous call returned false (= reached the end of the string), then on the first call with + // the next string, beg/len will contain the parts of the grapheme cluster that are found in that // new string argument. That's true even if the two strings don't join to form a single cluster. - // In that case beg/len will simply be an empty string. It's basically an indicator in that case for - // "yup, that cluster in the last string was complete after all". + // In that case beg/len will simply be an empty string. It basically tells you + // "Yup, that cluster in the last string was complete after all". // - // width on the other hand will be updated to always contain the width of the complete cluster, - // even if the cluster is split across multiple string arguments. + // However, width will always be updated to represent the width of the current cluster. + // + // For instance, if the first string is a narrow emoji and the second one is U+FE0F, the first call will return + // the emoji with a width of 1, and the second call will return U+FE0F with a width of 2. + // You know these two belong together because the first call returned false. + // The total width is not 1+2 but rather just 2. const wchar_t* beg = nullptr; size_t len = 0; // width will always be either 1 or 2. - int32_t width = 0; + int width = 0; // If GraphemeNext/Prev return false (= reached the end of the string), they'll fill these struct // members with some info so that we can check if it joins with the start of the next string argument. - // _state is stored ~flipped, so that we can differentiate between it being unset (0) and it being set to 0 (~0 = 255). + // _state is stored ~flipped, so that we can differentiate between it being unset (0) and it being set to 0 (~0 = -1).
- uint8_t _state = 0; - uint8_t _last = 0; - uint8_t _totalWidth = 0; - uint8_t _unused = 0; + int _state = 0; + int _last = 0; + int _totalWidth = 0; }; struct CodepointWidthDetector @@ -62,10 +65,10 @@ struct CodepointWidthDetector __declspec(noinline) bool _graphemePrevWcswidth(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept; __declspec(noinline) bool _graphemeNextConsole(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept; __declspec(noinline) bool _graphemePrevConsole(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept; - __declspec(noinline) uint8_t _checkFallbackViaCache(char32_t codepoint) noexcept; + __declspec(noinline) int _checkFallbackViaCache(char32_t codepoint) noexcept; - std::unordered_map _fallbackCache; + std::unordered_map _fallbackCache; std::function _pfnFallbackMethod; TextMeasurementMode _mode = TextMeasurementMode::Graphemes; - uint8_t _ambiguousWidth = 1; + int _ambiguousWidth = 1; }; diff --git a/src/types/ut_types/CodepointWidthDetectorTests.cpp b/src/types/ut_types/CodepointWidthDetectorTests.cpp index ab5a62a088a..e1f9879951b 100644 --- a/src/types/ut_types/CodepointWidthDetectorTests.cpp +++ b/src/types/ut_types/CodepointWidthDetectorTests.cpp @@ -6,9 +6,6 @@ #include "../types/inc/CodepointWidthDetector.hpp" -// Due to the Feature_Graphemes::IsEnabled() feature flagging, some code may be disabled. -#pragma warning(disable : 4702) // unreachable code - // FYI at the time of writing you may have to generate this table in cmd with // go run CodepointWidthDetectorTests_gen.go > temp.txt // because PowerShell garbles Unicode text between piped commands. 
@@ -1235,11 +1232,6 @@ class CodepointWidthDetectorTests TEST_METHOD(GraphemeBreakTest) { - if constexpr (!Feature_Graphemes::IsEnabled()) - { - return; - } - WEX::TestExecution::DisableVerifyExceptions disableVerifyExceptions{}; WEX::TestExecution::SetVerifyOutput verifyOutputScope{ WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures }; @@ -1267,20 +1259,26 @@ class CodepointWidthDetectorTests } actual.clear(); - for (size_t beg = 0; beg < text.size();) + for (GraphemeState state;;) { - const auto end = cwd.GraphemeNext(text, beg, nullptr); - actual.emplace_back(til::clamp_slice_abs(text, beg, end)); - beg = end; + const auto ok = cwd.GraphemeNext(state, text); + actual.emplace_back(state.beg, state.len); + if (!ok) + { + break; + } } VERIFY_ARE_EQUAL(expected, actual, test.comment); actual.clear(); - for (size_t end = text.size(); end > 0;) + for (GraphemeState state;;) { - const auto beg = cwd.GraphemePrev(text, end, nullptr); - actual.emplace_back(til::clamp_slice_abs(text, beg, end)); - end = beg; + const auto ok = cwd.GraphemePrev(state, text); + actual.emplace_back(state.beg, state.len); + if (!ok) + { + break; + } } std::reverse(actual.begin(), actual.end()); VERIFY_ARE_EQUAL(expected, actual, test.comment); @@ -1289,11 +1287,6 @@ class CodepointWidthDetectorTests TEST_METHOD(BasicGraphemes) { - if constexpr (!Feature_Graphemes::IsEnabled()) - { - return; - } - static constexpr std::wstring_view text{ L"a\u0363e\u0364\u0364i\u0365" }; auto& cwd = CodepointWidthDetector::Singleton(); @@ -1303,13 +1296,15 @@ class CodepointWidthDetectorTests std::vector actualAdvances; std::vector actualWidths; - for (size_t beg = 0; beg < text.size();) + for (GraphemeState state;;) { - int width; - const auto end = cwd.GraphemeNext(text, beg, &width); - actualAdvances.emplace_back(end - beg); - actualWidths.emplace_back(width); - beg = end; + const auto ok = cwd.GraphemeNext(state, text); + actualAdvances.emplace_back(state.len); + 
actualWidths.emplace_back(state.width); + if (!ok) + { + break; + } } VERIFY_ARE_EQUAL(expectedAdvances, actualAdvances); @@ -1318,13 +1313,15 @@ class CodepointWidthDetectorTests actualAdvances.clear(); actualWidths.clear(); - for (size_t end = text.size(); end > 0;) + for (GraphemeState state;;) { - int width; - const auto beg = cwd.GraphemePrev(text, end, &width); - actualAdvances.emplace_back(end - beg); - actualWidths.emplace_back(width); - end = beg; + const auto ok = cwd.GraphemePrev(state, text); + actualAdvances.emplace_back(state.len); + actualWidths.emplace_back(state.width); + if (!ok) + { + break; + } } std::reverse(actualAdvances.begin(), actualAdvances.end()); @@ -1336,18 +1333,37 @@ class CodepointWidthDetectorTests TEST_METHOD(DevanagariConjunctLinker) { - if constexpr (!Feature_Graphemes::IsEnabled()) - { - return; - } + static constexpr std::wstring_view text{ L"\u0915\u094D\u094D\u0924" }; + auto& cwd = CodepointWidthDetector::Singleton(); + + GraphemeState state; + cwd.GraphemeNext(state, text); + VERIFY_ARE_EQUAL(4u, state.len); + VERIFY_ARE_EQUAL(2, state.width); + } + + TEST_METHOD(ChunkedText) + { static constexpr std::wstring_view text{ L"\u0915\u094D\u094D\u0924" }; auto& cwd = CodepointWidthDetector::Singleton(); + bool ok = false; + GraphemeState state; + + ok = cwd.GraphemeNext(state, L"\u2620"); + VERIFY_IS_FALSE(ok); + VERIFY_ARE_EQUAL(1u, state.len); + VERIFY_ARE_EQUAL(1, state.width); + + ok = cwd.GraphemeNext(state, L"\uFE0F"); + VERIFY_IS_FALSE(ok); + VERIFY_ARE_EQUAL(1u, state.len); + VERIFY_ARE_EQUAL(2, state.width); - int width; - const auto end = cwd.GraphemeNext(text, 0, &width); - VERIFY_ARE_EQUAL(4u, end); - VERIFY_ARE_EQUAL(2, width); + ok = cwd.GraphemeNext(state, L"a"); + VERIFY_IS_TRUE(ok); + VERIFY_ARE_EQUAL(0u, state.len); + VERIFY_ARE_EQUAL(2, state.width); } }; From eb3094ded234f9f7542df76fd2f89c11b593a7ef Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Sun, 2 Jun 2024 14:24:39 +0200 Subject: [PATCH 08/14] Fix 
build --- src/host/ut_host/ConsoleArgumentsTests.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/host/ut_host/ConsoleArgumentsTests.cpp b/src/host/ut_host/ConsoleArgumentsTests.cpp index 0f60f33237e..e638705fdba 100644 --- a/src/host/ut_host/ConsoleArgumentsTests.cpp +++ b/src/host/ut_host/ConsoleArgumentsTests.cpp @@ -31,7 +31,6 @@ class ConsoleArgumentsTests TEST_METHOD(HeadlessArgTests); TEST_METHOD(SignalHandleTests); - TEST_METHOD(FeatureArgTests); }; ConsoleArguments CreateAndParse(std::wstring& commandline, HANDLE hVtIn, HANDLE hVtOut) From af6392302a53144563de241dadc8fb5d0ea840f6 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Sun, 2 Jun 2024 18:14:59 +0200 Subject: [PATCH 09/14] Less diff, Less problems --- .github/actions/spelling/expect/expect.txt | 6 - src/buffer/out/Row.cpp | 6 +- src/host/ConsoleArguments.cpp | 83 ++++++++--- src/host/ConsoleArguments.hpp | 24 ++- src/host/VtIo.cpp | 2 +- src/host/inputBuffer.cpp | 3 +- src/host/stream.cpp | 7 + src/host/ut_host/ConsoleArgumentsTests.cpp | 141 ++++++++++++++++++ src/propslib/RegistrySerialization.cpp | 2 +- src/terminal/adapter/adaptDispatch.cpp | 90 ++++++----- .../adapter/ut_adapter/adapterTest.cpp | 19 +++ src/winconpty/winconpty.cpp | 28 ++-- 12 files changed, 329 insertions(+), 82 deletions(-) diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 186133a074e..c97135a39b6 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -295,7 +295,6 @@ CREATESTRUCT CREATESTRUCTW createvpack crisman -CRLFs crloew CRTLIBS csbi @@ -597,7 +596,6 @@ ffd FFDE FFFD FFFDb -FFrom fgbg FGCOLOR FGHIJ @@ -1165,7 +1163,6 @@ NOMINMAX NOMOVE NONALERT nonbreaking -noncharacter nonclient NONINFRINGEMENT NONPREROTATED @@ -1478,7 +1475,6 @@ READMODE rectread redef redefinable -Redir redist REDSCROLL REFCLSID @@ -1889,7 +1885,6 @@ UPDATEDISPLAY UPDOWN UPKEY upss -UPSS uregex URegular usebackq @@ -1984,7 
+1979,6 @@ wchars WCIA WCIW WCSHELPER -wcsicmp wcsrev wcswidth wddm diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 9d1469c1c4f..4bef233c179 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -635,7 +635,7 @@ catch (...) throw; } -void ROW::WriteHelper::ReplaceText() noexcept +[[msvc::forceinline]] void ROW::WriteHelper::ReplaceText() noexcept { // This function starts with a fast-pass for ASCII. ASCII is still predominant in technical areas. // @@ -662,7 +662,7 @@ void ROW::WriteHelper::ReplaceText() noexcept charsConsumed = ch - chBeg; } -void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off) noexcept +[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off) noexcept { auto& cwd = CodepointWidthDetector::Singleton(); const auto len = chars.size(); @@ -849,7 +849,7 @@ catch (...) } #pragma warning(pop) -void ROW::WriteHelper::Finish() +[[msvc::forceinline]] void ROW::WriteHelper::Finish() { colEndDirty = row._adjustForward(colEndDirty); diff --git a/src/host/ConsoleArguments.cpp b/src/host/ConsoleArguments.cpp index 660dea62c3d..422f563bd40 100644 --- a/src/host/ConsoleArguments.cpp +++ b/src/host/ConsoleArguments.cpp @@ -7,21 +7,23 @@ #include using namespace Microsoft::Console::Utils; -static constexpr std::wstring_view VT_MODE_ARG{ L"--vtmode" }; -static constexpr std::wstring_view HEADLESS_ARG{ L"--headless" }; -static constexpr std::wstring_view SERVER_HANDLE_ARG{ L"--server" }; -static constexpr std::wstring_view SIGNAL_HANDLE_ARG{ L"--signal" }; -static constexpr std::wstring_view HANDLE_PREFIX{ L"0x" }; -static constexpr std::wstring_view CLIENT_COMMANDLINE_ARG{ L"--" }; -static constexpr std::wstring_view FORCE_V1_ARG{ L"-ForceV1" }; -static constexpr std::wstring_view FORCE_NO_HANDOFF_ARG{ L"-ForceNoHandoff" }; -static constexpr std::wstring_view FILEPATH_LEADER_PREFIX{ L"\\??\\" }; -static constexpr std::wstring_view WIDTH_ARG{ L"--width" }; -static constexpr std::wstring_view 
HEIGHT_ARG{ L"--height" }; -static constexpr std::wstring_view INHERIT_CURSOR_ARG{ L"--inheritcursor" }; -static constexpr std::wstring_view RESIZE_QUIRK{ L"--resizeQuirk" }; +const std::wstring_view ConsoleArguments::VT_MODE_ARG = L"--vtmode"; +const std::wstring_view ConsoleArguments::HEADLESS_ARG = L"--headless"; +const std::wstring_view ConsoleArguments::SERVER_HANDLE_ARG = L"--server"; +const std::wstring_view ConsoleArguments::SIGNAL_HANDLE_ARG = L"--signal"; +const std::wstring_view ConsoleArguments::HANDLE_PREFIX = L"0x"; +const std::wstring_view ConsoleArguments::CLIENT_COMMANDLINE_ARG = L"--"; +const std::wstring_view ConsoleArguments::FORCE_V1_ARG = L"-ForceV1"; +const std::wstring_view ConsoleArguments::FORCE_NO_HANDOFF_ARG = L"-ForceNoHandoff"; +const std::wstring_view ConsoleArguments::FILEPATH_LEADER_PREFIX = L"\\??\\"; +const std::wstring_view ConsoleArguments::WIDTH_ARG = L"--width"; +const std::wstring_view ConsoleArguments::HEIGHT_ARG = L"--height"; +const std::wstring_view ConsoleArguments::INHERIT_CURSOR_ARG = L"--inheritcursor"; +const std::wstring_view ConsoleArguments::RESIZE_QUIRK = L"--resizeQuirk"; +const std::wstring_view ConsoleArguments::FEATURE_ARG = L"--feature"; +const std::wstring_view ConsoleArguments::FEATURE_PTY_ARG = L"pty"; +const std::wstring_view ConsoleArguments::COM_SERVER_ARG = L"-Embedding"; static constexpr std::wstring_view GLYPH_WIDTH{ L"--textMeasurement" }; -static constexpr std::wstring_view COM_SERVER_ARG{ L"-Embedding" }; // NOTE: Thinking about adding more commandline args that control conpty, for // the Terminal? Make sure you add them to the commandline in // ConsoleEstablishHandoff. We use that to initialize the ConsoleArguments for a @@ -203,6 +205,37 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In return (hasNext) ? S_OK : E_INVALIDARG; } +// Routine Description: +// Similar to s_GetArgumentValue. 
+// Attempts to get the next arg as a "feature" arg - this can be used for +// feature detection. +// If the next arg is not recognized, then we don't support that feature. +// Currently, the only supported feature arg is `pty`, to identify pty support. +// Arguments: +// args: A collection of wstrings representing command-line arguments +// index: the index of the argument of which to get the value for. The value +// should be at (index+1). index will be decremented by one on success. +// pSetting: receives the string at index+1 +// Return Value: +// S_OK if we parsed the string successfully, otherwise E_INVALIDARG indicating +// failure. +[[nodiscard]] HRESULT ConsoleArguments::s_HandleFeatureValue(_Inout_ std::vector& args, _Inout_ size_t& index) +{ + auto hr = E_INVALIDARG; + auto hasNext = (index + 1) < args.size(); + if (hasNext) + { + s_ConsumeArg(args, index); + auto value = args[index]; + if (value == FEATURE_PTY_ARG) + { + hr = S_OK; + } + s_ConsumeArg(args, index); + } + return (hasNext) ? 
hr : E_INVALIDARG; +} + // Method Description: // Routine Description: // Given the commandline of tokens `args`, tries to find the argument at @@ -353,10 +386,13 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In std::vector args; auto hr = S_OK; + // Make a mutable copy of the commandline for tokenizing + auto copy = _commandline; + // Tokenize the commandline auto argc = 0; wil::unique_hlocal_ptr argv; - argv.reset(CommandLineToArgvW(_commandline.c_str(), &argc)); + argv.reset(CommandLineToArgvW(copy.c_str(), &argc)); RETURN_LAST_ERROR_IF(argv == nullptr); for (auto i = 1; i < argc; ++i) @@ -371,7 +407,7 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In { hr = E_INVALIDARG; - const std::wstring_view arg{ args[i] }; + auto arg = args[i]; if (arg.substr(0, HANDLE_PREFIX.length()) == HANDLE_PREFIX || arg == SERVER_HANDLE_ARG) @@ -380,7 +416,7 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In // --server 0x4 (new method) // 0x4 (legacy method) // If we see >1 of these, it's invalid. 
- std::wstring serverHandleVal{ arg }; + auto serverHandleVal = arg; if (arg == SERVER_HANDLE_ARG) { @@ -450,6 +486,10 @@ void ConsoleArguments::s_ConsumeArg(_Inout_ std::vector& args, _In { hr = s_GetArgumentValue(args, i, &_height); } + else if (arg == FEATURE_ARG) + { + hr = s_HandleFeatureValue(args, i); + } else if (arg == HEADLESS_ARG) { _headless = true; @@ -580,12 +620,17 @@ HANDLE ConsoleArguments::GetVtOutHandle() const return _vtOutHandle; } -const std::wstring& ConsoleArguments::GetClientCommandline() const +std::wstring ConsoleArguments::GetOriginalCommandLine() const +{ + return _commandline; +} + +std::wstring ConsoleArguments::GetClientCommandline() const { return _clientCommandline; } -const std::wstring& ConsoleArguments::GetVtMode() const +std::wstring ConsoleArguments::GetVtMode() const { return _vtMode; } diff --git a/src/host/ConsoleArguments.hpp b/src/host/ConsoleArguments.hpp index 6d41ed60ffe..3d26ae64905 100644 --- a/src/host/ConsoleArguments.hpp +++ b/src/host/ConsoleArguments.hpp @@ -44,8 +44,9 @@ class ConsoleArguments bool HasSignalHandle() const; HANDLE GetSignalHandle() const; - const std::wstring& GetClientCommandline() const; - const std::wstring& GetVtMode() const; + std::wstring GetOriginalCommandLine() const; + std::wstring GetClientCommandline() const; + std::wstring GetVtMode() const; const std::wstring& GetTextMeasurement() const; bool GetForceV1() const; bool GetForceNoHandoff() const; @@ -59,6 +60,23 @@ class ConsoleArguments void EnableConptyModeForTests(); #endif + static const std::wstring_view VT_MODE_ARG; + static const std::wstring_view HEADLESS_ARG; + static const std::wstring_view SERVER_HANDLE_ARG; + static const std::wstring_view SIGNAL_HANDLE_ARG; + static const std::wstring_view HANDLE_PREFIX; + static const std::wstring_view CLIENT_COMMANDLINE_ARG; + static const std::wstring_view FORCE_V1_ARG; + static const std::wstring_view FORCE_NO_HANDOFF_ARG; + static const std::wstring_view FILEPATH_LEADER_PREFIX; + 
static const std::wstring_view WIDTH_ARG; + static const std::wstring_view HEIGHT_ARG; + static const std::wstring_view INHERIT_CURSOR_ARG; + static const std::wstring_view RESIZE_QUIRK; + static const std::wstring_view FEATURE_ARG; + static const std::wstring_view FEATURE_PTY_ARG; + static const std::wstring_view COM_SERVER_ARG; + private: #ifdef UNIT_TESTING // This accessor used to create a copy of this class for unit testing comparison ease. @@ -134,6 +152,8 @@ class ConsoleArguments [[nodiscard]] static HRESULT s_GetArgumentValue(_Inout_ std::vector& args, _Inout_ size_t& index, _Out_opt_ short* const pSetting); + [[nodiscard]] static HRESULT s_HandleFeatureValue(_Inout_ std::vector& args, + _Inout_ size_t& index); [[nodiscard]] static HRESULT s_ParseHandleArg(const std::wstring& handleAsText, _Inout_ DWORD& handleAsVal); diff --git a/src/host/VtIo.cpp b/src/host/VtIo.cpp index 4399a97c09e..3170d3a8185 100644 --- a/src/host/VtIo.cpp +++ b/src/host/VtIo.cpp @@ -9,11 +9,11 @@ #include "../renderer/vt/Xterm256Engine.hpp" #include "../renderer/base/renderer.hpp" +#include "../types/inc/CodepointWidthDetector.hpp" #include "../types/inc/utils.hpp" #include "handle.h" // LockConsole #include "input.h" // ProcessCtrlEvents #include "output.h" // CloseConsoleProcessState -#include "../types/inc/CodepointWidthDetector.hpp" using namespace Microsoft::Console; using namespace Microsoft::Console::Render; diff --git a/src/host/inputBuffer.cpp b/src/host/inputBuffer.cpp index 6f60ab1c5bc..a843a01082a 100644 --- a/src/host/inputBuffer.cpp +++ b/src/host/inputBuffer.cpp @@ -810,8 +810,7 @@ bool InputBuffer::_CoalesceEvent(const INPUT_RECORD& inEvent) noexcept // You can't update the repeat count of such a A,B pair, because they're stored as A,A,B,B (down-down, up-up). // I believe the proper approach is to store pairs of characters as pairs, update their combined // repeat count and only when they're being read de-coalesce them into their alternating form. 
- // TODO:GH#8000 IsGlyphFullWidth was replaced with til::is_surrogate to get rid off the former. Neither approach is fully correct. - !til::is_surrogate(inKey.uChar.UnicodeChar)) + !IsGlyphFullWidth(inKey.uChar.UnicodeChar)) { lastKey.wRepeatCount += inKey.wRepeatCount; return true; diff --git a/src/host/stream.cpp b/src/host/stream.cpp index 62a099f4784..11a7f6a4115 100644 --- a/src/host/stream.cpp +++ b/src/host/stream.cpp @@ -2,11 +2,18 @@ // Licensed under the MIT license. #include "precomp.h" + +#include "_stream.h" #include "stream.h" #include "handle.h" #include "misc.h" #include "readDataRaw.hpp" + +#include "ApiRoutines.h" + +#include "../types/inc/GlyphWidth.hpp" + #include "../interactivity/inc/ServiceLocator.hpp" using Microsoft::Console::Interactivity::ServiceLocator; diff --git a/src/host/ut_host/ConsoleArgumentsTests.cpp b/src/host/ut_host/ConsoleArgumentsTests.cpp index e638705fdba..d640469f236 100644 --- a/src/host/ut_host/ConsoleArgumentsTests.cpp +++ b/src/host/ut_host/ConsoleArgumentsTests.cpp @@ -31,6 +31,7 @@ class ConsoleArgumentsTests TEST_METHOD(HeadlessArgTests); TEST_METHOD(SignalHandleTests); + TEST_METHOD(FeatureArgTests); }; ConsoleArguments CreateAndParse(std::wstring& commandline, HANDLE hVtIn, HANDLE hVtOut) @@ -1143,3 +1144,143 @@ void ConsoleArgumentsTests::SignalHandleTests() false), // runAsComServer false); // successful parse? } + +void ConsoleArgumentsTests::FeatureArgTests() +{ + // Just some assorted positive values that could be valid handles. No specific correlation to anything. 
+ auto hInSample = UlongToHandle(0x10); + auto hOutSample = UlongToHandle(0x24); + + std::wstring commandline; + + commandline = L"conhost.exe --feature pty"; + ArgTestsRunner(L"#1 Normal case, pass a supported feature", + commandline, + hInSample, + hOutSample, + ConsoleArguments(commandline, + L"", + hInSample, + hOutSample, + L"", // vtMode + 0, // width + 0, // height + false, // forceV1 + false, // forceNoHandoff + false, // headless + true, // createServerHandle + 0, // serverHandle + 0, // signalHandle + false, // inheritCursor + false), // runAsComServer + true); // successful parse? + commandline = L"conhost.exe --feature tty"; + ArgTestsRunner(L"#2 Error case, pass an unsupported feature", + commandline, + hInSample, + hOutSample, + ConsoleArguments(commandline, + L"", + hInSample, + hOutSample, + L"", // vtMode + 0, // width + 0, // height + false, // forceV1 + false, // forceNoHandoff + false, // headless + true, // createServerHandle + 0, // serverHandle + 0, // signalHandle + false, // inheritCursor + false), // runAsComServer + false); // successful parse? + + commandline = L"conhost.exe --feature pty --feature pty"; + ArgTestsRunner(L"#3 Many supported features", + commandline, + hInSample, + hOutSample, + ConsoleArguments(commandline, + L"", + hInSample, + hOutSample, + L"", // vtMode + 0, // width + 0, // height + false, // forceV1 + false, // forceNoHandoff + false, // headless + true, // createServerHandle + 0, // serverHandle + 0, // signalHandle + false, // inheritCursor + false), // runAsComServer + true); // successful parse? 
+ + commandline = L"conhost.exe --feature pty --feature tty"; + ArgTestsRunner(L"#4 At least one unsupported feature", + commandline, + hInSample, + hOutSample, + ConsoleArguments(commandline, + L"", + hInSample, + hOutSample, + L"", // vtMode + 0, // width + 0, // height + false, // forceV1 + false, // forceNoHandoff + false, // headless + true, // createServerHandle + 0, // serverHandle + 0, // signalHandle + false, // inheritCursor + false), // runAsComServer + false); // successful parse? + + commandline = L"conhost.exe --feature pty --feature"; + ArgTestsRunner(L"#5 no value to the feature flag", + commandline, + hInSample, + hOutSample, + ConsoleArguments(commandline, + L"", + hInSample, + hOutSample, + L"", // vtMode + 0, // width + 0, // height + false, // forceV1 + false, // forceNoHandoff + false, // headless + true, // createServerHandle + 0, // serverHandle + 0, // signalHandle + false, // inheritCursor + false), // runAsComServer + false); // successful parse? + + commandline = L"conhost.exe --feature pty --feature --signal foo"; + ArgTestsRunner(L"#6 a invalid feature value that is otherwise a valid arg", + commandline, + hInSample, + hOutSample, + ConsoleArguments(commandline, + L"", + hInSample, + hOutSample, + L"", // vtMode + 0, // width + 0, // height + false, // forceV1 + false, // forceNoHandoff + false, // headless + true, // createServerHandle + 0, // serverHandle + 0, // signalHandle + false, // inheritCursor + false), // runAsComServer + false); // successful parse? 
+} diff --git a/src/propslib/RegistrySerialization.cpp b/src/propslib/RegistrySerialization.cpp index 8f737981a2c..37ff1c34711 100644 --- a/src/propslib/RegistrySerialization.cpp +++ b/src/propslib/RegistrySerialization.cpp @@ -62,7 +62,7 @@ const RegistrySerialization::_RegPropertyMap RegistrySerialization::s_PropertyMa { _RegPropertyType::Boolean, CONSOLE_REGISTRY_TERMINALSCROLLING, SET_FIELD_AND_SIZE(_TerminalScrolling) }, { _RegPropertyType::Boolean, CONSOLE_REGISTRY_USEDX, SET_FIELD_AND_SIZE(_fUseDx) }, { _RegPropertyType::Boolean, CONSOLE_REGISTRY_COPYCOLOR, SET_FIELD_AND_SIZE(_fCopyColor) }, - { _RegPropertyType::Dword, L"TextMeasurement", SET_FIELD_AND_SIZE(_textMeasurement) }, + { _RegPropertyType::Dword, L"TextMeasurement", SET_FIELD_AND_SIZE(_textMeasurement) }, #if TIL_FEATURE_CONHOSTATLASENGINE_ENABLED { _RegPropertyType::Boolean, L"EnableBuiltinGlyphs", SET_FIELD_AND_SIZE(_fEnableBuiltinGlyphs) }, #endif diff --git a/src/terminal/adapter/adaptDispatch.cpp b/src/terminal/adapter/adaptDispatch.cpp index 5da19619d6d..c5a1fd1a2d4 100644 --- a/src/terminal/adapter/adaptDispatch.cpp +++ b/src/terminal/adapter/adaptDispatch.cpp @@ -2061,111 +2061,133 @@ bool AdaptDispatch::ResetMode(const DispatchTypes::ModeParams param) // - True if handled successfully. False otherwise. bool AdaptDispatch::RequestMode(const DispatchTypes::ModeParams param) { - static constexpr auto mapTempBoolState = [](bool enabled) { return enabled ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; }; - static constexpr auto mapPermBoolState = [](bool enabled) { return enabled ? DispatchTypes::DECRPM_PermanentlyEnabled : DispatchTypes::DECRPM_PermanentlyDisabled; }; - auto state = DispatchTypes::DECRPM_Unsupported; + VTInt state = DispatchTypes::DECRPM_Unsupported; switch (param) { case DispatchTypes::ModeParams::IRM_InsertReplaceMode: - state = mapTempBoolState(_modes.test(Mode::InsertReplace)); + state = _modes.test(Mode::InsertReplace) ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::LNM_LineFeedNewLineMode: // VT apps expect that the system and input modes are the same, so if // they become out of sync, we just act as if LNM mode isn't supported. if (_api.GetSystemMode(ITerminalApi::Mode::LineFeed) == _terminalInput.GetInputMode(TerminalInput::Mode::LineFeed)) { - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::LineFeed)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::LineFeed) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; } break; case DispatchTypes::ModeParams::DECCKM_CursorKeysMode: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::CursorKey)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::CursorKey) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECANM_AnsiMode: - state = mapTempBoolState(_api.GetStateMachine().GetParserMode(StateMachine::Mode::Ansi)); + state = _api.GetStateMachine().GetParserMode(StateMachine::Mode::Ansi) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECCOLM_SetNumberOfColumns: // DECCOLM is not supported in conpty mode if (!_api.IsConsolePty()) { - state = mapTempBoolState(_modes.test(Mode::Column)); + state = _modes.test(Mode::Column) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; } break; case DispatchTypes::ModeParams::DECSCNM_ScreenMode: - state = mapTempBoolState(_renderSettings.GetRenderMode(RenderSettings::Mode::ScreenReversed)); + state = _renderSettings.GetRenderMode(RenderSettings::Mode::ScreenReversed) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECOM_OriginMode: - state = mapTempBoolState(_modes.test(Mode::Origin)); + state = _modes.test(Mode::Origin) ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECAWM_AutoWrapMode: - state = mapTempBoolState(_api.GetSystemMode(ITerminalApi::Mode::AutoWrap)); + state = _api.GetSystemMode(ITerminalApi::Mode::AutoWrap) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECARM_AutoRepeatMode: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::AutoRepeat)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::AutoRepeat) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::ATT610_StartCursorBlink: - state = mapTempBoolState(_pages.ActivePage().Cursor().IsBlinkingAllowed()); + state = _pages.ActivePage().Cursor().IsBlinkingAllowed() ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECTCEM_TextCursorEnableMode: - state = mapTempBoolState(_pages.ActivePage().Cursor().IsVisible()); + state = _pages.ActivePage().Cursor().IsVisible() ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::XTERM_EnableDECCOLMSupport: // DECCOLM is not supported in conpty mode if (!_api.IsConsolePty()) { - state = mapTempBoolState(_modes.test(Mode::AllowDECCOLM)); + state = _modes.test(Mode::AllowDECCOLM) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; } break; case DispatchTypes::ModeParams::DECPCCM_PageCursorCouplingMode: - state = mapTempBoolState(_modes.test(Mode::PageCursorCoupling)); + state = _modes.test(Mode::PageCursorCoupling) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECNKM_NumericKeypadMode: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::Keypad)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::Keypad) ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECBKM_BackarrowKeyMode: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::BackarrowKey)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::BackarrowKey) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECLRMM_LeftRightMarginMode: - state = mapTempBoolState(_modes.test(Mode::AllowDECSLRM)); + state = _modes.test(Mode::AllowDECSLRM) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::DECECM_EraseColorMode: - state = mapTempBoolState(_modes.test(Mode::EraseColor)); + state = _modes.test(Mode::EraseColor) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::VT200_MOUSE_MODE: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::DefaultMouseTracking)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::DefaultMouseTracking) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::BUTTON_EVENT_MOUSE_MODE: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::ButtonEventMouseTracking)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::ButtonEventMouseTracking) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::ANY_EVENT_MOUSE_MODE: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::AnyEventMouseTracking)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::AnyEventMouseTracking) ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::UTF8_EXTENDED_MODE: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::Utf8MouseEncoding)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::Utf8MouseEncoding) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::SGR_EXTENDED_MODE: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::SgrMouseEncoding)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::SgrMouseEncoding) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::FOCUS_EVENT_MODE: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::FocusEvent)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::FocusEvent) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::ALTERNATE_SCROLL: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::AlternateScroll)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::AlternateScroll) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::ASB_AlternateScreenBuffer: - state = mapTempBoolState(_usingAltBuffer); + state = _usingAltBuffer ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::XTERM_BracketedPasteMode: - state = mapTempBoolState(_api.GetSystemMode(ITerminalApi::Mode::BracketedPaste)); + state = _api.GetSystemMode(ITerminalApi::Mode::BracketedPaste) ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; case DispatchTypes::ModeParams::GCM_GraphemeClusterMode: - { - const auto mode = CodepointWidthDetector::Singleton().GetMode(); - state = mapPermBoolState(mode == TextMeasurementMode::Graphemes); + state = CodepointWidthDetector::Singleton().GetMode() == TextMeasurementMode::Graphemes ? DispatchTypes::DECRPM_PermanentlyEnabled : DispatchTypes::DECRPM_PermanentlyDisabled; break; - } case DispatchTypes::ModeParams::W32IM_Win32InputMode: - state = mapTempBoolState(_terminalInput.GetInputMode(TerminalInput::Mode::Win32)); + state = _terminalInput.GetInputMode(TerminalInput::Mode::Win32) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; + ; break; default: break; diff --git a/src/terminal/adapter/ut_adapter/adapterTest.cpp b/src/terminal/adapter/ut_adapter/adapterTest.cpp index c0ff50e5748..612f2d11c77 100644 --- a/src/terminal/adapter/ut_adapter/adapterTest.cpp +++ b/src/terminal/adapter/ut_adapter/adapterTest.cpp @@ -2059,6 +2059,25 @@ class AdapterTest _testGetSet->ValidateInputEvent(expectedResponse); } + TEST_METHOD(RequestPermanentModeTests) + { + BEGIN_TEST_METHOD_PROPERTIES() + TEST_METHOD_PROPERTY(L"Data:modeNumber", L"{2027}") + END_TEST_METHOD_PROPERTIES() + + VTInt modeNumber; + VERIFY_SUCCEEDED_RETURN(TestData::TryGetValue(L"modeNumber", modeNumber)); + const auto mode = DispatchTypes::DECPrivateMode(modeNumber); + + _testGetSet->PrepData(); + VERIFY_IS_TRUE(_pDispatch->ResetMode(mode)); // as a test to ensure that it stays permanently enabled (= 3) + VERIFY_IS_TRUE(_pDispatch->RequestMode(mode)); + + wchar_t expectedResponse[20]; + swprintf_s(expectedResponse, ARRAYSIZE(expectedResponse), L"\x1b[?%d;3$y", modeNumber); + _testGetSet->ValidateInputEvent(expectedResponse); + } + TEST_METHOD(RequestChecksumReportTests) { const auto requestChecksumReport = [this](const auto length) { diff --git a/src/winconpty/winconpty.cpp b/src/winconpty/winconpty.cpp index 
4bbc6b910ea..e3a915447a6 100644 --- a/src/winconpty/winconpty.cpp +++ b/src/winconpty/winconpty.cpp @@ -134,6 +134,7 @@ HRESULT _CreatePseudoConsole(const HANDLE hToken, // GH4061: Ensure that the path to executable in the format is escaped so C:\Program.exe cannot collide with C:\Program Files // This is plenty of space to hold the formatted string + wchar_t cmd[MAX_PATH]{}; const BOOL bInheritCursor = (dwFlags & PSEUDOCONSOLE_INHERIT_CURSOR) == PSEUDOCONSOLE_INHERIT_CURSOR; const BOOL bResizeQuirk = (dwFlags & PSEUDOCONSOLE_RESIZE_QUIRK) == PSEUDOCONSOLE_RESIZE_QUIRK; @@ -154,18 +155,17 @@ HRESULT _CreatePseudoConsole(const HANDLE hToken, break; } - wil::unique_process_heap_string cmd; - RETURN_IF_FAILED(wil::str_printf_nothrow( - cmd, - L"\"%s\" --headless %s%s%s--width %hd --height %hd --signal 0x%tx --server 0x%tx", - _ConsoleHostPath(), - bInheritCursor ? L"--inheritcursor " : L"", - bResizeQuirk ? L"--resizeQuirk " : L"", - textMeasurement, - size.X, - size.Y, - std::bit_cast(signalPipeConhostSide.get()), - std::bit_cast(serverHandle.get()))); + swprintf_s(cmd, + MAX_PATH, + L"\"%s\" --headless %s%s%s--width %hd --height %hd --signal 0x%tx --server 0x%tx", + _ConsoleHostPath(), + bInheritCursor ? L"--inheritcursor " : L"", + bResizeQuirk ? 
L"--resizeQuirk " : L"", + textMeasurement, + size.X, + size.Y, + std::bit_cast(signalPipeConhostSide.get()), + std::bit_cast(serverHandle.get())); STARTUPINFOEXW siEx{ 0 }; siEx.StartupInfo.cb = sizeof(STARTUPINFOEXW); @@ -221,7 +221,7 @@ HRESULT _CreatePseudoConsole(const HANDLE hToken, { // Call create process RETURN_IF_WIN32_BOOL_FALSE(CreateProcessW(_ConsoleHostPath(), - cmd.get(), + cmd, nullptr, nullptr, TRUE, @@ -236,7 +236,7 @@ HRESULT _CreatePseudoConsole(const HANDLE hToken, // Call create process RETURN_IF_WIN32_BOOL_FALSE(CreateProcessAsUserW(hToken, _ConsoleHostPath(), - cmd.get(), + cmd, nullptr, nullptr, TRUE, From bb47e9ea9aa3fa9a51aa86a4004fd855d2422d4e Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 3 Jun 2024 14:54:33 +0200 Subject: [PATCH 10/14] Less diff, again --- src/buffer/out/Row.cpp | 65 ++++++++++++++++++++++-------------------- src/buffer/out/Row.hpp | 2 +- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 4bef233c179..ce6ff63dec4 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -641,34 +641,34 @@ catch (...) // // We can infer the "end" from the amount of columns we're given (colLimit - colBeg), // because ASCII is always 1 column wide per character. 
- auto len = std::min(chars.size(), colLimit - colEnd); + auto it = chars.begin(); + const auto end = it + std::min(chars.size(), colLimit - colBeg); size_t ch = chBeg; - size_t off = 0; - for (; off < len; ++off) + while (it != end) { - if (chars[off] >= 0x80) [[unlikely]] + if (*it >= 0x80) [[unlikely]] { - _replaceTextUnicode(ch, off); + _replaceTextUnicode(ch, it); return; } til::at(row._charOffsets, colEnd) = gsl::narrow_cast(ch); ++colEnd; ++ch; + ++it; } colEndDirty = colEnd; charsConsumed = ch - chBeg; } -[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off) noexcept +[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept { auto& cwd = CodepointWidthDetector::Singleton(); - const auto len = chars.size(); // Check if the new text joins with the existing contents of the row to form a single grapheme cluster. - if (off == 0) + if (it == chars.begin()) { auto colPrev = colBeg; while (colPrev > 0 && row._uncheckedIsTrailer(--colPrev)) @@ -706,7 +706,7 @@ catch (...) } ch += state.len; - off += state.len; + it += state.len; } } else @@ -716,35 +716,38 @@ catch (...) // and let MeasureNext() find the next proper grapheme boundary. --colEnd; --ch; - --off; + --it; } - GraphemeState state{ .beg = chars.data() + off }; - - while (off < len) + if (const auto end = chars.end(); it != end) { - cwd.GraphemeNext(state, chars); + GraphemeState state{ .beg = &*it }; - const auto colEndNew = gsl::narrow_cast(colEnd + state.width); - if (colEndNew > colLimit) + do { - colEndDirty = colLimit; - charsConsumed = ch - chBeg; - return; - } + cwd.GraphemeNext(state, chars); - // Fill our char-offset buffer with 1 entry containing the mapping from the - // current column (colEnd) to the start of the glyph in the string (ch)... 
- til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(ch); - // ...followed by 0-N entries containing an indication that the - // columns are just a wide-glyph extension of the preceding one. - while (colEnd < colEndNew) - { - til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(ch | CharOffsetsTrailer); - } + const auto colEndNew = gsl::narrow_cast(colEnd + state.width); + if (colEndNew > colLimit) + { + colEndDirty = colLimit; + charsConsumed = ch - chBeg; + return; + } - ch += state.len; - off += state.len; + // Fill our char-offset buffer with 1 entry containing the mapping from the + // current column (colEnd) to the start of the glyph in the string (ch)... + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(ch); + // ...followed by 0-N entries containing an indication that the + // columns are just a wide-glyph extension of the preceding one. + while (colEnd < colEndNew) + { + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast(ch | CharOffsetsTrailer); + } + + ch += state.len; + it += state.len; + } while (it != end); } colEndDirty = colEnd; diff --git a/src/buffer/out/Row.hpp b/src/buffer/out/Row.hpp index 995ee3ac4c2..a13e6e7996a 100644 --- a/src/buffer/out/Row.hpp +++ b/src/buffer/out/Row.hpp @@ -186,7 +186,7 @@ class ROW final bool IsValid() const noexcept; void ReplaceCharacters(til::CoordType width) noexcept; void ReplaceText() noexcept; - void _replaceTextUnicode(size_t ch, size_t off) noexcept; + void _replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept; void CopyTextFrom(const std::span& charOffsets) noexcept; static void _copyOffsets(uint16_t* dst, const uint16_t* src, uint16_t size, uint16_t offset) noexcept; void Finish(); From c01f510611fa13bbc116beada2801479fa4d3527 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 5 Jun 2024 17:19:28 +0200 Subject: [PATCH 11/14] Fix settings reload, Fix chunked clustering --- src/buffer/out/Row.cpp | 6 +- src/cascadia/TerminalApp/AppLogic.cpp | 17 ++- 
src/types/CodepointWidthDetector.cpp | 138 +++++++++++------- src/types/inc/CodepointWidthDetector.hpp | 5 +- .../ut_types/CodepointWidthDetectorTests.cpp | 103 +++++++++---- 5 files changed, 184 insertions(+), 85 deletions(-) diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index ce6ff63dec4..a58f1cbf02c 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -687,7 +687,8 @@ catch (...) colBegDirty = colPrev; colEnd = colPrev; - const auto colEndNew = gsl::narrow_cast(colEnd + state.width); + const auto width = std::max(1, state.width); + const auto colEndNew = gsl::narrow_cast(colEnd + width); if (colEndNew > colLimit) { colEndDirty = colLimit; @@ -727,7 +728,8 @@ catch (...) { cwd.GraphemeNext(state, chars); - const auto colEndNew = gsl::narrow_cast(colEnd + state.width); + const auto width = std::max(1, state.width); + const auto colEndNew = gsl::narrow_cast(colEnd + width); if (colEndNew > colLimit) { colEndDirty = colLimit; diff --git a/src/cascadia/TerminalApp/AppLogic.cpp b/src/cascadia/TerminalApp/AppLogic.cpp index 840c1db9272..dc11b506449 100644 --- a/src/cascadia/TerminalApp/AppLogic.cpp +++ b/src/cascadia/TerminalApp/AppLogic.cpp @@ -3,18 +3,16 @@ #include "pch.h" #include "AppLogic.h" +#include "../inc/WindowingBehavior.h" +#include "AppLogic.g.cpp" +#include "FindTargetWindowResult.g.cpp" +#include "SettingsLoadEventArgs.h" #include #include #include -#include "SettingsLoadEventArgs.h" -#include "../../types/inc/CodepointWidthDetector.hpp" #include "../../types/inc/utils.hpp" -#include "../inc/WindowingBehavior.h" - -#include "AppLogic.g.cpp" -#include "FindTargetWindowResult.g.cpp" using namespace winrt::Windows::ApplicationModel; using namespace winrt::Windows::ApplicationModel::DataTransfer; @@ -435,6 +433,13 @@ namespace winrt::TerminalApp::implementation } } + if (initialLoad) + { + // Register for directory change notification. 
+ _RegisterSettingsChange(); + return; + } + // Here, we successfully reloaded the settings, and created a new // TerminalSettings object. diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 7be83e7de95..933f27683e1 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -696,7 +696,7 @@ bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_v auto clusterEnd = clusterBeg; int state = s._state; - int totalWidth = 0; + int width = 0; int lead; char32_t cp; @@ -706,7 +706,7 @@ bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_v { state = ~state; lead = s._last; - totalWidth = s._totalWidth; + width = s.width; goto fetchNext; } @@ -725,13 +725,13 @@ bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_v // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. // By convention, this turns them from being ambiguous width (= narrow) into wide ones. // We achieve this here by explicitly giving this codepoint a wide width. - // Later down below we'll clamp totalWidth back to <= 2. + // Later down below we'll clamp width back to <= 2. if (cp == 0xFE0F) { w = 2; } - totalWidth += w; + width += w; } if (clusterEnd >= end) @@ -757,14 +757,13 @@ bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_v } state = ~state; - totalWidth = totalWidth > 2 ? 2 : totalWidth; + width = width > 2 ? 2 : width; s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); - s.width = totalWidth < 1 ? 
1 : totalWidth; + s.width = width; s._state = state; s._last = lead; - s._totalWidth = totalWidth; return clusterEnd < end; } @@ -793,7 +792,7 @@ bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_v auto clusterBeg = clusterEnd; int state = s._state; - int totalWidth = 0; + int width = 0; int trail; char32_t cp; @@ -803,7 +802,7 @@ bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_v { state = ~state; trail = s._last; - totalWidth = s._totalWidth; + width = s.width; goto fetchNext; } @@ -822,13 +821,13 @@ bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_v // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. // By convention, this turns them from being ambiguous width (= narrow) into wide ones. // We achieve this here by explicitly giving this codepoint a wide width. - // Later down below we'll clamp totalWidth back to <= 2. + // Later down below we'll clamp width back to <= 2. if (cp == 0xFE0F) { w = 2; } - totalWidth += w; + width += w; } if (clusterBeg <= beg) @@ -854,14 +853,13 @@ bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_v } state = ~state; - totalWidth = totalWidth > 2 ? 2 : totalWidth; + width = width > 2 ? 2 : width; s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); - s.width = totalWidth < 1 ? 
1 : totalWidth; + s.width = width; s._state = state; s._last = trail; - s._totalWidth = totalWidth; return clusterBeg > beg; } @@ -873,13 +871,18 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(Grapheme } auto clusterEnd = clusterBeg; - int state = s._state; - int totalWidth = 0; + auto width = s.width; + auto state = s._state; + + if (state == 0) + { + width = 0; + } for (;;) { char32_t cp; - const auto it2 = utf16NextOrFFFD(clusterEnd, end, cp); + const auto clusterEndNext = utf16NextOrFFFD(clusterEnd, end, cp); const auto val = ucdLookup(cp); auto w = ucdToCharacterWidth(val); @@ -894,9 +897,9 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(Grapheme break; } + width += w; state = 1; - totalWidth += w; - clusterEnd = it2; + clusterEnd = clusterEndNext; if (clusterEnd >= end) { @@ -906,7 +909,7 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(Grapheme s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); - s.width = totalWidth < 1 ? 1 : (totalWidth > 2 ? 2 : totalWidth); + s.width = width; s._state = state; return clusterEnd < end; } @@ -915,72 +918,109 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemePrevWcswidth(Grapheme { if (_mode != TextMeasurementMode::Wcswidth) { - return _graphemeNextConsole(s, beg, clusterEnd); + return _graphemePrevConsole(s, beg, clusterEnd); } auto clusterBeg = clusterEnd; - int totalWidth = 0; + auto width = s.width; + int state = 0; - for (;;) + // Insert a `.len = 0` result if we previously returned false but had a complete cluster (`.width > 0`). + // This will indicate to the caller that the previous cluster was complete, just like how the grapheme algorithm works. 
+ if (s._state == 0) { - char32_t cp; - clusterBeg = utf16PrevOrFFFD(clusterBeg, beg, cp); - const auto val = ucdLookup(cp); + width = 0; - auto w = ucdToCharacterWidth(val); - if (w == 3) + for (;;) { - w = _ambiguousWidth; - } + char32_t cp; + clusterBeg = utf16PrevOrFFFD(clusterBeg, beg, cp); + const auto val = ucdLookup(cp); + + auto w = ucdToCharacterWidth(val); + if (w == 3) + { + w = _ambiguousWidth; + } - totalWidth += w; + width += w; - if (w != 0 || clusterBeg <= beg) - { - break; + const auto hasWidth = width != 0; + const auto atEnd = clusterBeg <= beg; + + if (hasWidth || atEnd) + { + state = hasWidth && atEnd; + break; + } } } s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); - s.width = totalWidth; + s.width = width; + s._state = state; return clusterBeg > beg; } bool CodepointWidthDetector::_graphemeNextConsole(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept { - char32_t cp; - const auto clusterEnd = utf16NextOrFFFD(clusterBeg, end, cp); + auto clusterEnd = clusterBeg; + auto width = s.width; + int state = 0; - const auto val = ucdLookup(cp); - auto width = ucdToCharacterWidth(val); - if (width == 3) + // Insert a `.len = 0` result if we previously returned false but had a complete cluster (`.width > 0`). + // This will indicate to the caller that the previous cluster was complete, just like how the grapheme algorithm works. 
+ if (s._state == 0) { - width = _checkFallbackViaCache(cp); + char32_t cp; + clusterEnd = utf16NextOrFFFD(clusterEnd, end, cp); + + const auto val = ucdLookup(cp); + width = ucdToCharacterWidth(val); + if (width == 3) + { + width = _checkFallbackViaCache(cp); + } + + state = clusterEnd >= end; } s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); s.width = width; - return clusterEnd < end; + s._state = state; + return state == 0; } bool CodepointWidthDetector::_graphemePrevConsole(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept { - char32_t cp; - const auto clusterBeg = utf16PrevOrFFFD(clusterEnd, beg, cp); + auto clusterBeg = clusterEnd; + auto width = s.width; + int state = 0; - const auto val = ucdLookup(cp); - auto width = ucdToCharacterWidth(val); - if (width == 3) + // Insert a `.len = 0` result if we previously returned false but had a complete cluster (`.width > 0`). + // This will indicate to the caller that the previous cluster was complete, just like how the grapheme algorithm works. + if (s._state == 0) { - width = _checkFallbackViaCache(cp); + char32_t cp; + clusterBeg = utf16PrevOrFFFD(clusterEnd, beg, cp); + + const auto val = ucdLookup(cp); + width = ucdToCharacterWidth(val); + if (width == 3) + { + width = _checkFallbackViaCache(cp); + } + + state = clusterBeg <= beg; } s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); s.width = width; - return clusterBeg > beg; + s._state = state; + return state == 0; } // Call the function specified via SetFallbackMethod() to turn ambiguous (width = 3) into narrow/wide. diff --git a/src/types/inc/CodepointWidthDetector.hpp b/src/types/inc/CodepointWidthDetector.hpp index cf1e70aa9ac..b63d805e50b 100644 --- a/src/types/inc/CodepointWidthDetector.hpp +++ b/src/types/inc/CodepointWidthDetector.hpp @@ -36,8 +36,8 @@ struct GraphemeState // You know these two belong together because the first call returned false. // The total width is not 1+2 but rather just 2. 
const wchar_t* beg = nullptr; - size_t len = 0; - // width will always be either 1 or 2. + int len = 0; + // width will always be between 0 or 2. int width = 0; // If GraphemeNext/Prev return false (= reached the end of the string), they'll fill these struct @@ -45,7 +45,6 @@ struct GraphemeState // _state is stored ~flipped, so that we can differentiate between it being unset (0) and it being set to 0 (~0 = 255). int _state = 0; int _last = 0; - int _totalWidth = 0; }; struct CodepointWidthDetector diff --git a/src/types/ut_types/CodepointWidthDetectorTests.cpp b/src/types/ut_types/CodepointWidthDetectorTests.cpp index e1f9879951b..cd849611517 100644 --- a/src/types/ut_types/CodepointWidthDetectorTests.cpp +++ b/src/types/ut_types/CodepointWidthDetectorTests.cpp @@ -1331,39 +1331,92 @@ class CodepointWidthDetectorTests VERIFY_ARE_EQUAL(expectedWidths, actualWidths); } - TEST_METHOD(DevanagariConjunctLinker) + TEST_METHOD(ChunkedText) { - static constexpr std::wstring_view text{ L"\u0915\u094D\u094D\u0924" }; + struct Test + { + TextMeasurementMode mode; + std::vector advancesNext; + std::vector widthsNext; + std::vector advancesPrev; + std::vector widthsPrev; + }; + const std::array tests{ + Test{ + .mode = TextMeasurementMode::Graphemes, + .advancesNext = { 3, 3, 0, 1 }, + .widthsNext = { 2, 2, 2, 1 }, + .advancesPrev = { 1, 0, 3, 3 }, + .widthsPrev = { 1, 1, 2, 2 }, - auto& cwd = CodepointWidthDetector::Singleton(); + }, + Test{ + .mode = TextMeasurementMode::Wcswidth, + .advancesNext = { 3, 1, 2, 0, 1 }, + .widthsNext = { 1, 1, 2, 2, 1 }, + .advancesPrev = { 1, 0, 2, 1, 3 }, + .widthsPrev = { 1, 1, 2, 0, 1 }, + }, + Test{ + .mode = TextMeasurementMode::Console, + .advancesNext = { 2, 1, 0, 1, 2, 0, 1 }, + .widthsNext = { 1, 0, 0, 0, 2, 2, 1 }, + .advancesPrev = { 1, 0, 2, 1, 0, 1, 2 }, + .widthsPrev = { 1, 1, 2, 0, 0, 0, 1 }, + }, + }; + // That's a fully qualified rainbow flag followed by a single "a" character. 
+ static constexpr std::array chunks{ + std::wstring_view{ L"\U0001F3F3\uFE0F" }, + std::wstring_view{ L"\u200D\U0001F308" }, + std::wstring_view{ L"a" }, + }; + + CodepointWidthDetector cwd; GraphemeState state; - cwd.GraphemeNext(state, text); - VERIFY_ARE_EQUAL(4u, state.len); - VERIFY_ARE_EQUAL(2, state.width); - } + std::vector actualAdvances; + std::vector actualWidths; - TEST_METHOD(ChunkedText) - { - static constexpr std::wstring_view text{ L"\u0915\u094D\u094D\u0924" }; + for (const auto& test : tests) + { + cwd.Reset(test.mode); - auto& cwd = CodepointWidthDetector::Singleton(); - bool ok = false; - GraphemeState state; + state = {}; + actualAdvances.clear(); + actualWidths.clear(); + + for (int i = 0; i < 3; i++) + { + bool ok; + do + { + ok = cwd.GraphemeNext(state, chunks[i]); + actualAdvances.emplace_back(state.len); + actualWidths.emplace_back(state.width); + } while (ok); + } - ok = cwd.GraphemeNext(state, L"\u2620"); - VERIFY_IS_FALSE(ok); - VERIFY_ARE_EQUAL(1u, state.len); - VERIFY_ARE_EQUAL(1, state.width); + VERIFY_ARE_EQUAL(test.advancesNext, actualAdvances); + VERIFY_ARE_EQUAL(test.widthsNext, actualWidths); - ok = cwd.GraphemeNext(state, L"\uFE0F"); - VERIFY_IS_FALSE(ok); - VERIFY_ARE_EQUAL(1u, state.len); - VERIFY_ARE_EQUAL(2, state.width); + state = {}; + actualAdvances.clear(); + actualWidths.clear(); - ok = cwd.GraphemeNext(state, L"a"); - VERIFY_IS_TRUE(ok); - VERIFY_ARE_EQUAL(0u, state.len); - VERIFY_ARE_EQUAL(2, state.width); + for (int i = 2; i >= 0; i--) + { + bool ok; + do + { + ok = cwd.GraphemePrev(state, chunks[i]); + actualAdvances.emplace_back(state.len); + actualWidths.emplace_back(state.width); + } while (ok); + } + + VERIFY_ARE_EQUAL(test.advancesPrev, actualAdvances); + VERIFY_ARE_EQUAL(test.widthsPrev, actualWidths); + } } }; From 6b964d4772e88683e3ba4fe2e909d1526e626c69 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 20 Jun 2024 16:01:55 +0200 Subject: [PATCH 12/14] Address feedback + 1 "duh" moment --- 
src/cascadia/TerminalApp/TerminalPage.cpp | 20 ++---- src/terminal/adapter/adaptDispatch.cpp | 86 ++++++++--------------- src/tools/GraphemeTableGen/Program.cs | 82 ++++++++++----------- 3 files changed, 77 insertions(+), 111 deletions(-) diff --git a/src/cascadia/TerminalApp/TerminalPage.cpp b/src/cascadia/TerminalApp/TerminalPage.cpp index caddeac2d28..faad3d5ace3 100644 --- a/src/cascadia/TerminalApp/TerminalPage.cpp +++ b/src/cascadia/TerminalApp/TerminalPage.cpp @@ -1256,24 +1256,18 @@ namespace winrt::TerminalApp::implementation TerminalSettings settings, const bool inheritCursor) { - // The only way to create string references to literals in WinRT is through std::optional. Fun! - static std::optional textMeasurement; - static const auto textMeasurementInit = [&]() { + static const auto textMeasurement = [&]() -> std::wstring_view { switch (_settings.GlobalSettings().TextMeasurement()) { case TextMeasurement::Graphemes: - textMeasurement.emplace(L"graphemes"); - break; + return L"graphemes"; case TextMeasurement::Wcswidth: - textMeasurement.emplace(L"wcswidth"); - break; + return L"wcswidth"; case TextMeasurement::Console: - textMeasurement.emplace(L"console"); - break; + return L"console"; default: - break; + return {}; } - return true; }(); TerminalConnection::ITerminalConnection connection{ nullptr }; @@ -1347,9 +1341,9 @@ namespace winrt::TerminalApp::implementation } } - if (textMeasurement) + if (!textMeasurement.empty()) { - valueSet.Insert(L"textMeasurement", Windows::Foundation::PropertyValue::CreateString(*textMeasurement)); + valueSet.Insert(L"textMeasurement", Windows::Foundation::PropertyValue::CreateString(textMeasurement)); } if (const auto id = settings.SessionId(); id != winrt::guid{}) diff --git a/src/terminal/adapter/adaptDispatch.cpp b/src/terminal/adapter/adaptDispatch.cpp index b8623a200e7..25fa70ed205 100644 --- a/src/terminal/adapter/adaptDispatch.cpp +++ b/src/terminal/adapter/adaptDispatch.cpp @@ -2068,133 +2068,109 @@ bool 
AdaptDispatch::ResetMode(const DispatchTypes::ModeParams param) // - True if handled successfully. False otherwise. bool AdaptDispatch::RequestMode(const DispatchTypes::ModeParams param) { + static constexpr auto mapTemp = [](const bool b) { return b ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; }; + static constexpr auto mapPerm = [](const bool b) { return b ? DispatchTypes::DECRPM_PermanentlyEnabled : DispatchTypes::DECRPM_PermanentlyDisabled; }; + VTInt state = DispatchTypes::DECRPM_Unsupported; switch (param) { case DispatchTypes::ModeParams::IRM_InsertReplaceMode: - state = _modes.test(Mode::InsertReplace) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_modes.test(Mode::InsertReplace)); break; case DispatchTypes::ModeParams::LNM_LineFeedNewLineMode: // VT apps expect that the system and input modes are the same, so if // they become out of sync, we just act as if LNM mode isn't supported. if (_api.GetSystemMode(ITerminalApi::Mode::LineFeed) == _terminalInput.GetInputMode(TerminalInput::Mode::LineFeed)) { - state = _terminalInput.GetInputMode(TerminalInput::Mode::LineFeed) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::LineFeed)); } break; case DispatchTypes::ModeParams::DECCKM_CursorKeysMode: - state = _terminalInput.GetInputMode(TerminalInput::Mode::CursorKey) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::CursorKey)); break; case DispatchTypes::ModeParams::DECANM_AnsiMode: - state = _api.GetStateMachine().GetParserMode(StateMachine::Mode::Ansi) ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_api.GetStateMachine().GetParserMode(StateMachine::Mode::Ansi)); break; case DispatchTypes::ModeParams::DECCOLM_SetNumberOfColumns: // DECCOLM is not supported in conpty mode if (!_api.IsConsolePty()) { - state = _modes.test(Mode::Column) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_modes.test(Mode::Column)); } break; case DispatchTypes::ModeParams::DECSCNM_ScreenMode: - state = _renderSettings.GetRenderMode(RenderSettings::Mode::ScreenReversed) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_renderSettings.GetRenderMode(RenderSettings::Mode::ScreenReversed)); break; case DispatchTypes::ModeParams::DECOM_OriginMode: - state = _modes.test(Mode::Origin) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_modes.test(Mode::Origin)); break; case DispatchTypes::ModeParams::DECAWM_AutoWrapMode: - state = _api.GetSystemMode(ITerminalApi::Mode::AutoWrap) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_api.GetSystemMode(ITerminalApi::Mode::AutoWrap)); break; case DispatchTypes::ModeParams::DECARM_AutoRepeatMode: - state = _terminalInput.GetInputMode(TerminalInput::Mode::AutoRepeat) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::AutoRepeat)); break; case DispatchTypes::ModeParams::ATT610_StartCursorBlink: - state = _pages.ActivePage().Cursor().IsBlinkingAllowed() ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_pages.ActivePage().Cursor().IsBlinkingAllowed()); break; case DispatchTypes::ModeParams::DECTCEM_TextCursorEnableMode: - state = _pages.ActivePage().Cursor().IsVisible() ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_pages.ActivePage().Cursor().IsVisible()); break; case DispatchTypes::ModeParams::XTERM_EnableDECCOLMSupport: // DECCOLM is not supported in conpty mode if (!_api.IsConsolePty()) { - state = _modes.test(Mode::AllowDECCOLM) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_modes.test(Mode::AllowDECCOLM)); } break; case DispatchTypes::ModeParams::DECPCCM_PageCursorCouplingMode: - state = _modes.test(Mode::PageCursorCoupling) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_modes.test(Mode::PageCursorCoupling)); break; case DispatchTypes::ModeParams::DECNKM_NumericKeypadMode: - state = _terminalInput.GetInputMode(TerminalInput::Mode::Keypad) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::Keypad)); break; case DispatchTypes::ModeParams::DECBKM_BackarrowKeyMode: - state = _terminalInput.GetInputMode(TerminalInput::Mode::BackarrowKey) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::BackarrowKey)); break; case DispatchTypes::ModeParams::DECLRMM_LeftRightMarginMode: - state = _modes.test(Mode::AllowDECSLRM) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_modes.test(Mode::AllowDECSLRM)); break; case DispatchTypes::ModeParams::DECECM_EraseColorMode: - state = _modes.test(Mode::EraseColor) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_modes.test(Mode::EraseColor)); break; case DispatchTypes::ModeParams::VT200_MOUSE_MODE: - state = _terminalInput.GetInputMode(TerminalInput::Mode::DefaultMouseTracking) ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::DefaultMouseTracking)); break; case DispatchTypes::ModeParams::BUTTON_EVENT_MOUSE_MODE: - state = _terminalInput.GetInputMode(TerminalInput::Mode::ButtonEventMouseTracking) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::ButtonEventMouseTracking)); break; case DispatchTypes::ModeParams::ANY_EVENT_MOUSE_MODE: - state = _terminalInput.GetInputMode(TerminalInput::Mode::AnyEventMouseTracking) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::AnyEventMouseTracking)); break; case DispatchTypes::ModeParams::UTF8_EXTENDED_MODE: - state = _terminalInput.GetInputMode(TerminalInput::Mode::Utf8MouseEncoding) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::Utf8MouseEncoding)); break; case DispatchTypes::ModeParams::SGR_EXTENDED_MODE: - state = _terminalInput.GetInputMode(TerminalInput::Mode::SgrMouseEncoding) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::SgrMouseEncoding)); break; case DispatchTypes::ModeParams::FOCUS_EVENT_MODE: - state = _terminalInput.GetInputMode(TerminalInput::Mode::FocusEvent) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::FocusEvent)); break; case DispatchTypes::ModeParams::ALTERNATE_SCROLL: - state = _terminalInput.GetInputMode(TerminalInput::Mode::AlternateScroll) ? 
DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::AlternateScroll)); break; case DispatchTypes::ModeParams::ASB_AlternateScreenBuffer: - state = _usingAltBuffer ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_usingAltBuffer); break; case DispatchTypes::ModeParams::XTERM_BracketedPasteMode: - state = _api.GetSystemMode(ITerminalApi::Mode::BracketedPaste) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_api.GetSystemMode(ITerminalApi::Mode::BracketedPaste)); break; case DispatchTypes::ModeParams::GCM_GraphemeClusterMode: - state = CodepointWidthDetector::Singleton().GetMode() == TextMeasurementMode::Graphemes ? DispatchTypes::DECRPM_PermanentlyEnabled : DispatchTypes::DECRPM_PermanentlyDisabled; + state = mapPerm(CodepointWidthDetector::Singleton().GetMode() == TextMeasurementMode::Graphemes); break; case DispatchTypes::ModeParams::W32IM_Win32InputMode: - state = _terminalInput.GetInputMode(TerminalInput::Mode::Win32) ? DispatchTypes::DECRPM_Enabled : DispatchTypes::DECRPM_Disabled; - ; + state = mapTemp(_terminalInput.GetInputMode(TerminalInput::Mode::Win32)); break; default: break; diff --git a/src/tools/GraphemeTableGen/Program.cs b/src/tools/GraphemeTableGen/Program.cs index da4fd42525c..780e41cb52c 100644 --- a/src/tools/GraphemeTableGen/Program.cs +++ b/src/tools/GraphemeTableGen/Program.cs @@ -4,10 +4,6 @@ using System.Xml.Linq; using TrieType = uint; -// UAX #29 uses "A ÷ B" to indicate that there's a potential break opportunity between A and B. -// But ÷ is not a valid identifier in Go, so we use Ω which is. -const byte Ω = 0b11; - // JoinRules doesn't quite follow UAX #29, as it states: // > Note: Testing two adjacent characters is insufficient for determining a boundary. 
// @@ -47,43 +43,43 @@ byte[][][] joinRules = [ // Base table - [ - /* | leading -> trailing codepoint */ - /* v | cbOther | cbControl | cbExtend | cbRI | cbPrepend | cbHangulL | cbHangulV | cbHangulT | cbHangulLV | cbHangulLVT | cbInCBLinker | cbInCBConsonant | cbExtPic | cbZWJ | */ - /* cbOther | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbControl | */ [Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */], - /* cbExtend | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbRI | */ [Ω /* | */, Ω /* | */, 0 /* | */, 1 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbPrepend | */ [0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */], - /* cbHangulL | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbHangulV | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbHangulT | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbHangulLV | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbHangulLVT | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* 
| */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbInCBLinker | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, 0 /* | */], - /* cbInCBConsonant | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbExtPic | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbZWJ | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */], + [ + /* | leading -> trailing codepoint */ + /* v | cbOther | cbControl | cbExtend | cbRI | cbPrepend | cbHangulL | cbHangulV | cbHangulT | cbHangulLV | cbHangulLVT | cbInCBLinker | cbInCBConsonant | cbExtPic | cbZWJ | */ + /* cbOther | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbControl | */ [3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */], + /* cbExtend | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbRI | */ [3 /* | */, 3 /* | */, 0 /* | */, 1 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbPrepend | */ [0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */], + /* cbHangulL | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 
/* | */, 0 /* | */], + /* cbHangulV | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbHangulT | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbHangulLV | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbHangulLVT | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbInCBLinker | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 0 /* | */], + /* cbInCBConsonant | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbExtPic | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbZWJ | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */], ], - // Once we have encountered a Regional Indicator pair we'll enter this table. - // It's a copy of the base table, but further Regional Indicator joins are forbidden. 
- [ - /* | leading -> trailing codepoint */ - /* v | cbOther | cbControl | cbExtend | cbRI | cbPrepend | cbHangulL | cbHangulV | cbHangulT | cbHangulLV | cbHangulLVT | cbInCBLinker | cbInCBConsonant | cbExtPic | cbZWJ | */ - /* cbOther | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbControl | */ [Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */], - /* cbExtend | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbRI | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbPrepend | */ [0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */], - /* cbHangulL | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbHangulV | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbHangulT | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbHangulLV | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbHangulLVT | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbInCBLinker | 
*/ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, 0 /* | */, Ω /* | */, 0 /* | */], - /* cbInCBConsonant | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbExtPic | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, 0 /* | */], - /* cbZWJ | */ [Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, Ω /* | */, 0 /* | */, Ω /* | */, 0 /* | */, 0 /* | */], + // Once we have encountered a Regional Indicator pair we'll enter this table. + // It's a copy of the base table, but further Regional Indicator joins are forbidden. + [ + /* | leading -> trailing codepoint */ + /* v | cbOther | cbControl | cbExtend | cbRI | cbPrepend | cbHangulL | cbHangulV | cbHangulT | cbHangulLV | cbHangulLVT | cbInCBLinker | cbInCBConsonant | cbExtPic | cbZWJ | */ + /* cbOther | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbControl | */ [3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */], + /* cbExtend | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbRI | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbPrepend | */ [0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */], + /* cbHangulL | */ [3 /* | */, 3 /* | */, 0 /* 
| */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbHangulV | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbHangulT | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbHangulLV | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbHangulLVT | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbInCBLinker | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 0 /* | */, 3 /* | */, 0 /* | */], + /* cbInCBConsonant | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbExtPic | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 0 /* | */], + /* cbZWJ | */ [3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 3 /* | */, 0 /* | */, 3 /* | */, 0 /* | */, 0 /* | */], ] ]; @@ -188,11 +184,11 @@ buf.Append("{\n"); buf.Append(" const auto l = lead & 15;\n"); buf.Append(" const auto t = trail & 15;\n"); -buf.Append($" return (s_joinRules[state][l] >> (t * {BitOperations.PopCount(Ω)})) & {Ω};\n"); +buf.Append($" return (s_joinRules[state][l] >> (t * 2)) & 3;\n"); buf.Append("}\n"); buf.Append("constexpr bool ucdGraphemeDone(const int state) noexcept\n"); buf.Append("{\n"); -buf.Append($" return state == {Ω};\n"); 
+buf.Append($" return state == 3;\n"); buf.Append("}\n"); buf.Append("constexpr int ucdToCharacterWidth(const int val) noexcept\n"); buf.Append("{\n"); @@ -351,7 +347,7 @@ static uint[][] PrepareRulesTable(byte[][][] rules) uint nextIndices = 0; foreach (var (nextIndex, trail) in row.Select((v, i) => (v, i))) { - if (nextIndex > Ω) + if (nextIndex > 3) { throw new Exception("Can't pack table index into 2 bits"); } From 623802af06de0fe4f9d991bb148babf6ffabafd2 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 25 Jun 2024 02:49:59 +0200 Subject: [PATCH 13/14] Comments + Some mild perf tuning --- src/types/CodepointWidthDetector.cpp | 428 ++++++++++++++--------- src/types/inc/CodepointWidthDetector.hpp | 10 +- 2 files changed, 274 insertions(+), 164 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 933f27683e1..3b392dd278a 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -605,6 +605,8 @@ constexpr int ucdToCharacterWidth(const int val) noexcept } // clang-format on +// Decodes the next codepoint from the given UTF-16 string. +// Returns the start of the next codepoint. Assumes `it < end`. [[msvc::forceinline]] constexpr const wchar_t* utf16NextOrFFFD(const wchar_t* it, const wchar_t* end, char32_t& out) { __assume(it != nullptr); @@ -635,6 +637,8 @@ constexpr int ucdToCharacterWidth(const int val) noexcept return it; } +// Decodes the preceding codepoint from the given UTF-16 string. +// Returns the start of the preceding codepoint. Assumes `it > beg`. [[msvc::forceinline]] constexpr const wchar_t* utf16PrevOrFFFD(const wchar_t* it, const wchar_t* beg, char32_t& out) { __assume(it != nullptr); @@ -665,6 +669,22 @@ constexpr int ucdToCharacterWidth(const int val) noexcept return it; } +// Returns `reset` if `ptr` is outside the range [beg, end]. Otherwise, it returns `ptr` unmodified.
+constexpr const wchar_t* resetIfOutOfRange(const wchar_t* beg, const wchar_t* end, const wchar_t* reset, const wchar_t* ptr) +{ + auto ret = ptr; + // This uses individual if-assignments to get the compiler to emit conditional moves. + if (ptr < beg) + { + ret = reset; + } + if (ptr > end) + { + ret = reset; + } + return ret; +} + static CodepointWidthDetector s_codepointWidthDetector; CodepointWidthDetector& CodepointWidthDetector::Singleton() noexcept @@ -674,206 +694,249 @@ CodepointWidthDetector& CodepointWidthDetector::Singleton() noexcept bool CodepointWidthDetector::GraphemeNext(GraphemeState& s, const std::wstring_view& str) noexcept { - const auto beg = str.data(); - const auto end = beg + str.size(); - auto clusterBeg = s.beg + s.len; - - // If it's a new string argument, we'll restart at the new string's beginning. - if (clusterBeg < beg || clusterBeg > end) + if (_mode == TextMeasurementMode::Graphemes) { - clusterBeg = beg; + return _graphemeNext(s, str); } - - if (clusterBeg >= end) + if (_mode == TextMeasurementMode::Wcswidth) { - return false; + return _graphemeNextWcswidth(s, str); } + return _graphemeNextConsole(s, str); +} - if (_mode != TextMeasurementMode::Graphemes) +bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_view& str) noexcept +{ + if (_mode == TextMeasurementMode::Graphemes) + { + return _graphemePrev(s, str); + } + if (_mode == TextMeasurementMode::Wcswidth) { - return _graphemeNextWcswidth(s, end, clusterBeg); + return _graphemePrevWcswidth(s, str); } + return _graphemePrevConsole(s, str); +} + +// Parses the next grapheme cluster from the given string. The algorithm largely follows "UAX #29: Unicode Text Segmentation", +// but takes some mild liberties. Returns false if the end of the string was reached. Updates `s` with the cluster. 
+bool CodepointWidthDetector::_graphemeNext(GraphemeState& s, const std::wstring_view& str) const noexcept +{ + const auto beg = str.data(); + const auto end = beg + str.size(); + auto clusterBeg = s.beg + s.len; + auto width = s.width; + auto state = s._state; + auto lead = s._last; + + // If it's a new string argument, we'll restart at the new string's beginning. + clusterBeg = resetIfOutOfRange(beg, end, beg, clusterBeg); auto clusterEnd = clusterBeg; - int state = s._state; - int width = 0; - int lead; - char32_t cp; - - // The _state is stored ~flipped, so that we can differentiate - // between it being unset (0) and it being set to 0 (~0 = 255). - if (state) + + // Skip if we're already at the end. + if (clusterEnd < end) { + char32_t cp; + + // If a previous parsing of a grapheme cluster got interrupted because we reached the end of the string, + // we'll have stored the parser state in `s._state` so that we can continue parsing within the new string. + // The problem is that a `state` of zero is also a valid state parameter for `ucdGraphemeJoins`. + // Thus, we're storing `s._state` bit-flipped so that we can differentiate between it being unset (0) and + // storing a previous state of 0 (0xffff...). + const auto gotState = state != 0; state = ~state; - lead = s._last; - width = s.width; - goto fetchNext; - } + if (gotState) + { + goto fetchNext; + } - clusterEnd = utf16NextOrFFFD(clusterEnd, end, cp); - lead = ucdLookup(cp); + clusterEnd = utf16NextOrFFFD(clusterEnd, end, cp); + lead = ucdLookup(cp); + width = 0; + state = 0; - for (;;) - { + for (;;) { - auto w = ucdToCharacterWidth(lead); - if (w == 3) { - w = _ambiguousWidth; + auto w = ucdToCharacterWidth(lead); + if (w == 3) + { + w = _ambiguousWidth; + } + + // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. + // By convention, this turns them from being ambiguous width (= narrow) into wide ones. 
+ // We achieve this here by explicitly giving this codepoint a wide width. + // Later down below we'll clamp width back to <= 2. + if (cp == 0xFE0F) + { + w = 2; + } + + width += w; } - // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. - // By convention, this turns them from being ambiguous width (= narrow) into wide ones. - // We achieve this here by explicitly giving this codepoint a wide width. - // Later down below we'll clamp width back to <= 2. - if (cp == 0xFE0F) + // If we're at the end of the string, we'll break out of the loop, but leave + // `state` and `lead` as-is, so that we can continue parsing in the next string. + if (clusterEnd >= end) { - w = 2; + break; } - width += w; - } + fetchNext: + const auto clusterEndNext = utf16NextOrFFFD(clusterEnd, end, cp); + const auto trail = ucdLookup(cp); - if (clusterEnd >= end) - { - break; - } - - fetchNext: - const auto clusterEndNext = utf16NextOrFFFD(clusterEnd, end, cp); - const auto trail = ucdLookup(cp); - state = ucdGraphemeJoins(state, lead, trail); + state = ucdGraphemeJoins(state, lead, trail); + if (ucdGraphemeDone(state)) + { + // We'll later do `state = ~state` which will result in `state == 0`. + state = ~0; + lead = 0; + break; + } - if (ucdGraphemeDone(state)) - { - // We'll later do a `state = ~state` which will result in `state == 0`. - state = ~0; - lead = 0; - break; + clusterEnd = clusterEndNext; + lead = trail; } - clusterEnd = clusterEndNext; - lead = trail; - } + state = ~state; + width = width > 2 ? 2 : width; - state = ~state; - width = width > 2 ? 
2 : width; + s.beg = clusterBeg; + s.len = static_cast(clusterEnd - clusterBeg); + s.width = width; + s._state = state; + s._last = lead; + } - s.beg = clusterBeg; - s.len = static_cast(clusterEnd - clusterBeg); - s.width = width; - s._state = state; - s._last = lead; return clusterEnd < end; } -// This code is identical to GraphemeNext() but with the order of operations reversed since we're iterating backwards. -bool CodepointWidthDetector::GraphemePrev(GraphemeState& s, const std::wstring_view& str) noexcept +// Parses the preceding grapheme cluster from the given string. The algorithm largely follows "UAX #29: Unicode Text Segmentation", +// but takes some mild liberties. Returns false if the beginning of the string was reached. Updates `s` with the cluster. +// This code is identical to _graphemeNext() but with the order of operations reversed since we're iterating backwards. +bool CodepointWidthDetector::_graphemePrev(GraphemeState& s, const std::wstring_view& str) const noexcept { const auto beg = str.data(); const auto end = beg + str.size(); auto clusterEnd = s.beg; + auto width = s.width; + auto state = s._state; + auto trail = s._last; - // If it's a new string argument, we'll restart at the new string's end. - if (clusterEnd < beg || clusterEnd > end) - { - clusterEnd = end; - } + // If it's a new string argument, we'll restart at the new string's end. + clusterEnd = resetIfOutOfRange(beg, end, end, clusterEnd); - if (clusterEnd <= beg) - { - return false; - } + auto clusterBeg = clusterEnd; - if (_mode != TextMeasurementMode::Graphemes) + // Skip if we're already at the beginning. + if (clusterEnd > beg) { - return _graphemePrevWcswidth(s, beg, clusterEnd); - } + char32_t cp; - auto clusterBeg = clusterEnd; - int state = s._state; - int width = 0; - int trail; - char32_t cp; - - // The _state is stored ~flipped, so that we can differentiate - // between it being unset (0) and it being set to 0 (~0 = 255).
- if (state) - { + // If a previous parsing of a grapheme cluster got interrupted because we reached the beginning of the string, + // we'll have stored the parser state in `s._state` so that we can continue parsing within the new string. + // The problem is that a `state` of zero is also a valid state parameter for `ucdGraphemeJoins`. + // Thus, we're storing `s._state` bit-flipped so that we can differentiate between it being unset (0) and + // storing a previous state of 0 (0xffff...). + const auto gotState = state != 0; state = ~state; - trail = s._last; - width = s.width; - goto fetchNext; - } + if (gotState) + { + goto fetchNext; + } - clusterBeg = utf16PrevOrFFFD(clusterBeg, beg, cp); - trail = ucdLookup(cp); + clusterBeg = utf16PrevOrFFFD(clusterBeg, beg, cp); + trail = ucdLookup(cp); + width = 0; + state = 0; - for (;;) - { + for (;;) { - auto w = ucdToCharacterWidth(trail); - if (w == 3) { - w = _ambiguousWidth; + auto w = ucdToCharacterWidth(trail); + if (w == 3) + { + w = _ambiguousWidth; + } + + // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. + // By convention, this turns them from being ambiguous width (= narrow) into wide ones. + // We achieve this here by explicitly giving this codepoint a wide width. + // Later down below we'll clamp width back to <= 2. + if (cp == 0xFE0F) + { + w = 2; + } + + width += w; } - // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. - // By convention, this turns them from being ambiguous width (= narrow) into wide ones. - // We achieve this here by explicitly giving this codepoint a wide width. - // Later down below we'll clamp width back to <= 2. - if (cp == 0xFE0F) + // If we're at the beginning of the string, we'll break out of the loop, but leave + // `state` and `trail` as-is, so that we can continue parsing in the next string.
+ if (clusterBeg <= beg) { - w = 2; + break; } - width += w; - } - - if (clusterBeg <= beg) - { - break; - } + fetchNext: + const auto clusterBegNext = utf16PrevOrFFFD(clusterBeg, beg, cp); + const auto lead = ucdLookup(cp); - fetchNext: - const auto clusterBegNext = utf16PrevOrFFFD(clusterBeg, beg, cp); - const auto lead = ucdLookup(cp); - state = ucdGraphemeJoins(state, lead, trail); + state = ucdGraphemeJoins(state, lead, trail); + if (ucdGraphemeDone(state)) + { + // We'll later do `state = ~state` which will result in `state == 0`. + state = ~0; + trail = 0; + break; + } - if (ucdGraphemeDone(state)) - { - // We'll later do a `state = ~state` which will result in `state == 0`. - state = ~0; - trail = 0; - break; + clusterBeg = clusterBegNext; + trail = lead; } - clusterBeg = clusterBegNext; - trail = lead; - } + state = ~state; + width = width > 2 ? 2 : width; - state = ~state; - width = width > 2 ? 2 : width; + s.beg = clusterBeg; + s.len = static_cast(clusterEnd - clusterBeg); + s.width = width; + s._state = state; + s._last = trail; + } - s.beg = clusterBeg; - s.len = static_cast(clusterEnd - clusterBeg); - s.width = width; - s._state = state; - s._last = trail; return clusterBeg > beg; } -__declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept +// Implements a clustering algorithm that behaves similar to terminals and applications based on `wcswidth`. +// Such terminals have no actual notion of graphemes or joining characters, but do know zero-width characters. +// During cursor navigation they'll skip over such zero-width characters to reach the target column. +// In effect this means, that a non-zero-width character gets clustered with any number of following zero-width characters. 
+bool CodepointWidthDetector::_graphemeNextWcswidth(GraphemeState& s, const std::wstring_view& str) const noexcept { - if (_mode != TextMeasurementMode::Wcswidth) + const auto beg = str.data(); + const auto end = beg + str.size(); + auto clusterBeg = s.beg + s.len; + auto width = s.width; + auto state = s._state; + + // If it's a new string argument, we'll restart at the new string's beginning. + clusterBeg = resetIfOutOfRange(beg, end, beg, clusterBeg); + + if (clusterBeg >= end) { - return _graphemeNextConsole(s, end, clusterBeg); + return false; } auto clusterEnd = clusterBeg; - auto width = s.width; - auto state = s._state; + // Normally we could just append any zero-width characters to the current cluster, + // but theoretically we could have a zero-width character itself as the lead character. + // Because of that we don't use `s.width` to track the state but rather flag + // whether we've encountered our "lead" character in `s._state` (1 if we had one). if (state == 0) { width = 0; @@ -914,19 +977,32 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemeNextWcswidth(Grapheme return clusterEnd < end; } -__declspec(noinline) bool CodepointWidthDetector::_graphemePrevWcswidth(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept +// Implements a clustering algorithm that behaves similar to terminals and applications based on `wcswidth`. +// Such terminals have no actual notion of graphemes or joining characters, but do know zero-width characters. +// During cursor navigation they'll skip over such zero-width characters to reach the target column. +// In effect this means, that a non-zero-width character gets clustered with any number of following zero-width characters. 
+bool CodepointWidthDetector::_graphemePrevWcswidth(GraphemeState& s, const std::wstring_view& str) const noexcept { - if (_mode != TextMeasurementMode::Wcswidth) + const auto beg = str.data(); + const auto end = beg + str.size(); + auto clusterEnd = s.beg; + + // If it's a new string argument, we'll restart at the new string's end. + clusterEnd = resetIfOutOfRange(beg, end, end, clusterEnd); + + if (clusterEnd <= beg) { - return _graphemePrevConsole(s, beg, clusterEnd); + return false; } auto clusterBeg = clusterEnd; auto width = s.width; - int state = 0; + int delayedCompletion = 0; - // Insert a `.len = 0` result if we previously returned false but had a complete cluster (`.width > 0`). - // This will indicate to the caller that the previous cluster was complete, just like how the grapheme algorithm works. + // In order to conform to the behavior of _graphemePrev(), we need to pretend as if we don't know + // whether the cluster is complete yet (with graphemes there may be prepended concatenation marks). + // As such, we flag `delayedCompletion` to true which gets stored as `s._state = 1` and return false. + // Then, when we get called again with the next input string, we'll finally return false with a `s.len` of 0. if (s._state == 0) { width = 0; @@ -950,7 +1026,7 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemePrevWcswidth(Grapheme if (hasWidth || atEnd) { - state = hasWidth && atEnd; + delayedCompletion = hasWidth && atEnd; break; } } @@ -959,18 +1035,34 @@ __declspec(noinline) bool CodepointWidthDetector::_graphemePrevWcswidth(Grapheme s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); s.width = width; - s._state = state; + s._state = delayedCompletion; return clusterBeg > beg; } -bool CodepointWidthDetector::_graphemeNextConsole(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept +// Implements a clustering algorithm that behaves similar to the old conhost.
+// It even asks the text renderer how wide ambiguous width characters are instead of defaulting to 1 (or 2). +bool CodepointWidthDetector::_graphemeNextConsole(GraphemeState& s, const std::wstring_view& str) noexcept { + const auto beg = str.data(); + const auto end = beg + str.size(); + auto clusterBeg = s.beg + s.len; + + // If it's a new string argument, we'll restart at the new string's beginning. + clusterBeg = resetIfOutOfRange(beg, end, beg, clusterBeg); + + if (clusterBeg >= end) + { + return false; + } + auto clusterEnd = clusterBeg; auto width = s.width; - int state = 0; + int delayedCompletion = 0; - // Insert a `.len = 0` result if we previously returned false but had a complete cluster (`.width > 0`). - // This will indicate to the caller that the previous cluster was complete, just like how the grapheme algorithm works. + // In order to conform to the behavior of _graphemeNext(), we need to pretend as if we don't know + // whether the cluster is complete yet (with graphemes there may be nonspacing marks, etc.). + // As such, we flag `delayedCompletion` to true which gets stored as `s._state = 1` and return false. + // Then, when we get called again with the next input string, we'll finally return false with a `s.len` of 0. if (s._state == 0) { char32_t cp; @@ -983,24 +1075,40 @@ bool CodepointWidthDetector::_graphemeNextConsole(GraphemeState& s, const wchar_ width = _checkFallbackViaCache(cp); } - state = clusterEnd >= end; + delayedCompletion = clusterEnd >= end; } s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); s.width = width; - s._state = state; - return state == 0; + s._state = delayedCompletion; + return delayedCompletion == 0; } -bool CodepointWidthDetector::_graphemePrevConsole(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept +// Implements a clustering algorithm that behaves similar to the old conhost. 
+// It even asks the text renderer how wide ambiguous width characters are instead of defaulting to 1 (or 2). +bool CodepointWidthDetector::_graphemePrevConsole(GraphemeState& s, const std::wstring_view& str) noexcept { + const auto beg = str.data(); + const auto end = beg + str.size(); + auto clusterEnd = s.beg; + + // If it's a new string argument, we'll restart at the new string's end. + clusterEnd = resetIfOutOfRange(beg, end, end, clusterEnd); + + if (clusterEnd <= beg) + { + return false; + } + auto clusterBeg = clusterEnd; auto width = s.width; - int state = 0; + int delayedCompletion = 0; - // Insert a `.len = 0` result if we previously returned false but had a complete cluster (`.width > 0`). - // This will indicate to the caller that the previous cluster was complete, just like how the grapheme algorithm works. + // In order to conform to the behavior of _graphemePrev(), we need to pretend as if we don't know + // whether the cluster is complete yet (with graphemes there may be prepended concatenation marks). + // As such, we flag `delayedCompletion` to true which gets stored as `s._state = 1` and return false. + // Then, when we get called again with the next input string, we'll finally return false with a `s.len` of 0. if (s._state == 0) { char32_t cp; @@ -1013,19 +1121,19 @@ bool CodepointWidthDetector::_graphemePrevConsole(GraphemeState& s, const wchar_ width = _checkFallbackViaCache(cp); } - state = clusterBeg <= beg; + delayedCompletion = clusterBeg <= beg; } s.beg = clusterBeg; s.len = static_cast(clusterEnd - clusterBeg); s.width = width; - s._state = state; - return state == 0; + s._state = delayedCompletion; + return delayedCompletion == 0; } // Call the function specified via SetFallbackMethod() to turn ambiguous (width = 3) into narrow/wide. // Caches the results in _fallbackCache.
-__declspec(noinline) int CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint) noexcept +int CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint) noexcept try { // Ambiguous glyphs are considered narrow by default. See microsoft/terminal#2066 for more info. diff --git a/src/types/inc/CodepointWidthDetector.hpp b/src/types/inc/CodepointWidthDetector.hpp index b63d805e50b..7429d6a96c5 100644 --- a/src/types/inc/CodepointWidthDetector.hpp +++ b/src/types/inc/CodepointWidthDetector.hpp @@ -60,10 +60,12 @@ struct CodepointWidthDetector void Reset(TextMeasurementMode mode) noexcept; private: - __declspec(noinline) bool _graphemeNextWcswidth(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept; - __declspec(noinline) bool _graphemePrevWcswidth(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept; - __declspec(noinline) bool _graphemeNextConsole(GraphemeState& s, const wchar_t* end, const wchar_t* clusterBeg) noexcept; - __declspec(noinline) bool _graphemePrevConsole(GraphemeState& s, const wchar_t* beg, const wchar_t* clusterEnd) noexcept; + bool _graphemeNext(GraphemeState& s, const std::wstring_view& str) const noexcept; + bool _graphemePrev(GraphemeState& s, const std::wstring_view& str) const noexcept; + bool _graphemeNextWcswidth(GraphemeState& s, const std::wstring_view& str) const noexcept; + bool _graphemePrevWcswidth(GraphemeState& s, const std::wstring_view& str) const noexcept; + bool _graphemeNextConsole(GraphemeState& s, const std::wstring_view& str) noexcept; + bool _graphemePrevConsole(GraphemeState& s, const std::wstring_view& str) noexcept; __declspec(noinline) int _checkFallbackViaCache(char32_t codepoint) noexcept; std::unordered_map _fallbackCache; From 8cfe171fb06580e62f66f0438d29aeca9bc84d4e Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 25 Jun 2024 14:07:29 +0200 Subject: [PATCH 14/14] The spellcheck happifier --- src/types/CodepointWidthDetector.cpp | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 3b392dd278a..c7accf328b9 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -1060,7 +1060,7 @@ bool CodepointWidthDetector::_graphemeNextConsole(GraphemeState& s, const std::w int delayedCompletion = 0; // In order to conform to the behavior of _graphemeNext(), we need to pretend as if we don't know - // whether the cluster is complete yet (with graphemes there may be nonspacing marks, etc.). + // whether the cluster is complete yet (with graphemes there may be combining marks, etc.). // As such, we flag `delayedCompletion` to true which gets stored as `s._state = 1` and return false. // Then, when we get called again with the next input string, we'll finally return false with a `s.len` of 0. if (s._state == 0)