diff --git a/.github/actions/spell-check/dictionary/dictionary.txt b/.github/actions/spell-check/dictionary/dictionary.txt index ca528f2f7df..9bb4f2b810c 100644 --- a/.github/actions/spell-check/dictionary/dictionary.txt +++ b/.github/actions/spell-check/dictionary/dictionary.txt @@ -97786,6 +97786,7 @@ dalesman dalesmen dalespeople daleswoman +dalet daleth daleths Daleville @@ -107561,6 +107562,7 @@ dialystelic dialystely dialytic dialytically +dialytika dialyzability dialyzable dialyzate @@ -114004,6 +114006,7 @@ djalmaite Djambi djasakid djave +dje djebel djebels djehad @@ -120418,11 +120421,13 @@ DZ dz dz. Dzaudzhikau +dze dzeren dzerin dzeron Dzerzhinsk Dzhambul +dzhe Dzhugashvili dziggetai dzo @@ -158966,6 +158971,7 @@ Ghaznevid Ghazzah Ghazzali ghbor +ghe Gheber gheber ghebeta @@ -160166,6 +160172,7 @@ gizzards gizzen gizzened gizzern +gje gjedost Gjellerup gjetost @@ -212347,6 +212354,7 @@ Kizilbash Kizzee Kizzie kJ +kje Kjeldahl kjeldahlization kjeldahlize @@ -224856,6 +224864,7 @@ lizzie Lizzy LJ LJBF +lje Ljod Ljoka Ljubljana @@ -261607,6 +261616,7 @@ N.J. NJ nj njave +nje Njord Njorth NKGB @@ -274785,6 +274795,7 @@ Ogma ogmic Ogmios OGO +ogonek ogonium Ogor O'Gowan @@ -329834,6 +329845,7 @@ QN qn QNP QNS +qof Qoheleth Qom qoph @@ -371408,6 +371420,7 @@ Shaysite shazam Shazar SHCD +shcha Shcheglovsk Shcherbakov she @@ -420973,6 +420986,7 @@ tonometry Tonopah tonophant tonoplast +tonos tonoscope tonotactic tonotaxis @@ -428676,6 +428690,7 @@ tsetses TSF TSgt TSH +tshe Tshi tshi Tshiluba @@ -477068,6 +477083,7 @@ Yermo yern yertchuk yerth +yeru yerva Yerwa-Maiduguri Yerxa @@ -478235,6 +478251,7 @@ Z-bar ZBB ZBR ZD +ze Zea zea zeal @@ -478604,6 +478621,7 @@ ZG ZGS Zhang Zhdanov +zhe Zhitomir Zhivkov Zhmud diff --git a/.github/actions/spell-check/expect/expect.txt b/.github/actions/spell-check/expect/expect.txt index ed907ce3095..886af6b9cbe 100644 --- a/.github/actions/spell-check/expect/expect.txt +++ b/.github/actions/spell-check/expect/expect.txt @@ -906,6 +906,7 @@ grep Greyscale gridline groupbox +gset gsl GTP guc @@ -1530,6 +1531,7 @@ NOYIELD NOZORDER NPM npos +NRCS NSTATUS ntapi ntcon @@ -2030,6 +2032,7 @@ SCROLLSCALE SCROLLSCREENBUFFER Scrollup Scrolluppage +SCS scursor sddl sdeleted @@ -2444,6 +2447,7 @@ untimes UPDATEDISPLAY UPDOWN UPKEY +UPSS upvote uri url diff --git a/.github/actions/spell-check/patterns/patterns.txt b/.github/actions/spell-check/patterns/patterns.txt index cddd540bffe..3833627a51d 100644 --- a/.github/actions/spell-check/patterns/patterns.txt +++ b/.github/actions/spell-check/patterns/patterns.txt @@ -1,11 +1,13 @@ https://(?:(?:[-a-zA-Z0-9?&=]*\.|)microsoft\.com)/[-a-zA-Z0-9?&=_#\/.]* https://aka\.ms/[-a-zA-Z0-9?&=\/_]* +https://www\.itscj\.ipsj\.or\.jp/iso-ir/[-0-9]+\.pdf +https://www\.vt100\.net/docs/[-a-zA-Z0-9#_\/.]* https://www.w3.org/[-a-zA-Z0-9?&=\/_#]* https://(?:(?:www\.|)youtube\.com|youtu.be)/[-a-zA-Z0-9?&=]* https://[a-z-]+\.githubusercontent\.com/[-a-zA-Z0-9?&=_\/.]* [Pp]ublicKeyToken="?[0-9a-fA-F]{16}"? (?:[{"]|UniqueIdentifier>)[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}(?:[}"]| charset) = 0; // SCS + virtual bool Designate96Charset(const size_t gsetNumber, const std::pair charset) = 0; // SCS + virtual bool LockingShift(const size_t gsetNumber) = 0; // LS0, LS1, LS2, LS3 + virtual bool LockingShiftRight(const size_t gsetNumber) = 0; // LS1R, LS2R, LS3R + virtual bool SingleShift(const size_t gsetNumber) = 0; // SS2, SS3 virtual bool SoftReset() = 0; // DECSTR virtual bool HardReset() = 0; // RIS diff --git a/src/terminal/adapter/adaptDispatch.cpp b/src/terminal/adapter/adaptDispatch.cpp index 336494a0374..babdbab708e 100644 --- a/src/terminal/adapter/adaptDispatch.cpp +++ b/src/terminal/adapter/adaptDispatch.cpp @@ -8,6 +8,7 @@ #include "../../types/inc/Viewport.hpp" #include "../../types/inc/utils.hpp" #include "../../inc/unicode.hpp" +#include "../parser/ascii.hpp" using namespace Microsoft::Console::Types; using namespace Microsoft::Console::VirtualTerminal; @@ -46,7 +47,15 @@ AdaptDispatch::AdaptDispatch(std::unique_ptr pConApi, // - void AdaptDispatch::Print(const wchar_t wchPrintable) { - _pDefaults->Print(_termOutput.TranslateKey(wchPrintable)); + const auto wchTranslated = _termOutput.TranslateKey(wchPrintable); + // By default the DEL character is meant to be ignored in the same way as a + // NUL character. However, it's possible that it could be translated to a + // printable character in a 96-character set. This condition makes sure that + // a character is only output if the DEL is translated to something else. + if (wchTranslated != AsciiChars::DEL) + { + _pDefaults->Print(wchTranslated); + } } // Routine Description @@ -335,6 +344,7 @@ bool AdaptDispatch::CursorSaveState() savedCursorState.IsOriginModeRelative = _isOriginModeRelative; savedCursorState.Attributes = attributes; savedCursorState.TermOutput = _termOutput; + _pConApi->GetConsoleOutputCP(savedCursorState.CodePage); } return success; @@ -376,6 +386,12 @@ bool AdaptDispatch::CursorRestoreState() // Restore designated character set. _termOutput = savedCursorState.TermOutput; + // Restore the code page if it was previously saved. + if (savedCursorState.CodePage != 0) + { + success = _pConApi->SetConsoleOutputCP(savedCursorState.CodePage); + } + return success; } @@ -1590,18 +1606,107 @@ void AdaptDispatch::_InitTabStopsForWidth(const size_t width) } //Routine Description: -// Designate Charset - Sets the active charset to be the one mapped to wch. -// See DispatchTypes::VTCharacterSets for a list of supported charsets. -// Also http://invisible-island.net/xterm/ctlseqs/ctlseqs.html#h2-Controls-beginning-with-ESC -// For a list of all charsets and their codes. +// DOCS - Selects the coding system through which character sets are activated. +// When ISO2022 is selected, the code page is set to ISO-8859-1, and both +// GL and GR areas of the code table can be remapped. When UTF8 is selected, +// the code page is set to UTF-8, and only the GL area can be remapped. +//Arguments: +// - codingSystem - The coding system that will be selected. +// Return value: +// True if handled successfully. False otherwise. +bool AdaptDispatch::DesignateCodingSystem(const wchar_t codingSystem) +{ + // If we haven't previously saved the initial code page, do so now. + // This will be used to restore the code page in response to a reset. + if (!_initialCodePage.has_value()) + { + unsigned int currentCodePage; + _pConApi->GetConsoleOutputCP(currentCodePage); + _initialCodePage = currentCodePage; + } + + bool success = false; + switch (codingSystem) + { + case DispatchTypes::CodingSystem::ISO2022: + success = _pConApi->SetConsoleOutputCP(28591); + if (success) + { + _termOutput.EnableGrTranslation(true); + } + break; + case DispatchTypes::CodingSystem::UTF8: + success = _pConApi->SetConsoleOutputCP(CP_UTF8); + if (success) + { + _termOutput.EnableGrTranslation(false); + } + break; + } + return success; +} + +//Routine Description: +// Designate Charset - Selects a specific 94-character set into one of the four G-sets. +// See http://invisible-island.net/xterm/ctlseqs/ctlseqs.html#h3-Controls-beginning-with-ESC +// for a list of all charsets and their codes. +// If the specified charset is unsupported, we do nothing (remain on the current one) +//Arguments: +// - gsetNumber - The G-set into which the charset will be selected. +// - charset - The characters indicating the charset that will be used. +// Return value: +// True if handled successfully. False otherwise. +bool AdaptDispatch::Designate94Charset(const size_t gsetNumber, const std::pair charset) +{ + return _termOutput.Designate94Charset(gsetNumber, charset); +} + +//Routine Description: +// Designate Charset - Selects a specific 96-character set into one of the four G-sets. +// See http://invisible-island.net/xterm/ctlseqs/ctlseqs.html#h3-Controls-beginning-with-ESC +// for a list of all charsets and their codes. // If the specified charset is unsupported, we do nothing (remain on the current one) //Arguments: -// - wchCharset - The character indicating the charset we should switch to. +// - gsetNumber - The G-set into which the charset will be selected. +// - charset - The characters indicating the charset that will be used. // Return value: // True if handled successfully. False otherwise. -bool AdaptDispatch::DesignateCharset(const wchar_t wchCharset) noexcept +bool AdaptDispatch::Designate96Charset(const size_t gsetNumber, const std::pair charset) { - return _termOutput.DesignateCharset(wchCharset); + return _termOutput.Designate96Charset(gsetNumber, charset); +} + +//Routine Description: +// Locking Shift - Invoke one of the G-sets into the left half of the code table. +//Arguments: +// - gsetNumber - The G-set that will be invoked. +// Return value: +// True if handled successfully. False otherwise. +bool AdaptDispatch::LockingShift(const size_t gsetNumber) +{ + return _termOutput.LockingShift(gsetNumber); +} + +//Routine Description: +// Locking Shift Right - Invoke one of the G-sets into the right half of the code table. +//Arguments: +// - gsetNumber - The G-set that will be invoked. +// Return value: +// True if handled successfully. False otherwise. +bool AdaptDispatch::LockingShiftRight(const size_t gsetNumber) +{ + return _termOutput.LockingShiftRight(gsetNumber); +} + +//Routine Description: +// Single Shift - Temporarily invoke one of the G-sets into the code table. +//Arguments: +// - gsetNumber - The G-set that will be invoked. +// Return value: +// True if handled successfully. False otherwise. +bool AdaptDispatch::SingleShift(const size_t gsetNumber) +{ + return _termOutput.SingleShift(gsetNumber); } //Routine Description: @@ -1667,7 +1772,12 @@ bool AdaptDispatch::SoftReset() } if (success) { - success = DesignateCharset(DispatchTypes::VTCharacterSets::USASCII); // Default Charset + _termOutput = {}; // Reset all character set designations. + if (_initialCodePage.has_value()) + { + // Restore initial code page if previously changed by a DOCS sequence. + success = _pConApi->SetConsoleOutputCP(_initialCodePage.value()); + } } if (success) { diff --git a/src/terminal/adapter/adaptDispatch.hpp b/src/terminal/adapter/adaptDispatch.hpp index 5f7b51e86a5..fd1dcd7e3c0 100644 --- a/src/terminal/adapter/adaptDispatch.hpp +++ b/src/terminal/adapter/adaptDispatch.hpp @@ -86,7 +86,12 @@ namespace Microsoft::Console::VirtualTerminal bool ForwardTab(const size_t numTabs) override; // CHT, HT bool BackwardsTab(const size_t numTabs) override; // CBT bool TabClear(const size_t clearType) override; // TBC - bool DesignateCharset(const wchar_t wchCharset) noexcept override; // SCS + bool DesignateCodingSystem(const wchar_t codingSystem) override; // DOCS + bool Designate94Charset(const size_t gsetNumber, const std::pair charset) override; // SCS + bool Designate96Charset(const size_t gsetNumber, const std::pair charset) override; // SCS + bool LockingShift(const size_t gsetNumber) override; // LS0, LS1, LS2, LS3 + bool LockingShiftRight(const size_t gsetNumber) override; // LS1R, LS2R, LS3R + bool SingleShift(const size_t gsetNumber) override; // SS2, SS3 bool SoftReset() override; // DECSTR bool HardReset() override; // RIS bool ScreenAlignmentPattern() override; // DECALN @@ -121,6 +126,7 @@ namespace Microsoft::Console::VirtualTerminal bool IsOriginModeRelative = false; TextAttribute Attributes = {}; TerminalOutput TermOutput = {}; + unsigned int CodePage = 0; }; struct Offset { @@ -163,6 +169,7 @@ namespace Microsoft::Console::VirtualTerminal std::unique_ptr _pConApi; std::unique_ptr _pDefaults; TerminalOutput _termOutput; + std::optional _initialCodePage; // We have two instances of the saved cursor state, because we need // one for the main buffer (at index 0), and another for the alt buffer diff --git a/src/terminal/adapter/charsets.hpp b/src/terminal/adapter/charsets.hpp new file mode 100644 index 00000000000..4ead5f14bce --- /dev/null +++ b/src/terminal/adapter/charsets.hpp @@ -0,0 +1,1055 @@ +/*++ +Copyright (c) Microsoft Corporation +Licensed under the MIT license. + +Module Name: +- charsets.hpp + +Abstract: +- Defines translation tables for the various VT character sets used in the TerminalOutput class. +--*/ + +#pragma once + +namespace Microsoft::Console::VirtualTerminal +{ + template + class CharSet + { + public: + constexpr CharSet(const std::initializer_list> replacements) + { + for (auto i = L'\0'; i < _translationTable.size(); i++) + _translationTable.at(i) = BaseChar + i; + for (auto replacement : replacements) + _translationTable.at(replacement.first - BaseChar) = replacement.second; + } + constexpr operator const std::wstring_view() const + { + return { _translationTable.data(), _translationTable.size() }; + } + constexpr bool operator==(const std::wstring_view rhs) const + { + return _translationTable.data() == rhs.data(); + } + + private: + std::array _translationTable = {}; + }; + + template + constexpr bool operator==(const std::wstring_view lhs, const CharSet& rhs) + { + return rhs == lhs; + } + + typedef CharSet AsciiBasedCharSet; + typedef CharSet Latin1BasedCharSet94; + typedef CharSet Latin1BasedCharSet96; + + static constexpr auto Ascii = AsciiBasedCharSet{}; + static constexpr auto Latin1 = Latin1BasedCharSet96{}; + +#pragma warning(push) +#pragma warning(disable : 26483) // Suppress spurious "value is outside the bounds" warnings + + // https://en.wikipedia.org/wiki/ISO/IEC_8859-2 + static constexpr auto Latin2 = Latin1BasedCharSet96{ + { L'\xa1', L'\u0104' }, // Latin Capital Letter A With Ogonek + { L'\xa2', L'\u02d8' }, // Breve + { L'\xa3', L'\u0141' }, // Latin Capital Letter L With Stroke + { L'\xa5', L'\u013d' }, // Latin Capital Letter L With Caron + { L'\xa6', L'\u015a' }, // Latin Capital Letter S With Acute + { L'\xa9', L'\u0160' }, // Latin Capital Letter S With Caron + { L'\xaa', L'\u015e' }, // Latin Capital Letter S With Cedilla + { L'\xab', L'\u0164' }, // Latin Capital Letter T With Caron + { L'\xac', L'\u0179' }, // Latin Capital Letter Z With Acute + { L'\xae', L'\u017d' }, // Latin Capital Letter Z With Caron + { L'\xaf', L'\u017b' }, // Latin Capital Letter Z With Dot Above + { L'\xb1', L'\u0105' }, // Latin Small Letter A With Ogonek + { L'\xb2', L'\u02db' }, // Ogonek + { L'\xb3', L'\u0142' }, // Latin Small Letter L With Stroke + { L'\xb5', L'\u013e' }, // Latin Small Letter L With Caron + { L'\xb6', L'\u015b' }, // Latin Small Letter S With Acute + { L'\xb7', L'\u02c7' }, // Caron + { L'\xb9', L'\u0161' }, // Latin Small Letter S With Caron + { L'\xba', L'\u015f' }, // Latin Small Letter S With Cedilla + { L'\xbb', L'\u0165' }, // Latin Small Letter T With Caron + { L'\xbc', L'\u017a' }, // Latin Small Letter Z With Acute + { L'\xbd', L'\u02dd' }, // Double Acute Accent + { L'\xbe', L'\u017e' }, // Latin Small Letter Z With Caron + { L'\xbf', L'\u017c' }, // Latin Small Letter Z With Dot Above + { L'\xc0', L'\u0154' }, // Latin Capital Letter R With Acute + { L'\xc3', L'\u0102' }, // Latin Capital Letter A With Breve + { L'\xc5', L'\u0139' }, // Latin Capital Letter L With Acute + { L'\xc6', L'\u0106' }, // Latin Capital Letter C With Acute + { L'\xc8', L'\u010c' }, // Latin Capital Letter C With Caron + { L'\xca', L'\u0118' }, // Latin Capital Letter E With Ogonek + { L'\xcc', L'\u011a' }, // Latin Capital Letter E With Caron + { L'\xcf', L'\u010e' }, // Latin Capital Letter D With Caron + { L'\xd0', L'\u0110' }, // Latin Capital Letter D With Stroke + { L'\xd1', L'\u0143' }, // Latin Capital Letter N With Acute + { L'\xd2', L'\u0147' }, // Latin Capital Letter N With Caron + { L'\xd5', L'\u0150' }, // Latin Capital Letter O With Double Acute + { L'\xd8', L'\u0158' }, // Latin Capital Letter R With Caron + { L'\xd9', L'\u016e' }, // Latin Capital Letter U With Ring Above + { L'\xdb', L'\u0170' }, // Latin Capital Letter U With Double Acute + { L'\xde', L'\u0162' }, // Latin Capital Letter T With Cedilla + { L'\xe0', L'\u0155' }, // Latin Small Letter R With Acute + { L'\xe3', L'\u0103' }, // Latin Small Letter A With Breve + { L'\xe5', L'\u013a' }, // Latin Small Letter L With Acute + { L'\xe6', L'\u0107' }, // Latin Small Letter C With Acute + { L'\xe8', L'\u010d' }, // Latin Small Letter C With Caron + { L'\xea', L'\u0119' }, // Latin Small Letter E With Ogonek + { L'\xec', L'\u011b' }, // Latin Small Letter E With Caron + { L'\xef', L'\u010f' }, // Latin Small Letter D With Caron + { L'\xf0', L'\u0111' }, // Latin Small Letter D With Stroke + { L'\xf1', L'\u0144' }, // Latin Small Letter N With Acute + { L'\xf2', L'\u0148' }, // Latin Small Letter N With Caron + { L'\xf5', L'\u0151' }, // Latin Small Letter O With Double Acute + { L'\xf8', L'\u0159' }, // Latin Small Letter R With Caron + { L'\xf9', L'\u016f' }, // Latin Small Letter U With Ring Above + { L'\xfb', L'\u0171' }, // Latin Small Letter U With Double Acute + { L'\xfe', L'\u0163' }, // Latin Small Letter T With Cedilla + { L'\xff', L'\u02d9' }, // Dot Above + }; + + // https://en.wikipedia.org/wiki/ISO/IEC_8859-5 + static constexpr auto LatinCyrillic = Latin1BasedCharSet96{ + { L'\xa1', L'\u0401' }, // Cyrillic Capital Letter Io + { L'\xa2', L'\u0402' }, // Cyrillic Capital Letter Dje + { L'\xa3', L'\u0403' }, // Cyrillic Capital Letter Gje + { L'\xa4', L'\u0404' }, // Cyrillic Capital Letter Ukrainian Ie + { L'\xa5', L'\u0405' }, // Cyrillic Capital Letter Dze + { L'\xa6', L'\u0406' }, // Cyrillic Capital Letter Byelorussian-Ukrainian I + { L'\xa7', L'\u0407' }, // Cyrillic Capital Letter Yi + { L'\xa8', L'\u0408' }, // Cyrillic Capital Letter Je + { L'\xa9', L'\u0409' }, // Cyrillic Capital Letter Lje + { L'\xaa', L'\u040a' }, // Cyrillic Capital Letter Nje + { L'\xab', L'\u040b' }, // Cyrillic Capital Letter Tshe + { L'\xac', L'\u040c' }, // Cyrillic Capital Letter Kje + { L'\xae', L'\u040e' }, // Cyrillic Capital Letter Short U + { L'\xaf', L'\u040f' }, // Cyrillic Capital Letter Dzhe + { L'\xb0', L'\u0410' }, // Cyrillic Capital Letter A + { L'\xb1', L'\u0411' }, // Cyrillic Capital Letter Be + { L'\xb2', L'\u0412' }, // Cyrillic Capital Letter Ve + { L'\xb3', L'\u0413' }, // Cyrillic Capital Letter Ghe + { L'\xb4', L'\u0414' }, // Cyrillic Capital Letter De + { L'\xb5', L'\u0415' }, // Cyrillic Capital Letter Ie + { L'\xb6', L'\u0416' }, // Cyrillic Capital Letter Zhe + { L'\xb7', L'\u0417' }, // Cyrillic Capital Letter Ze + { L'\xb8', L'\u0418' }, // Cyrillic Capital Letter I + { L'\xb9', L'\u0419' }, // Cyrillic Capital Letter Short I + { L'\xba', L'\u041a' }, // Cyrillic Capital Letter Ka + { L'\xbb', L'\u041b' }, // Cyrillic Capital Letter El + { L'\xbc', L'\u041c' }, // Cyrillic Capital Letter Em + { L'\xbd', L'\u041d' }, // Cyrillic Capital Letter En + { L'\xbe', L'\u041e' }, // Cyrillic Capital Letter O + { L'\xbf', L'\u041f' }, // Cyrillic Capital Letter Pe + { L'\xc0', L'\u0420' }, // Cyrillic Capital Letter Er + { L'\xc1', L'\u0421' }, // Cyrillic Capital Letter Es + { L'\xc2', L'\u0422' }, // Cyrillic Capital Letter Te + { L'\xc3', L'\u0423' }, // Cyrillic Capital Letter U + { L'\xc4', L'\u0424' }, // Cyrillic Capital Letter Ef + { L'\xc5', L'\u0425' }, // Cyrillic Capital Letter Ha + { L'\xc6', L'\u0426' }, // Cyrillic Capital Letter Tse + { L'\xc7', L'\u0427' }, // Cyrillic Capital Letter Che + { L'\xc8', L'\u0428' }, // Cyrillic Capital Letter Sha + { L'\xc9', L'\u0429' }, // Cyrillic Capital Letter Shcha + { L'\xca', L'\u042a' }, // Cyrillic Capital Letter Hard Sign + { L'\xcb', L'\u042b' }, // Cyrillic Capital Letter Yeru + { L'\xcc', L'\u042c' }, // Cyrillic Capital Letter Soft Sign + { L'\xcd', L'\u042d' }, // Cyrillic Capital Letter E + { L'\xce', L'\u042e' }, // Cyrillic Capital Letter Yu + { L'\xcf', L'\u042f' }, // Cyrillic Capital Letter Ya + { L'\xd0', L'\u0430' }, // Cyrillic Small Letter A + { L'\xd1', L'\u0431' }, // Cyrillic Small Letter Be + { L'\xd2', L'\u0432' }, // Cyrillic Small Letter Ve + { L'\xd3', L'\u0433' }, // Cyrillic Small Letter Ghe + { L'\xd4', L'\u0434' }, // Cyrillic Small Letter De + { L'\xd5', L'\u0435' }, // Cyrillic Small Letter Ie + { L'\xd6', L'\u0436' }, // Cyrillic Small Letter Zhe + { L'\xd7', L'\u0437' }, // Cyrillic Small Letter Ze + { L'\xd8', L'\u0438' }, // Cyrillic Small Letter I + { L'\xd9', L'\u0439' }, // Cyrillic Small Letter Short I + { L'\xda', L'\u043a' }, // Cyrillic Small Letter Ka + { L'\xdb', L'\u043b' }, // Cyrillic Small Letter El + { L'\xdc', L'\u043c' }, // Cyrillic Small Letter Em + { L'\xdd', L'\u043d' }, // Cyrillic Small Letter En + { L'\xde', L'\u043e' }, // Cyrillic Small Letter O + { L'\xdf', L'\u043f' }, // Cyrillic Small Letter Pe + { L'\xe0', L'\u0440' }, // Cyrillic Small Letter Er + { L'\xe1', L'\u0441' }, // Cyrillic Small Letter Es + { L'\xe2', L'\u0442' }, // Cyrillic Small Letter Te + { L'\xe3', L'\u0443' }, // Cyrillic Small Letter U + { L'\xe4', L'\u0444' }, // Cyrillic Small Letter Ef + { L'\xe5', L'\u0445' }, // Cyrillic Small Letter Ha + { L'\xe6', L'\u0446' }, // Cyrillic Small Letter Tse + { L'\xe7', L'\u0447' }, // Cyrillic Small Letter Che + { L'\xe8', L'\u0448' }, // Cyrillic Small Letter Sha + { L'\xe9', L'\u0449' }, // Cyrillic Small Letter Shcha + { L'\xea', L'\u044a' }, // Cyrillic Small Letter Hard Sign + { L'\xeb', L'\u044b' }, // Cyrillic Small Letter Yeru + { L'\xec', L'\u044c' }, // Cyrillic Small Letter Soft Sign + { L'\xed', L'\u044d' }, // Cyrillic Small Letter E + { L'\xee', L'\u044e' }, // Cyrillic Small Letter Yu + { L'\xef', L'\u044f' }, // Cyrillic Small Letter Ya + { L'\xf0', L'\u2116' }, // Numero Sign + { L'\xf1', L'\u0451' }, // Cyrillic Small Letter Io + { L'\xf2', L'\u0452' }, // Cyrillic Small Letter Dje + { L'\xf3', L'\u0453' }, // Cyrillic Small Letter Gje + { L'\xf4', L'\u0454' }, // Cyrillic Small Letter Ukrainian Ie + { L'\xf5', L'\u0455' }, // Cyrillic Small Letter Dze + { L'\xf6', L'\u0456' }, // Cyrillic Small Letter Byelorussian-Ukrainian I + { L'\xf7', L'\u0457' }, // Cyrillic Small Letter Yi + { L'\xf8', L'\u0458' }, // Cyrillic Small Letter Je + { L'\xf9', L'\u0459' }, // Cyrillic Small Letter Lje + { L'\xfa', L'\u045a' }, // Cyrillic Small Letter Nje + { L'\xfb', L'\u045b' }, // Cyrillic Small Letter Tshe + { L'\xfc', L'\u045c' }, // Cyrillic Small Letter Kje + { L'\xfd', L'\u00a7' }, // Section Sign + { L'\xfe', L'\u045e' }, // Cyrillic Small Letter Short U + { L'\xff', L'\u045f' }, // Cyrillic Small Letter Dzhe + }; + + // https://en.wikipedia.org/wiki/ISO/IEC_8859-7 + // Note that this is the 1987 version of the standard, and not the 2003 + // update, which has three additional characters. + static constexpr auto LatinGreek = Latin1BasedCharSet96{ + { L'\xa1', L'\u2018' }, // Left Single Quotation Mark + { L'\xa2', L'\u2019' }, // Right Single Quotation Mark + { L'\xa4', L'\u2426' }, // Undefined + { L'\xa5', L'\u2426' }, // Undefined + { L'\xaa', L'\u2426' }, // Undefined + { L'\xae', L'\u2426' }, // Undefined + { L'\xaf', L'\u2015' }, // Horizontal Bar + { L'\xb4', L'\u0384' }, // Greek Tonos + { L'\xb5', L'\u0385' }, // Greek Dialytika Tonos + { L'\xb6', L'\u0386' }, // Greek Capital Letter Alpha With Tonos + { L'\xb8', L'\u0388' }, // Greek Capital Letter Epsilon With Tonos + { L'\xb9', L'\u0389' }, // Greek Capital Letter Eta With Tonos + { L'\xba', L'\u038a' }, // Greek Capital Letter Iota With Tonos + { L'\xbc', L'\u038c' }, // Greek Capital Letter Omicron With Tonos + { L'\xbe', L'\u038e' }, // Greek Capital Letter Upsilon With Tonos + { L'\xbf', L'\u038f' }, // Greek Capital Letter Omega With Tonos + { L'\xc0', L'\u0390' }, // Greek Small Letter Iota With Dialytika And Tonos + { L'\xc1', L'\u0391' }, // Greek Capital Letter Alpha + { L'\xc2', L'\u0392' }, // Greek Capital Letter Beta + { L'\xc3', L'\u0393' }, // Greek Capital Letter Gamma + { L'\xc4', L'\u0394' }, // Greek Capital Letter Delta + { L'\xc5', L'\u0395' }, // Greek Capital Letter Epsilon + { L'\xc6', L'\u0396' }, // Greek Capital Letter Zeta + { L'\xc7', L'\u0397' }, // Greek Capital Letter Eta + { L'\xc8', L'\u0398' }, // Greek Capital Letter Theta + { L'\xc9', L'\u0399' }, // Greek Capital Letter Iota + { L'\xca', L'\u039a' }, // Greek Capital Letter Kappa + { L'\xcb', L'\u039b' }, // Greek Capital Letter Lamda + { L'\xcc', L'\u039c' }, // Greek Capital Letter Mu + { L'\xcd', L'\u039d' }, // Greek Capital Letter Nu + { L'\xce', L'\u039e' }, // Greek Capital Letter Xi + { L'\xcf', L'\u039f' }, // Greek Capital Letter Omicron + { L'\xd0', L'\u03a0' }, // Greek Capital Letter Pi + { L'\xd1', L'\u03a1' }, // Greek Capital Letter Rho + { L'\xd2', L'\u2426' }, // Undefined + { L'\xd3', L'\u03a3' }, // Greek Capital Letter Sigma + { L'\xd4', L'\u03a4' }, // Greek Capital Letter Tau + { L'\xd5', L'\u03a5' }, // Greek Capital Letter Upsilon + { L'\xd6', L'\u03a6' }, // Greek Capital Letter Phi + { L'\xd7', L'\u03a7' }, // Greek Capital Letter Chi + { L'\xd8', L'\u03a8' }, // Greek Capital Letter Psi + { L'\xd9', L'\u03a9' }, // Greek Capital Letter Omega + { L'\xda', L'\u03aa' }, // Greek Capital Letter Iota With Dialytika + { L'\xdb', L'\u03ab' }, // Greek Capital Letter Upsilon With Dialytika + { L'\xdc', L'\u03ac' }, // Greek Small Letter Alpha With Tonos + { L'\xdd', L'\u03ad' }, // Greek Small Letter Epsilon With Tonos + { L'\xde', L'\u03ae' }, // Greek Small Letter Eta With Tonos + { L'\xdf', L'\u03af' }, // Greek Small Letter Iota With Tonos + { L'\xe0', L'\u03b0' }, // Greek Small Letter Upsilon With Dialytika And Tonos + { L'\xe1', L'\u03b1' }, // Greek Small Letter Alpha + { L'\xe2', L'\u03b2' }, // Greek Small Letter Beta + { L'\xe3', L'\u03b3' }, // Greek Small Letter Gamma + { L'\xe4', L'\u03b4' }, // Greek Small Letter Delta + { L'\xe5', L'\u03b5' }, // Greek Small Letter Epsilon + { L'\xe6', L'\u03b6' }, // Greek Small Letter Zeta + { L'\xe7', L'\u03b7' }, // Greek Small Letter Eta + { L'\xe8', L'\u03b8' }, // Greek Small Letter Theta + { L'\xe9', L'\u03b9' }, // Greek Small Letter Iota + { L'\xea', L'\u03ba' }, // Greek Small Letter Kappa + { L'\xeb', L'\u03bb' }, // Greek Small Letter Lamda + { L'\xec', L'\u03bc' }, // Greek Small Letter Mu + { L'\xed', L'\u03bd' }, // Greek Small Letter Nu + { L'\xee', L'\u03be' }, // Greek Small Letter Xi + { L'\xef', L'\u03bf' }, // Greek Small Letter Omicron + { L'\xf0', L'\u03c0' }, // Greek Small Letter Pi + { L'\xf1', L'\u03c1' }, // Greek Small Letter Rho + { L'\xf2', L'\u03c2' }, // Greek Small Letter Final Sigma + { L'\xf3', L'\u03c3' }, // Greek Small Letter Sigma + { L'\xf4', L'\u03c4' }, // Greek Small Letter Tau + { L'\xf5', L'\u03c5' }, // Greek Small Letter Upsilon + { L'\xf6', L'\u03c6' }, // Greek Small Letter Phi + { L'\xf7', L'\u03c7' }, // Greek Small Letter Chi + { L'\xf8', L'\u03c8' }, // Greek Small Letter Psi + { L'\xf9', L'\u03c9' }, // Greek Small Letter Omega + { L'\xfa', L'\u03ca' }, // Greek Small Letter Iota With Dialytika + { L'\xfb', L'\u03cb' }, // Greek Small Letter Upsilon With Dialytika + { L'\xfc', L'\u03cc' }, // Greek Small Letter Omicron With Tonos + { L'\xfd', L'\u03cd' }, // Greek Small Letter Upsilon With Tonos + { L'\xfe', L'\u03ce' }, // Greek Small Letter Omega With Tonos + { L'\xff', L'\u2426' }, // Undefined + }; + + // https://en.wikipedia.org/wiki/ISO/IEC_8859-8 + static constexpr auto LatinHebrew = Latin1BasedCharSet96{ + { L'\xa1', L'\u2426' }, // Undefined + { L'\xaa', L'\u00d7' }, // Multiplication Sign + { L'\xba', L'\u00f7' }, // Division Sign + { L'\xbf', L'\u2426' }, // Undefined + { L'\xc0', L'\u2426' }, // Undefined + { L'\xc1', L'\u2426' }, // Undefined + { L'\xc2', L'\u2426' }, // Undefined + { L'\xc3', L'\u2426' }, // Undefined + { L'\xc4', L'\u2426' }, // Undefined + { L'\xc5', L'\u2426' }, // Undefined + { L'\xc6', L'\u2426' }, // Undefined + { L'\xc7', L'\u2426' }, // Undefined + { L'\xc8', L'\u2426' }, // Undefined + { L'\xc9', L'\u2426' }, // Undefined + { L'\xca', L'\u2426' }, // Undefined + { L'\xcb', L'\u2426' }, // Undefined + { L'\xcc', L'\u2426' }, // Undefined + { L'\xcd', L'\u2426' }, // Undefined + { L'\xce', L'\u2426' }, // Undefined + { L'\xcf', L'\u2426' }, // Undefined + { L'\xd0', L'\u2426' }, // Undefined + { L'\xd1', L'\u2426' }, // Undefined + { L'\xd2', L'\u2426' }, // Undefined + { L'\xd3', L'\u2426' }, // Undefined + { L'\xd4', L'\u2426' }, // Undefined + { L'\xd5', L'\u2426' }, // Undefined + { L'\xd6', L'\u2426' }, // Undefined + { L'\xd7', L'\u2426' }, // Undefined + { L'\xd8', L'\u2426' }, // Undefined + { L'\xd9', L'\u2426' }, // Undefined + { L'\xda', L'\u2426' }, // Undefined + { L'\xdb', L'\u2426' }, // Undefined + { L'\xdc', L'\u2426' }, // Undefined + { L'\xdd', L'\u2426' }, // Undefined + { L'\xde', L'\u2426' }, // Undefined + { L'\xdf', L'\u2017' }, // Double Low Line + { L'\xe0', L'\u05d0' }, // Hebrew Letter Alef + { L'\xe1', L'\u05d1' }, // Hebrew Letter Bet + { L'\xe2', L'\u05d2' }, // Hebrew Letter Gimel + { L'\xe3', L'\u05d3' }, // Hebrew Letter Dalet + { L'\xe4', L'\u05d4' }, // Hebrew Letter He + { L'\xe5', L'\u05d5' }, // Hebrew Letter Vav + { L'\xe6', L'\u05d6' }, // Hebrew Letter Zayin + { L'\xe7', L'\u05d7' }, // Hebrew Letter Het + { L'\xe8', L'\u05d8' }, // Hebrew Letter Tet + { L'\xe9', L'\u05d9' }, // Hebrew Letter Yod + { L'\xea', L'\u05da' }, // Hebrew Letter Final Kaf + { L'\xeb', L'\u05db' }, // Hebrew Letter Kaf + { L'\xec', L'\u05dc' }, // Hebrew Letter Lamed + { L'\xed', L'\u05dd' }, // Hebrew Letter Final Mem + { L'\xee', L'\u05de' }, // Hebrew Letter Mem + { L'\xef', L'\u05df' }, // Hebrew Letter Final Nun + { L'\xf0', L'\u05e0' }, // Hebrew Letter Nun + { L'\xf1', L'\u05e1' }, // Hebrew Letter Samekh + { L'\xf2', L'\u05e2' }, // Hebrew Letter Ayin + { L'\xf3', L'\u05e3' }, // Hebrew Letter Final Pe + { L'\xf4', L'\u05e4' }, // Hebrew Letter Pe + { L'\xf5', L'\u05e5' }, // Hebrew Letter Final Tsadi + { L'\xf6', L'\u05e6' }, // Hebrew Letter Tsadi + { L'\xf7', L'\u05e7' }, // Hebrew Letter Qof + { L'\xf8', L'\u05e8' }, // Hebrew Letter Resh + { L'\xf9', L'\u05e9' }, // Hebrew Letter Shin + { L'\xfa', L'\u05ea' }, // Hebrew Letter Tav + { L'\xfb', L'\u2426' }, // Undefined + { L'\xfc', L'\u2426' }, // Undefined + { L'\xfd', L'\u200e' }, // Left-To-Right Mark + { L'\xfe', L'\u200f' }, // Right-To-Left Mark + { L'\xff', L'\u2426' }, // Undefined + }; + + // https://en.wikipedia.org/wiki/ISO/IEC_8859-9 + static constexpr auto Latin5 = Latin1BasedCharSet96{ + { L'\xd0', L'\u011e' }, // Latin Capital Letter G With Breve + { L'\xdd', L'\u0130' }, // Latin Capital Letter I With Dot Above + { L'\xde', L'\u015e' }, // Latin Capital Letter S With Cedilla + { L'\xf0', L'\u011f' }, // Latin Small Letter G With Breve + { L'\xfd', L'\u0131' }, // Latin Small Letter Dotless I + { L'\xfe', L'\u015f' }, // Latin Small Letter S With Cedilla + }; + + // https://www.vt100.net/docs/vt220-rm/table2-3b.html + static constexpr auto DecSupplemental = Latin1BasedCharSet94{ + { L'\xa4', L'\u2426' }, // Undefined + { L'\xa6', L'\u2426' }, // Undefined + { L'\xa8', L'\u00a4' }, // Currency Sign + { L'\xac', L'\u2426' }, // Undefined + { L'\xad', L'\u2426' }, // Undefined + { L'\xae', L'\u2426' }, // Undefined + { L'\xaf', L'\u2426' }, // Undefined + { L'\xb4', L'\u2426' }, // Undefined + { L'\xb8', L'\u2426' }, // Undefined + { L'\xbe', L'\u2426' }, // Undefined + { L'\xd0', L'\u2426' }, // Undefined + { L'\xd7', L'\u0152' }, // Latin Capital Ligature Oe + { L'\xdd', L'\u0178' }, // Latin Capital Letter Y With Diaeresis + { L'\xde', L'\u2426' }, // Undefined + { L'\xf0', L'\u2426' }, // Undefined + { L'\xf7', L'\u0153' }, // Latin Small Ligature Oe + { L'\xfd', L'\u00ff' }, // Latin Small Letter Y With Diaeresis + { L'\xfe', L'\u2426' }, // Undefined + }; + + // https://www.vt100.net/docs/vt220-rm/table2-4.html + static constexpr auto DecSpecialGraphics = AsciiBasedCharSet{ + { L'\x5f', L'\u0020' }, // Blank + { L'\x60', L'\u2666' }, // Diamond (more commonly U+25C6, but U+2666 renders better for us) + { L'\x61', L'\u2592' }, // Checkerboard + { L'\x62', L'\u2409' }, // HT, SYMBOL FOR HORIZONTAL TABULATION + { L'\x63', L'\u240c' }, // FF, SYMBOL FOR FORM FEED + { L'\x64', L'\u240d' }, // CR, SYMBOL FOR CARRIAGE RETURN + { L'\x65', L'\u240a' }, // LF, SYMBOL FOR LINE FEED + { L'\x66', L'\u00b0' }, // Degree symbol + { L'\x67', L'\u00b1' }, // Plus/minus + { L'\x68', L'\u2424' }, // NL, SYMBOL FOR NEWLINE + { L'\x69', L'\u240b' }, // VT, SYMBOL FOR VERTICAL TABULATION + { L'\x6a', L'\u2518' }, // Lower-right corner + { L'\x6b', L'\u2510' }, // Upper-right corner + { L'\x6c', L'\u250c' }, // Upper-left corner + { L'\x6d', L'\u2514' }, // Lower-left corner + { L'\x6e', L'\u253c' }, // Crossing lines + { L'\x6f', L'\u23ba' }, // Horizontal line - Scan 1 + { L'\x70', L'\u23bb' }, // Horizontal line - Scan 3 + { L'\x71', L'\u2500' }, // Horizontal line - Scan 5 + { L'\x72', L'\u23bc' }, // Horizontal line - Scan 7 + { L'\x73', L'\u23bd' }, // Horizontal line - Scan 9 + { L'\x74', L'\u251c' }, // Left "T" + { L'\x75', L'\u2524' }, // Right "T" + { L'\x76', L'\u2534' }, // Bottom "T" + { L'\x77', L'\u252c' }, // Top "T" + { L'\x78', L'\u2502' }, // | Vertical bar + { L'\x79', L'\u2264' }, // Less than or equal to + { L'\x7a', L'\u2265' }, // Greater than or equal to + { L'\x7b', L'\u03c0' }, // Pi + { L'\x7c', L'\u2260' }, // Not equal to + { L'\x7d', L'\u00a3' }, // UK pound sign + { L'\x7e', L'\u00b7' }, // Centered dot + }; + + // https://en.wikipedia.org/wiki/KOI-8 + // This is referred to as KOI-8 Cyrillic in the VT520/VT525 Video Terminal + // Programmer Information manual (EK-VT520-RM.A01) + static constexpr auto DecCyrillic = Latin1BasedCharSet94{ + { L'\xa1', L'\u2426' }, // Undefined + { L'\xa2', L'\u2426' }, // Undefined + { L'\xa3', L'\u2426' }, // Undefined + { L'\xa4', L'\u2426' }, // Undefined + { L'\xa5', L'\u2426' }, // Undefined + { L'\xa6', L'\u2426' }, // Undefined + { L'\xa7', L'\u2426' }, // Undefined + { L'\xa8', L'\u2426' }, // Undefined + { L'\xa9', L'\u2426' }, // Undefined + { L'\xaa', L'\u2426' }, // Undefined + { L'\xab', L'\u2426' }, // Undefined + { L'\xac', L'\u2426' }, // Undefined + { L'\xad', L'\u2426' }, // Undefined + { L'\xae', L'\u2426' }, // Undefined + { L'\xaf', L'\u2426' }, // Undefined + { L'\xb0', L'\u2426' }, // Undefined + { L'\xb1', L'\u2426' }, // Undefined + { L'\xb2', L'\u2426' }, // Undefined + { L'\xb3', L'\u2426' }, // Undefined + { L'\xb4', L'\u2426' }, // Undefined + { L'\xb5', L'\u2426' }, // Undefined + { L'\xb6', L'\u2426' }, // Undefined + { L'\xb7', L'\u2426' }, // Undefined + { L'\xb8', L'\u2426' }, // Undefined + { L'\xb9', L'\u2426' }, // Undefined + { L'\xba', L'\u2426' }, // Undefined + { L'\xbb', L'\u2426' }, // Undefined + { L'\xbc', L'\u2426' }, // Undefined + { L'\xbd', L'\u2426' }, // Undefined + { L'\xbe', L'\u2426' }, // Undefined + { L'\xbf', L'\u2426' }, // Undefined + { L'\xc0', L'\u044e' }, // Cyrillic Small Letter Yu + { L'\xc1', L'\u0430' }, // Cyrillic Small Letter A + { L'\xc2', L'\u0431' }, // Cyrillic Small Letter Be + { L'\xc3', L'\u0446' }, // Cyrillic Small Letter Tse + { L'\xc4', L'\u0434' }, // Cyrillic Small Letter De + { L'\xc5', L'\u0435' }, // Cyrillic Small Letter Ie + { L'\xc6', L'\u0444' }, // Cyrillic Small Letter Ef + { L'\xc7', L'\u0433' }, // Cyrillic Small Letter Ghe + { L'\xc8', L'\u0445' }, // Cyrillic Small Letter Ha + { L'\xc9', L'\u0438' }, // Cyrillic Small Letter I + { L'\xca', L'\u0439' }, // Cyrillic Small Letter Short I + { L'\xcb', L'\u043a' }, // Cyrillic Small Letter Ka + { L'\xcc', L'\u043b' }, // Cyrillic Small Letter El + { L'\xcd', L'\u043c' }, // Cyrillic Small Letter Em + { L'\xce', L'\u043d' }, // Cyrillic Small Letter En + { L'\xcf', L'\u043e' }, // Cyrillic Small Letter O + { L'\xd0', L'\u043f' }, // Cyrillic Small Letter Pe + { L'\xd1', L'\u044f' }, // Cyrillic Small Letter Ya + { L'\xd2', L'\u0440' }, // Cyrillic Small Letter Er + { L'\xd3', L'\u0441' }, // Cyrillic Small Letter Es + { L'\xd4', L'\u0442' }, // Cyrillic Small Letter Te + { L'\xd5', L'\u0443' }, // Cyrillic Small Letter U + { L'\xd6', L'\u0436' }, // Cyrillic Small Letter Zhe + { L'\xd7', L'\u0432' }, // Cyrillic Small Letter Ve + { L'\xd8', L'\u044c' }, // Cyrillic Small Letter Soft Sign + { L'\xd9', L'\u044b' }, // Cyrillic Small Letter Yeru + { L'\xda', L'\u0437' }, // Cyrillic Small Letter Ze + { L'\xdb', L'\u0448' }, // Cyrillic Small Letter Sha + { L'\xdc', L'\u044d' }, // Cyrillic Small Letter E + { L'\xdd', L'\u0449' }, // Cyrillic Small Letter Shcha + { L'\xde', L'\u0447' }, // Cyrillic Small Letter Che + { L'\xdf', L'\u044a' }, // Cyrillic Small Letter Hard Sign + { L'\xe0', L'\u042e' }, // Cyrillic Capital Letter Yu + { L'\xe1', L'\u0410' }, // Cyrillic Capital Letter A + { L'\xe2', L'\u0411' }, // Cyrillic Capital Letter Be + { L'\xe3', L'\u0426' }, // Cyrillic Capital Letter Tse + { L'\xe4', L'\u0414' }, // Cyrillic Capital Letter De + { L'\xe5', L'\u0415' }, // Cyrillic Capital Letter Ie + { L'\xe6', L'\u0424' }, // Cyrillic Capital Letter Ef + { L'\xe7', L'\u0413' }, // Cyrillic Capital Letter Ghe + { L'\xe8', L'\u0425' }, // Cyrillic Capital Letter Ha + { L'\xe9', L'\u0418' }, // Cyrillic Capital Letter I + { L'\xea', L'\u0419' }, // Cyrillic Capital Letter Short I + { L'\xeb', L'\u041a' }, // Cyrillic Capital Letter Ka + { L'\xec', L'\u041b' }, // Cyrillic Capital Letter El + { L'\xed', L'\u041c' }, // Cyrillic Capital Letter Em + { L'\xee', L'\u041d' }, // Cyrillic Capital Letter En + { L'\xef', L'\u041e' }, // Cyrillic Capital Letter O + { L'\xf0', L'\u041f' }, // Cyrillic Capital Letter Pe + { L'\xf1', L'\u042f' }, // Cyrillic Capital Letter Ya + { L'\xf2', L'\u0420' }, // Cyrillic Capital Letter Er + { L'\xf3', L'\u0421' }, // Cyrillic Capital Letter Es + { L'\xf4', L'\u0422' }, // Cyrillic Capital Letter Te + { L'\xf5', L'\u0423' }, // Cyrillic Capital Letter U + { L'\xf6', L'\u0416' }, // Cyrillic Capital Letter Zhe + { L'\xf7', L'\u0412' }, // Cyrillic Capital Letter Ve + { L'\xf8', L'\u042c' }, // Cyrillic Capital Letter Soft Sign + { L'\xf9', L'\u042b' }, // Cyrillic Capital Letter Yeru + { L'\xfa', L'\u0417' }, // Cyrillic Capital Letter Ze + { L'\xfb', L'\u0428' }, // Cyrillic Capital Letter Sha + { L'\xfc', L'\u042d' }, // Cyrillic Capital Letter E + { L'\xfd', L'\u0429' }, // Cyrillic Capital Letter Shcha + { L'\xfe', L'\u0427' }, // Cyrillic Capital Letter Che + }; + + // See Figure 5-1 in Installing and Using The VT420 Video Terminal + // With PC Terminal Mode Update (EK-VT42A-UP.A01) + static constexpr auto DecGreek = Latin1BasedCharSet94{ + { L'\xa4', L'\u2426' }, // Undefined + { L'\xa6', L'\u2426' }, // Undefined + { L'\xa8', L'\u00a4' }, // Currency Sign + { L'\xac', L'\u2426' }, // Undefined + { L'\xad', L'\u2426' }, // Undefined + { L'\xae', L'\u2426' }, // Undefined + { L'\xaf', L'\u2426' }, // Undefined + { L'\xb4', L'\u2426' }, // Undefined + { L'\xb8', L'\u2426' }, // Undefined + { L'\xbe', L'\u2426' }, // Undefined + { L'\xc0', L'\u03ca' }, // Greek Small Letter Iota With Dialytika + { L'\xc1', L'\u0391' }, // Greek Capital Letter Alpha + { L'\xc2', L'\u0392' }, // Greek Capital Letter Beta + { L'\xc3', L'\u0393' }, // Greek Capital Letter Gamma + { L'\xc4', L'\u0394' }, // Greek Capital Letter Delta + { L'\xc5', L'\u0395' }, // Greek Capital Letter Epsilon + { L'\xc6', L'\u0396' }, // Greek Capital Letter Zeta + { L'\xc7', L'\u0397' }, // Greek Capital Letter Eta + { L'\xc8', L'\u0398' }, // Greek Capital Letter Theta + { L'\xc9', L'\u0399' }, // Greek Capital Letter Iota + { L'\xca', L'\u039a' }, // Greek Capital Letter Kappa + { L'\xcb', L'\u039b' }, // Greek Capital Letter Lamda + { L'\xcc', L'\u039c' }, // Greek Capital Letter Mu + { L'\xcd', L'\u039d' }, // Greek Capital Letter Nu + { L'\xce', L'\u039e' }, // Greek Capital Letter Xi + { L'\xcf', L'\u039f' }, // Greek Capital Letter Omicron + { L'\xd0', L'\u2426' }, // Undefined + { L'\xd1', L'\u03a0' }, // Greek Capital Letter Pi + { L'\xd2', L'\u03a1' }, // Greek Capital Letter Rho + { L'\xd3', L'\u03a3' }, // Greek Capital Letter Sigma + { L'\xd4', L'\u03a4' }, // Greek Capital Letter Tau + { L'\xd5', L'\u03a5' }, // Greek Capital Letter Upsilon + { L'\xd6', L'\u03a6' }, // Greek Capital Letter Phi + { L'\xd7', L'\u03a7' }, // Greek Capital Letter Chi + { L'\xd8', L'\u03a8' }, // Greek Capital Letter Psi + { L'\xd9', L'\u03a9' }, // Greek Capital Letter Omega + { L'\xda', L'\u03ac' }, // Greek Small Letter Alpha With Tonos + { L'\xdb', L'\u03ad' }, // Greek Small Letter Epsilon With Tonos + { L'\xdc', L'\u03ae' }, // Greek Small Letter Eta With Tonos + { L'\xdd', L'\u03af' }, // Greek Small Letter Iota With Tonos + { L'\xde', L'\u2426' }, // Undefined + { L'\xdf', L'\u03cc' }, // Greek Small Letter Omicron With Tonos + { L'\xe0', L'\u03cb' }, // Greek Small Letter Upsilon With Dialytika + { L'\xe1', L'\u03b1' }, // Greek Small Letter Alpha + { L'\xe2', L'\u03b2' }, // Greek Small Letter Beta + { L'\xe3', L'\u03b3' }, // Greek Small Letter Gamma + { L'\xe4', L'\u03b4' }, // Greek Small Letter Delta + { L'\xe5', L'\u03b5' }, // Greek Small Letter Epsilon + { L'\xe6', L'\u03b6' }, // Greek Small Letter Zeta + { L'\xe7', L'\u03b7' }, // Greek Small Letter Eta + { L'\xe8', L'\u03b8' }, // Greek Small Letter Theta + { L'\xe9', L'\u03b9' }, // Greek Small Letter Iota + { L'\xea', L'\u03ba' }, // Greek Small Letter Kappa + { L'\xeb', L'\u03bb' }, // Greek Small Letter Lamda + { L'\xec', L'\u03bc' }, // Greek Small Letter Mu + { L'\xed', L'\u03bd' }, // Greek Small Letter Nu + { L'\xee', L'\u03be' }, // Greek Small Letter Xi + { L'\xef', L'\u03bf' }, // Greek Small Letter Omicron + { L'\xf0', L'\u2426' }, // Undefined + { L'\xf1', L'\u03c0' }, // Greek Small Letter Pi + { L'\xf2', L'\u03c1' }, // Greek Small Letter Rho + { L'\xf3', L'\u03c3' }, // Greek Small Letter Sigma + { L'\xf4', L'\u03c4' }, // Greek Small Letter Tau + { L'\xf5', L'\u03c5' }, // Greek Small Letter Upsilon + { L'\xf6', L'\u03c6' }, // Greek Small Letter Phi + { L'\xf7', L'\u03c7' }, // Greek Small Letter Chi + { L'\xf8', L'\u03c8' }, // Greek Small Letter Psi + { L'\xf9', L'\u03c9' }, // Greek Small Letter Omega + { L'\xfa', L'\u03c2' }, // Greek Small Letter Final Sigma + { L'\xfb', L'\u03cd' }, // Greek Small Letter Upsilon With Tonos + { L'\xfc', L'\u03ce' }, // Greek Small Letter Omega With Tonos + { L'\xfd', L'\u0384' }, // Greek Tonos + { L'\xfe', L'\u2426' }, // Undefined + }; + + // See Figure 5-6 in Installing and Using The VT420 Video Terminal + // With PC Terminal Mode Update (EK-VT42A-UP.A01) + static constexpr auto DecHebrew = Latin1BasedCharSet94{ + { L'\xa4', L'\u2426' }, // Undefined + { L'\xa6', L'\u2426' }, // Undefined + { L'\xa8', L'\u00a4' }, // Currency Sign + { L'\xac', L'\u2426' }, // Undefined + { L'\xad', L'\u2426' }, // Undefined + { L'\xae', L'\u2426' }, // Undefined + { L'\xaf', L'\u2426' }, // Undefined + { L'\xb4', L'\u2426' }, // Undefined + { L'\xb8', L'\u2426' }, // Undefined + { L'\xbe', L'\u2426' }, // Undefined + { L'\xc0', L'\u2426' }, // Undefined + { L'\xc1', L'\u2426' }, // Undefined + { L'\xc2', L'\u2426' }, // Undefined + { L'\xc3', L'\u2426' }, // Undefined + { L'\xc4', L'\u2426' }, // Undefined + { L'\xc5', L'\u2426' }, // Undefined + { L'\xc6', L'\u2426' }, // Undefined + { L'\xc7', L'\u2426' }, // Undefined + { L'\xc8', L'\u2426' }, // Undefined + { L'\xc9', L'\u2426' }, // Undefined + { L'\xca', L'\u2426' }, // Undefined + { L'\xcb', L'\u2426' }, // Undefined + { L'\xcc', L'\u2426' }, // Undefined + { L'\xcd', L'\u2426' }, // Undefined + { L'\xce', L'\u2426' }, // Undefined + { L'\xcf', L'\u2426' }, // Undefined + { L'\xd0', L'\u2426' }, // Undefined + { L'\xd1', L'\u2426' }, // Undefined + { L'\xd2', L'\u2426' }, // Undefined + { L'\xd3', L'\u2426' }, // Undefined + { L'\xd4', L'\u2426' }, // Undefined + { L'\xd5', L'\u2426' }, // Undefined + { L'\xd6', L'\u2426' }, // Undefined + { L'\xd7', L'\u2426' }, // Undefined + { L'\xd8', L'\u2426' }, // Undefined + { L'\xd9', L'\u2426' }, // Undefined + { L'\xda', L'\u2426' }, // Undefined + { L'\xdb', L'\u2426' }, // Undefined + { L'\xdc', L'\u2426' }, // Undefined + { L'\xdd', L'\u2426' }, // Undefined + { L'\xde', L'\u2426' }, // Undefined + { L'\xdf', L'\u2426' }, // Undefined + { L'\xe0', L'\u05d0' }, // Hebrew Letter Alef + { L'\xe1', L'\u05d1' }, // Hebrew Letter Bet + { L'\xe2', L'\u05d2' }, // Hebrew Letter Gimel + { L'\xe3', L'\u05d3' }, // Hebrew Letter Dalet + { L'\xe4', L'\u05d4' }, // Hebrew Letter He + { L'\xe5', L'\u05d5' }, // Hebrew Letter Vav + { L'\xe6', L'\u05d6' }, // Hebrew Letter Zayin + { L'\xe7', L'\u05d7' }, // Hebrew Letter Het + { L'\xe8', L'\u05d8' }, // Hebrew Letter Tet + { L'\xe9', L'\u05d9' }, // Hebrew Letter Yod + { L'\xea', L'\u05da' }, // Hebrew Letter Final Kaf + { L'\xeb', L'\u05db' }, // Hebrew Letter Kaf + { L'\xec', L'\u05dc' }, // Hebrew Letter Lamed + { L'\xed', L'\u05dd' }, // Hebrew Letter Final Mem + { L'\xee', L'\u05de' }, // Hebrew Letter Mem + { L'\xef', L'\u05df' }, // Hebrew Letter Final Nun + { L'\xf0', L'\u05e0' }, // Hebrew Letter Nun + { L'\xf1', L'\u05e1' }, // Hebrew Letter Samekh + { L'\xf2', L'\u05e2' }, // Hebrew Letter Ayin + { L'\xf3', L'\u05e3' }, // Hebrew Letter Final Pe + { L'\xf4', L'\u05e4' }, // Hebrew Letter Pe + { L'\xf5', L'\u05e5' }, // Hebrew Letter Final Tsadi + { L'\xf6', L'\u05e6' }, // Hebrew Letter Tsadi + { L'\xf7', L'\u05e7' }, // Hebrew Letter Qof + { L'\xf8', L'\u05e8' }, // Hebrew Letter Resh + { L'\xf9', L'\u05e9' }, // Hebrew Letter Shin + { L'\xfa', L'\u05ea' }, // Hebrew Letter Tav + { L'\xfb', L'\u2426' }, // Undefined + { L'\xfc', L'\u2426' }, // Undefined + { L'\xfd', L'\u2426' }, // Undefined + { L'\xfe', L'\u2426' }, // Undefined + }; + + // See Figure 5-11 in Installing and Using The VT420 Video Terminal + // With PC Terminal Mode Update (EK-VT42A-UP.A01) + static constexpr auto DecTurkish = Latin1BasedCharSet94{ + { L'\xa4', L'\u2426' }, // Undefined + { L'\xa6', L'\u2426' }, // Undefined + { L'\xa8', L'\u00a4' }, // Currency Sign + { L'\xac', L'\u2426' }, // Undefined + { L'\xad', L'\u2426' }, // Undefined + { L'\xae', L'\u0130' }, // Latin Capital Letter I With Dot Above + { L'\xaf', L'\u2426' }, // Undefined + { L'\xb4', L'\u2426' }, // Undefined + { L'\xb8', L'\u2426' }, // Undefined + { L'\xbe', L'\u0131' }, // Latin Small Letter Dotless I + { L'\xd0', L'\u011e' }, // Latin Capital Letter G With Breve + { L'\xd7', L'\u0152' }, // Latin Capital Ligature Oe + { L'\xdd', L'\u0178' }, // Latin Capital Letter Y With Diaeresis + { L'\xde', L'\u015e' }, // Latin Capital Letter S With Cedilla + { L'\xf0', L'\u011f' }, // Latin Small Letter G With Breve + { L'\xf7', L'\u0153' }, // Latin Small Ligature Oe + { L'\xfd', L'\u00ff' }, // Latin Small Letter Y With Diaeresis + { L'\xfe', L'\u015f' }, // Latin Small Letter S With Cedilla + }; + + // https://www.vt100.net/docs/vt220-rm/table2-5.html + static constexpr auto BritishNrcs = AsciiBasedCharSet{ + { L'\x23', L'\u00a3' }, // Pound Sign + }; + + // https://www.vt100.net/docs/vt220-rm/table2-6.html + static constexpr auto DutchNrcs = AsciiBasedCharSet{ + { L'\x23', L'\u00a3' }, // Pound Sign + { L'\x40', L'\u00be' }, // Vulgar Fraction Three Quarters + { L'\x5b', L'\u0133' }, // Latin Small Ligature Ij (sometimes approximated as y with diaeresis) + { L'\x5c', L'\u00bd' }, // Vulgar Fraction One Half + { L'\x5d', L'\u007c' }, // Vertical Line + { L'\x7b', L'\u00a8' }, // Diaeresis + { L'\x7c', L'\u0192' }, // Latin Small Letter F With Hook (sometimes approximated as f) + { L'\x7d', L'\u00bc' }, // Vulgar Fraction One Quarter + { L'\x7e', L'\u00b4' }, // Acute Accent + }; + + // https://www.vt100.net/docs/vt220-rm/table2-7.html + static constexpr auto FinnishNrcs = AsciiBasedCharSet{ + { L'\x5b', L'\u00c4' }, // Latin Capital Letter A With Diaeresis + { L'\x5c', L'\u00d6' }, // Latin Capital Letter O With Diaeresis + { L'\x5d', L'\u00c5' }, // Latin Capital Letter A With Ring Above + { L'\x5e', L'\u00dc' }, // Latin Capital Letter U With Diaeresis + { L'\x60', L'\u00e9' }, // Latin Small Letter E With Acute + { L'\x7b', L'\u00e4' }, // Latin Small Letter A With Diaeresis + { L'\x7c', L'\u00f6' }, // Latin Small Letter O With Diaeresis + { L'\x7d', L'\u00e5' }, // Latin Small Letter A With Ring Above + { L'\x7e', L'\u00fc' }, // Latin Small Letter U With Diaeresis + }; + + // https://www.vt100.net/docs/vt220-rm/table2-8.html + static constexpr auto FrenchNrcs = AsciiBasedCharSet{ + { L'\x23', L'\u00a3' }, // Pound Sign + { L'\x40', L'\u00e0' }, // Latin Small Letter A With Grave + { L'\x5b', L'\u00b0' }, // Degree Sign + { L'\x5c', L'\u00e7' }, // Latin Small Letter C With Cedilla + { L'\x5d', L'\u00a7' }, // Section Sign + { L'\x7b', L'\u00e9' }, // Latin Small Letter E With Acute + { L'\x7c', L'\u00f9' }, // Latin Small Letter U With Grave + { L'\x7d', L'\u00e8' }, // Latin Small Letter E With Grave + { L'\x7e', L'\u00a8' }, // Diaeresis + }; + + // https://www.itscj.ipsj.or.jp/iso-ir/069.pdf + // Some terminal emulators consider all the French character sets as equivalent, + // but the 6/6 designator is actually an updated ISO standard, which adds the + // Micro Sign character, which is not included in the DEC version. + static constexpr auto FrenchNrcsIso = AsciiBasedCharSet{ + { L'\x23', L'\u00a3' }, // Pound Sign + { L'\x40', L'\u00e0' }, // Latin Small Letter A With Grave + { L'\x5b', L'\u00b0' }, // Degree Sign + { L'\x5c', L'\u00e7' }, // Latin Small Letter C With Cedilla + { L'\x5d', L'\u00a7' }, // Section Sign + { L'\x60', L'\u00b5' }, // Micro Sign + { L'\x7b', L'\u00e9' }, // Latin Small Letter E With Acute + { L'\x7c', L'\u00f9' }, // Latin Small Letter U With Grave + { L'\x7d', L'\u00e8' }, // Latin Small Letter E With Grave + { L'\x7e', L'\u00a8' }, // Diaeresis + }; + + // https://www.vt100.net/docs/vt220-rm/table2-9.html + static constexpr auto FrenchCanadianNrcs = AsciiBasedCharSet{ + { L'\x40', L'\u00e0' }, // Latin Small Letter A With Grave + { L'\x5b', L'\u00e2' }, // Latin Small Letter A With Circumflex + { L'\x5c', L'\u00e7' }, // Latin Small Letter C With Cedilla + { L'\x5d', L'\u00ea' }, // Latin Small Letter E With Circumflex + { L'\x5e', L'\u00ee' }, // Latin Small Letter I With Circumflex + { L'\x60', L'\u00f4' }, // Latin Small Letter O With Circumflex + { L'\x7b', L'\u00e9' }, // Latin Small Letter E With Acute + { L'\x7c', L'\u00f9' }, // Latin Small Letter U With Grave + { L'\x7d', L'\u00e8' }, // Latin Small Letter E With Grave + { L'\x7e', L'\u00fb' }, // Latin Small Letter U With Circumflex + }; + + // https://www.vt100.net/docs/vt220-rm/table2-10.html + static constexpr auto GermanNrcs = AsciiBasedCharSet{ + { L'\x40', L'\u00a7' }, // Section Sign + { L'\x5b', L'\u00c4' }, // Latin Capital Letter A With Diaeresis + { L'\x5c', L'\u00d6' }, // Latin Capital Letter O With Diaeresis + { L'\x5d', L'\u00dc' }, // Latin Capital Letter U With Diaeresis + { L'\x7b', L'\u00e4' }, // Latin Small Letter A With Diaeresis + { L'\x7c', L'\u00f6' }, // Latin Small Letter O With Diaeresis + { L'\x7d', L'\u00fc' }, // Latin Small Letter U With Diaeresis (VT320 manual incorrectly has this as U+00A8) + { L'\x7e', L'\u00df' }, // Latin Small Letter Sharp S + }; + + // See Figure 5-4 in Installing and Using The VT420 Video Terminal + // With PC Terminal Mode Update (EK-VT42A-UP.A01) + static constexpr auto GreekNrcs = AsciiBasedCharSet{ + { L'\x40', L'\u03ca' }, // Greek Small Letter Iota With Dialytika + { L'\x41', L'\u0391' }, // Greek Capital Letter Alpha + { L'\x42', L'\u0392' }, // Greek Capital Letter Beta + { L'\x43', L'\u0393' }, // Greek Capital Letter Gamma + { L'\x44', L'\u0394' }, // Greek Capital Letter Delta + { L'\x45', L'\u0395' }, // Greek Capital Letter Epsilon + { L'\x46', L'\u0396' }, // Greek Capital Letter Zeta + { L'\x47', L'\u0397' }, // Greek Capital Letter Eta + { L'\x48', L'\u0398' }, // Greek Capital Letter Theta + { L'\x49', L'\u0399' }, // Greek Capital Letter Iota + { L'\x4a', L'\u039a' }, // Greek Capital Letter Kappa + { L'\x4b', L'\u039b' }, // Greek Capital Letter Lamda + { L'\x4c', L'\u039c' }, // Greek Capital Letter Mu + { L'\x4d', L'\u039d' }, // Greek Capital Letter Nu + { L'\x4e', L'\u039e' }, // Greek Capital Letter Xi + { L'\x4f', L'\u039f' }, // Greek Capital Letter Omicron + { L'\x50', L'\u2426' }, // Undefined + { L'\x51', L'\u03a0' }, // Greek Capital Letter Pi + { L'\x52', L'\u03a1' }, // Greek Capital Letter Rho + { L'\x53', L'\u03a3' }, // Greek Capital Letter Sigma + { L'\x54', L'\u03a4' }, // Greek Capital Letter Tau + { L'\x55', L'\u03a5' }, // Greek Capital Letter Upsilon + { L'\x56', L'\u03a6' }, // Greek Capital Letter Phi + { L'\x57', L'\u03a7' }, // Greek Capital Letter Chi + { L'\x58', L'\u03a8' }, // Greek Capital Letter Psi + { L'\x59', L'\u03a9' }, // Greek Capital Letter Omega + { L'\x5a', L'\u03ac' }, // Greek Small Letter Alpha With Tonos + { L'\x5b', L'\u03ad' }, // Greek Small Letter Epsilon With Tonos + { L'\x5c', L'\u03ae' }, // Greek Small Letter Eta With Tonos + { L'\x5d', L'\u03af' }, // Greek Small Letter Iota With Tonos + { L'\x5e', L'\u2426' }, // Undefined + { L'\x5f', L'\u03cc' }, // Greek Small Letter Omicron With Tonos + { L'\x60', L'\u03cb' }, // Greek Small Letter Upsilon With Dialytika + { L'\x61', L'\u03b1' }, // Greek Small Letter Alpha + { L'\x62', L'\u03b2' }, // Greek Small Letter Beta + { L'\x63', L'\u03b3' }, // Greek Small Letter Gamma + { L'\x64', L'\u03b4' }, // Greek Small Letter Delta + { L'\x65', L'\u03b5' }, // Greek Small Letter Epsilon + { L'\x66', L'\u03b6' }, // Greek Small Letter Zeta + { L'\x67', L'\u03b7' }, // Greek Small Letter Eta + { L'\x68', L'\u03b8' }, // Greek Small Letter Theta + { L'\x69', L'\u03b9' }, // Greek Small Letter Iota + { L'\x6a', L'\u03ba' }, // Greek Small Letter Kappa + { L'\x6b', L'\u03bb' }, // Greek Small Letter Lamda + { L'\x6c', L'\u03bc' }, // Greek Small Letter Mu + { L'\x6d', L'\u03bd' }, // Greek Small Letter Nu + { L'\x6e', L'\u03be' }, // Greek Small Letter Xi + { L'\x6f', L'\u03bf' }, // Greek Small Letter Omicron + { L'\x70', L'\u2426' }, // Undefined + { L'\x71', L'\u03c0' }, // Greek Small Letter Pi + { L'\x72', L'\u03c1' }, // Greek Small Letter Rho + { L'\x73', L'\u03c3' }, // Greek Small Letter Sigma + { L'\x74', L'\u03c4' }, // Greek Small Letter Tau + { L'\x75', L'\u03c5' }, // Greek Small Letter Upsilon + { L'\x76', L'\u03c6' }, // Greek Small Letter Phi + { L'\x77', L'\u03c7' }, // Greek Small Letter Chi + { L'\x78', L'\u03c8' }, // Greek Small Letter Psi + { L'\x79', L'\u03c9' }, // Greek Small Letter Omega + { L'\x7a', L'\u03c2' }, // Greek Small Letter Final Sigma + { L'\x7b', L'\u03cd' }, // Greek Small Letter Upsilon With Tonos + { L'\x7c', L'\u03ce' }, // Greek Small Letter Omega With Tonos + { L'\x7d', L'\u0384' }, // Greek Tonos + { L'\x7e', L'\u2426' }, // Undefined + }; + + // See Figure 5-9 in Installing and Using The VT420 Video Terminal + // With PC Terminal Mode Update (EK-VT42A-UP.A01) + static constexpr auto HebrewNrcs = AsciiBasedCharSet{ + { L'\x60', L'\u05d0' }, // Hebrew Letter Alef + { L'\x61', L'\u05d1' }, // Hebrew Letter Bet + { L'\x62', L'\u05d2' }, // Hebrew Letter Gimel + { L'\x63', L'\u05d3' }, // Hebrew Letter Dalet + { L'\x64', L'\u05d4' }, // Hebrew Letter He + { L'\x65', L'\u05d5' }, // Hebrew Letter Vav + { L'\x66', L'\u05d6' }, // Hebrew Letter Zayin + { L'\x67', L'\u05d7' }, // Hebrew Letter Het + { L'\x68', L'\u05d8' }, // Hebrew Letter Tet + { L'\x69', L'\u05d9' }, // Hebrew Letter Yod + { L'\x6a', L'\u05da' }, // Hebrew Letter Final Kaf + { L'\x6b', L'\u05db' }, // Hebrew Letter Kaf + { L'\x6c', L'\u05dc' }, // Hebrew Letter Lamed + { L'\x6d', L'\u05dd' }, // Hebrew Letter Final Mem + { L'\x6e', L'\u05de' }, // Hebrew Letter Mem + { L'\x6f', L'\u05df' }, // Hebrew Letter Final Nun + { L'\x70', L'\u05e0' }, // Hebrew Letter Nun + { L'\x71', L'\u05e1' }, // Hebrew Letter Samekh + { L'\x72', L'\u05e2' }, // Hebrew Letter Ayin + { L'\x73', L'\u05e3' }, // Hebrew Letter Final Pe + { L'\x74', L'\u05e4' }, // Hebrew Letter Pe + { L'\x75', L'\u05e5' }, // Hebrew Letter Final Tsadi + { L'\x76', L'\u05e6' }, // Hebrew Letter Tsadi + { L'\x77', L'\u05e7' }, // Hebrew Letter Qof + { L'\x78', L'\u05e8' }, // Hebrew Letter Resh + { L'\x79', L'\u05e9' }, // Hebrew Letter Shin + { L'\x7a', L'\u05ea' }, // Hebrew Letter Tav + }; + + // https://www.vt100.net/docs/vt220-rm/table2-11.html + static constexpr auto ItalianNrcs = AsciiBasedCharSet{ + { L'\x23', L'\u00a3' }, // Pound Sign + { L'\x40', L'\u00a7' }, // Section Sign + { L'\x5b', L'\u00b0' }, // Degree Sign + { L'\x5c', L'\u00e7' }, // Latin Small Letter C With Cedilla + { L'\x5d', L'\u00e9' }, // Latin Small Letter E With Acute + { L'\x60', L'\u00f9' }, // Latin Small Letter U With Grave + { L'\x7b', L'\u00e0' }, // Latin Small Letter A With Grave + { L'\x7c', L'\u00f2' }, // Latin Small Letter O With Grave + { L'\x7d', L'\u00e8' }, // Latin Small Letter E With Grave + { L'\x7e', L'\u00ec' }, // Latin Small Letter I With Grave + }; + + // https://www.vt100.net/docs/vt220-rm/table2-12.html + static constexpr auto NorwegianDanishNrcs = AsciiBasedCharSet{ + { L'\x40', L'\u00c4' }, // Latin Capital Letter A With Diaeresis + { L'\x5b', L'\u00c6' }, // Latin Capital Letter Ae + { L'\x5c', L'\u00d8' }, // Latin Capital Letter O With Stroke + { L'\x5d', L'\u00c5' }, // Latin Capital Letter A With Ring Above + { L'\x5e', L'\u00dc' }, // Latin Capital Letter U With Diaeresis + { L'\x60', L'\u00e4' }, // Latin Small Letter A With Diaeresis + { L'\x7b', L'\u00e6' }, // Latin Small Letter Ae + { L'\x7c', L'\u00f8' }, // Latin Small Letter O With Stroke + { L'\x7d', L'\u00e5' }, // Latin Small Letter A With Ring Above + { L'\x7e', L'\u00fc' }, // Latin Small Letter U With Diaeresis + }; + + // https://www.itscj.ipsj.or.jp/iso-ir/060.pdf + // Some terminal emulators consider all the Nordic character sets as equivalent, + // but the 6/0 designator is a separate ISO-registered standard, which only maps + // a subset of the characters included in the DEC version. + static constexpr auto NorwegianDanishNrcsIso = AsciiBasedCharSet{ + { L'\x5b', L'\u00c6' }, // Latin Capital Letter Ae + { L'\x5c', L'\u00d8' }, // Latin Capital Letter O With Stroke + { L'\x5d', L'\u00c5' }, // Latin Capital Letter A With Ring Above + { L'\x7b', L'\u00e6' }, // Latin Small Letter Ae + { L'\x7c', L'\u00f8' }, // Latin Small Letter O With Stroke + { L'\x7d', L'\u00e5' }, // Latin Small Letter A With Ring Above + }; + + // https://www.vt100.net/docs/vt320-uu/appendixe.html#SE.2.3 + static constexpr auto PortugueseNrcs = AsciiBasedCharSet{ + { L'\x5b', L'\u00c3' }, // Latin Capital Letter A With Tilde + { L'\x5c', L'\u00c7' }, // Latin Capital Letter C With Cedilla + { L'\x5d', L'\u00d5' }, // Latin Capital Letter O With Tilde + { L'\x7b', L'\u00e3' }, // Latin Small Letter A With Tilde + { L'\x7c', L'\u00e7' }, // Latin Small Letter C With Cedilla + { L'\x7d', L'\u00f5' }, // Latin Small Letter O With Tilde + }; + + // https://en.wikipedia.org/wiki/KOI-7#KOI-7_N2 + // This is referred to as Russian 7-bit (KOI-7) in the VT520/VT525 Video + // Terminal Programmer Information manual (EK-VT520-RM.A01) + static constexpr auto RussianNrcs = AsciiBasedCharSet{ + { L'\x60', L'\u042e' }, // Cyrillic Capital Letter Yu + { L'\x61', L'\u0410' }, // Cyrillic Capital Letter A + { L'\x62', L'\u0411' }, // Cyrillic Capital Letter Be + { L'\x63', L'\u0426' }, // Cyrillic Capital Letter Tse + { L'\x64', L'\u0414' }, // Cyrillic Capital Letter De + { L'\x65', L'\u0415' }, // Cyrillic Capital Letter Ie + { L'\x66', L'\u0424' }, // Cyrillic Capital Letter Ef + { L'\x67', L'\u0413' }, // Cyrillic Capital Letter Ghe + { L'\x68', L'\u0425' }, // Cyrillic Capital Letter Ha + { L'\x69', L'\u0418' }, // Cyrillic Capital Letter I + { L'\x6a', L'\u0419' }, // Cyrillic Capital Letter Short I + { L'\x6b', L'\u041a' }, // Cyrillic Capital Letter Ka + { L'\x6c', L'\u041b' }, // Cyrillic Capital Letter El + { L'\x6d', L'\u041c' }, // Cyrillic Capital Letter Em + { L'\x6e', L'\u041d' }, // Cyrillic Capital Letter En + { L'\x6f', L'\u041e' }, // Cyrillic Capital Letter O + { L'\x70', L'\u041f' }, // Cyrillic Capital Letter Pe + { L'\x71', L'\u042f' }, // Cyrillic Capital Letter Ya + { L'\x72', L'\u0420' }, // Cyrillic Capital Letter Er + { L'\x73', L'\u0421' }, // Cyrillic Capital Letter Es + { L'\x74', L'\u0422' }, // Cyrillic Capital Letter Te + { L'\x75', L'\u0423' }, // Cyrillic Capital Letter U + { L'\x76', L'\u0416' }, // Cyrillic Capital Letter Zhe + { L'\x77', L'\u0412' }, // Cyrillic Capital Letter Ve + { L'\x78', L'\u042c' }, // Cyrillic Capital Letter Soft Sign + { L'\x79', L'\u042b' }, // Cyrillic Capital Letter Yeru + { L'\x7a', L'\u0417' }, // Cyrillic Capital Letter Ze + { L'\x7b', L'\u0428' }, // Cyrillic Capital Letter Sha + { L'\x7c', L'\u042d' }, // Cyrillic Capital Letter E + { L'\x7d', L'\u0429' }, // Cyrillic Capital Letter Shcha + { L'\x7e', L'\u0427' }, // Cyrillic Capital Letter Che + }; + + // https://www.vt100.net/docs/vt220-rm/table2-13.html + static constexpr auto SpanishNrcs = AsciiBasedCharSet{ + { L'\x23', L'\u00a3' }, // Pound Sign + { L'\x40', L'\u00a7' }, // Section Sign + { L'\x5b', L'\u00a1' }, // Inverted Exclamation Mark + { L'\x5c', L'\u00d1' }, // Latin Capital Letter N With Tilde + { L'\x5d', L'\u00bf' }, // Inverted Question Mark + { L'\x7b', L'\u00b0' }, // Degree Sign (VT320 manual has these last 3 off by 1) + { L'\x7c', L'\u00f1' }, // Latin Small Letter N With Tilde + { L'\x7d', L'\u00e7' }, // Latin Small Letter C With Cedilla + }; + + // https://www.vt100.net/docs/vt220-rm/table2-14.html + static constexpr auto SwedishNrcs = AsciiBasedCharSet{ + { L'\x40', L'\u00c9' }, // Latin Capital Letter E With Acute + { L'\x5b', L'\u00c4' }, // Latin Capital Letter A With Diaeresis + { L'\x5c', L'\u00d6' }, // Latin Capital Letter O With Diaeresis + { L'\x5d', L'\u00c5' }, // Latin Capital Letter A With Ring Above + { L'\x5e', L'\u00dc' }, // Latin Capital Letter U With Diaeresis + { L'\x60', L'\u00e9' }, // Latin Small Letter E With Acute + { L'\x7b', L'\u00e4' }, // Latin Small Letter A With Diaeresis + { L'\x7c', L'\u00f6' }, // Latin Small Letter O With Diaeresis + { L'\x7d', L'\u00e5' }, // Latin Small Letter A With Ring Above + { L'\x7e', L'\u00fc' }, // Latin Small Letter U With Diaeresis + }; + + // https://www.vt100.net/docs/vt220-rm/table2-15.html + static constexpr auto SwissNrcs = AsciiBasedCharSet{ + { L'\x23', L'\u00f9' }, // Latin Small Letter U With Grave + { L'\x40', L'\u00e0' }, // Latin Small Letter A With Grave + { L'\x5b', L'\u00e9' }, // Latin Small Letter E With Acute + { L'\x5c', L'\u00e7' }, // Latin Small Letter C With Cedilla + { L'\x5d', L'\u00ea' }, // Latin Small Letter E With Circumflex + { L'\x5e', L'\u00ee' }, // Latin Small Letter I With Circumflex + { L'\x5f', L'\u00e8' }, // Latin Small Letter E With Grave + { L'\x60', L'\u00f4' }, // Latin Small Letter O With Circumflex + { L'\x7b', L'\u00e4' }, // Latin Small Letter A With Diaeresis + { L'\x7c', L'\u00f6' }, // Latin Small Letter O With Diaeresis + { L'\x7d', L'\u00fc' }, // Latin Small Letter U With Diaeresis + { L'\x7e', L'\u00fb' }, // Latin Small Letter U With Circumflex + }; + + // See Figure 5-14 in Installing and Using The VT420 Video Terminal + // With PC Terminal Mode Update (EK-VT42A-UP.A01) + static constexpr auto TurkishNrcs = AsciiBasedCharSet{ + { L'\x21', L'\u0131' }, // Latin Small Letter Dotless I + { L'\x26', L'\u011f' }, // Latin Small Letter G With Breve + { L'\x40', L'\u0130' }, // Latin Capital Letter I With Dot Above + { L'\x5b', L'\u015e' }, // Latin Capital Letter S With Cedilla + { L'\x5c', L'\u00d6' }, // Latin Capital Letter O With Diaeresis + { L'\x5d', L'\u00c7' }, // Latin Capital Letter C With Cedilla + { L'\x5e', L'\u00dc' }, // Latin Capital Letter U With Diaeresis + { L'\x60', L'\u011e' }, // Latin Capital Letter G With Breve + { L'\x7b', L'\u015f' }, // Latin Small Letter S With Cedilla + { L'\x7c', L'\u00f6' }, // Latin Small Letter O With Diaeresis + { L'\x7d', L'\u00e7' }, // Latin Small Letter C With Cedilla + { L'\x7e', L'\u00fc' }, // Latin Small Letter U With Diaeresis + }; + +#pragma warning(pop) +} diff --git a/src/terminal/adapter/conGetSet.hpp b/src/terminal/adapter/conGetSet.hpp index 79c97b2ebf6..2a752ecad15 100644 --- a/src/terminal/adapter/conGetSet.hpp +++ b/src/terminal/adapter/conGetSet.hpp @@ -76,6 +76,7 @@ namespace Microsoft::Console::VirtualTerminal virtual bool PrivateWriteConsoleControlInput(const KeyEvent key) = 0; virtual bool PrivateRefreshWindow() = 0; + virtual bool SetConsoleOutputCP(const unsigned int codepage) = 0; virtual bool GetConsoleOutputCP(unsigned int& codepage) = 0; virtual bool PrivateSuppressResizeRepaint() = 0; diff --git a/src/terminal/adapter/lib/adapter.vcxproj b/src/terminal/adapter/lib/adapter.vcxproj index 0c8a0e52c10..eeb6f4574bc 100644 --- a/src/terminal/adapter/lib/adapter.vcxproj +++ b/src/terminal/adapter/lib/adapter.vcxproj @@ -24,6 +24,7 @@ + diff --git a/src/terminal/adapter/lib/adapter.vcxproj.filters b/src/terminal/adapter/lib/adapter.vcxproj.filters index 4e3979bed04..4675a7e3a20 100644 --- a/src/terminal/adapter/lib/adapter.vcxproj.filters +++ b/src/terminal/adapter/lib/adapter.vcxproj.filters @@ -80,6 +80,9 @@ Header Files + + Header Files + diff --git a/src/terminal/adapter/termDispatch.hpp b/src/terminal/adapter/termDispatch.hpp index b111210776e..f4e5b080538 100644 --- a/src/terminal/adapter/termDispatch.hpp +++ b/src/terminal/adapter/termDispatch.hpp @@ -89,7 +89,12 @@ class Microsoft::Console::VirtualTerminal::TermDispatch : public Microsoft::Cons bool DeviceAttributes() noexcept override { return false; } // DA1 bool Vt52DeviceAttributes() noexcept override { return false; } // VT52 Identify - bool DesignateCharset(const wchar_t /*wchCharset*/) noexcept override { return false; } // SCS + bool DesignateCodingSystem(const wchar_t /*codingSystem*/) noexcept override { return false; } // DOCS + bool Designate94Charset(const size_t /*gsetNumber*/, const std::pair /*charset*/) noexcept override { return false; } // SCS + bool Designate96Charset(const size_t /*gsetNumber*/, const std::pair /*charset*/) noexcept override { return false; } // SCS + bool LockingShift(const size_t /*gsetNumber*/) noexcept override { return false; } // LS0, LS1, LS2, LS3 + bool LockingShiftRight(const size_t /*gsetNumber*/) noexcept override { return false; } // LS1R, LS2R, LS3R + bool SingleShift(const size_t /*gsetNumber*/) noexcept override { return false; } // SS2, SS3 bool SoftReset() noexcept override { return false; } // DECSTR bool HardReset() noexcept override { return false; } // RIS diff --git a/src/terminal/adapter/terminalOutput.cpp b/src/terminal/adapter/terminalOutput.cpp index 8af9a3bbc2f..a7b7c58aee7 100644 --- a/src/terminal/adapter/terminalOutput.cpp +++ b/src/terminal/adapter/terminalOutput.cpp @@ -3,161 +3,207 @@ #include #include +#include "charsets.hpp" #include "terminalOutput.hpp" #include "strsafe.h" using namespace Microsoft::Console::VirtualTerminal; -// We include a full table so all we have to do is the lookup. -// The tables only ever change the values x20 - x7f, hence why the table starts at \x20 -// From http://vt100.net/docs/vt220-rm/table2-4.html -static constexpr std::array s_decSpecialGraphicsTranslations{ - L'\x20', - L'\x21', - L'\x22', - L'\x23', - L'\x24', - L'\x25', - L'\x26', - L'\x27', - L'\x28', - L'\x29', - L'\x2a', - L'\x2b', - L'\x2c', - L'\x2d', - L'\x2e', - L'\x2f', - L'\x30', - L'\x31', - L'\x32', - L'\x33', - L'\x34', - L'\x35', - L'\x36', - L'\x37', - L'\x38', - L'\x39', - L'\x3a', - L'\x3b', - L'\x3c', - L'\x3d', - L'\x3e', - L'\x3f', - L'\x40', - L'\x41', - L'\x42', - L'\x43', - L'\x44', - L'\x45', - L'\x46', - L'\x47', - L'\x48', - L'\x49', - L'\x4a', - L'\x4b', - L'\x4c', - L'\x4d', - L'\x4e', - L'\x4f', - L'\x50', - L'\x51', - L'\x52', - L'\x53', - L'\x54', - L'\x55', - L'\x56', - L'\x57', - L'\x58', - L'\x59', - L'\x5a', - L'\x5b', - L'\x5c', - L'\x5d', - L'\x5e', - L'\u0020', // L'\x5f', -> Blank - L'\u2666', // L'\x60', -> Diamond (more commonly U+25C6, but U+2666 renders better for us) - L'\u2592', // L'\x61', -> Checkerboard - L'\u2409', // L'\x62', -> HT, SYMBOL FOR HORIZONTAL TABULATION - L'\u240c', // L'\x63', -> FF, SYMBOL FOR FORM FEED - L'\u240d', // L'\x64', -> CR, SYMBOL FOR CARRIAGE RETURN - L'\u240a', // L'\x65', -> LF, SYMBOL FOR LINE FEED - L'\u00b0', // L'\x66', -> Degree symbol - L'\u00b1', // L'\x67', -> Plus/minus - L'\u2424', // L'\x68', -> NL, SYMBOL FOR NEWLINE - L'\u240b', // L'\x69', -> VT, SYMBOL FOR VERTICAL TABULATION - L'\u2518', // L'\x6a', -> Lower-right corner - L'\u2510', // L'\x6b', -> Upper-right corner - L'\u250c', // L'\x6c', -> Upper-left corner - L'\u2514', // L'\x6d', -> Lower-left corner - L'\u253c', // L'\x6e', -> Crossing lines - L'\u23ba', // L'\x6f', -> Horizontal line - Scan 1 - L'\u23bb', // L'\x70', -> Horizontal line - Scan 3 - L'\u2500', // L'\x71', -> Horizontal line - Scan 5 - L'\u23bc', // L'\x72', -> Horizontal line - Scan 7 - L'\u23bd', // L'\x73', -> Horizontal line - Scan 9 - L'\u251c', // L'\x74', -> Left "T" - L'\u2524', // L'\x75', -> Right "T" - L'\u2534', // L'\x76', -> Bottom "T" - L'\u252c', // L'\x77', -> Top "T" - L'\u2502', // L'\x78', -> | Vertical bar - L'\u2264', // L'\x79', -> Less than or equal to - L'\u2265', // L'\x7a', -> Greater than or equal to - L'\u03c0', // L'\x7b', -> Pi - L'\u2260', // L'\x7c', -> Not equal to - L'\u00a3', // L'\x7d', -> UK pound sign - L'\u00b7', // L'\x7e', -> Centered dot - L'\x7f' // L'\x7f', -> DEL -}; +TerminalOutput::TerminalOutput() noexcept +{ + _gsetTranslationTables.at(0) = Ascii; + _gsetTranslationTables.at(1) = Ascii; + _gsetTranslationTables.at(2) = Latin1; + _gsetTranslationTables.at(3) = Latin1; +} -bool TerminalOutput::DesignateCharset(const wchar_t newCharset) noexcept +bool TerminalOutput::Designate94Charset(size_t gsetNumber, const std::pair charset) { - bool result = false; - if (newCharset == DispatchTypes::VTCharacterSets::DEC_LineDrawing || - newCharset == DispatchTypes::VTCharacterSets::USASCII) + switch (charset.first) { - _currentCharset = newCharset; - result = true; + case L'B': // US ASCII + case L'1': // Alternate Character ROM + return _SetTranslationTable(gsetNumber, Ascii); + case L'0': // DEC Special Graphics + case L'2': // Alternate Character ROM Special Graphics + return _SetTranslationTable(gsetNumber, DecSpecialGraphics); + case L'<': // DEC Supplemental + return _SetTranslationTable(gsetNumber, DecSupplemental); + case L'A': // British NRCS + return _SetTranslationTable(gsetNumber, BritishNrcs); + case L'4': // Dutch NRCS + return _SetTranslationTable(gsetNumber, DutchNrcs); + case L'5': // Finnish NRCS + case L'C': // (fallback) + return _SetTranslationTable(gsetNumber, FinnishNrcs); + case L'R': // French NRCS + return _SetTranslationTable(gsetNumber, FrenchNrcs); + case L'f': // French NRCS (ISO update) + return _SetTranslationTable(gsetNumber, FrenchNrcsIso); + case L'9': // French Canadian NRCS + case L'Q': // (fallback) + return _SetTranslationTable(gsetNumber, FrenchCanadianNrcs); + case L'K': // German NRCS + return _SetTranslationTable(gsetNumber, GermanNrcs); + case L'Y': // Italian NRCS + return _SetTranslationTable(gsetNumber, ItalianNrcs); + case L'6': // Norwegian/Danish NRCS + case L'E': // (fallback) + return _SetTranslationTable(gsetNumber, NorwegianDanishNrcs); + case L'`': // Norwegian/Danish NRCS (ISO standard) + return _SetTranslationTable(gsetNumber, NorwegianDanishNrcsIso); + case L'Z': // Spanish NRCS + return _SetTranslationTable(gsetNumber, SpanishNrcs); + case L'7': // Swedish NRCS + case L'H': // (fallback) + return _SetTranslationTable(gsetNumber, SwedishNrcs); + case L'=': // Swiss NRCS + return _SetTranslationTable(gsetNumber, SwissNrcs); + case L'&': + switch (charset.second) + { + case L'4': // DEC Cyrillic + return _SetTranslationTable(gsetNumber, DecCyrillic); + case L'5': // Russian NRCS + return _SetTranslationTable(gsetNumber, RussianNrcs); + } + return false; + case L'"': + switch (charset.second) + { + case L'?': // DEC Greek + return _SetTranslationTable(gsetNumber, DecGreek); + case L'>': // Greek NRCS + return _SetTranslationTable(gsetNumber, GreekNrcs); + case L'4': // DEC Hebrew + return _SetTranslationTable(gsetNumber, DecHebrew); + } + return false; + case L'%': + switch (charset.second) + { + case L'=': // Hebrew NRCS + return _SetTranslationTable(gsetNumber, HebrewNrcs); + case L'0': // DEC Turkish + return _SetTranslationTable(gsetNumber, DecTurkish); + case L'2': // Turkish NRCS + return _SetTranslationTable(gsetNumber, TurkishNrcs); + case L'5': // DEC Supplemental + return _SetTranslationTable(gsetNumber, DecSupplemental); + case L'6': // Portuguese NRCS + return _SetTranslationTable(gsetNumber, PortugueseNrcs); + } + return false; + default: + return false; } - return result; +} + +bool TerminalOutput::Designate96Charset(size_t gsetNumber, const std::pair charset) +{ + switch (charset.first) + { + case L'A': // ISO Latin-1 Supplemental + case L'<': // (UPSS when assigned to Latin-1) + return _SetTranslationTable(gsetNumber, Latin1); + case L'B': // ISO Latin-2 Supplemental + return _SetTranslationTable(gsetNumber, Latin2); + case L'L': // ISO Latin-Cyrillic Supplemental + return _SetTranslationTable(gsetNumber, LatinCyrillic); + case L'F': // ISO Latin-Greek Supplemental + return _SetTranslationTable(gsetNumber, LatinGreek); + case L'H': // ISO Latin-Hebrew Supplemental + return _SetTranslationTable(gsetNumber, LatinHebrew); + case L'M': // ISO Latin-5 Supplemental + return _SetTranslationTable(gsetNumber, Latin5); + default: + return false; + } +} + +#pragma warning(suppress : 26440) // Suppress spurious "function can be declared noexcept" warning +bool TerminalOutput::LockingShift(const size_t gsetNumber) +{ + _glSetNumber = gsetNumber; + _glTranslationTable = _gsetTranslationTables.at(_glSetNumber); + // If GL is mapped to ASCII then we don't need to translate anything. + if (_glTranslationTable == Ascii) + { + _glTranslationTable = {}; + } + return true; +} + +#pragma warning(suppress : 26440) // Suppress spurious "function can be declared noexcept" warning +bool TerminalOutput::LockingShiftRight(const size_t gsetNumber) +{ + _grSetNumber = gsetNumber; + _grTranslationTable = _gsetTranslationTables.at(_grSetNumber); + // If GR is mapped to Latin1, or GR translation is not allowed, we don't need to translate anything. + if (_grTranslationTable == Latin1 || !_grTranslationEnabled) + { + _grTranslationTable = {}; + } + return true; +} + +#pragma warning(suppress : 26440) // Suppress spurious "function can be declared noexcept" warning +bool TerminalOutput::SingleShift(const size_t gsetNumber) +{ + _ssTranslationTable = _gsetTranslationTables.at(gsetNumber); + return true; } // Routine Description: -// - Returns true if the current charset isn't USASCII, indicating that text has to come through here +// - Returns true if there is an active translation table, indicating that text has to come through here // Arguments: // - // Return Value: -// - True if the current charset is not USASCII +// - True if translation is required. bool TerminalOutput::NeedToTranslate() const noexcept { - return _currentCharset != DispatchTypes::VTCharacterSets::USASCII; + return !_glTranslationTable.empty() || !_grTranslationTable.empty() || !_ssTranslationTable.empty(); } -const std::wstring_view TerminalOutput::_GetTranslationTable() const noexcept +void TerminalOutput::EnableGrTranslation(boolean enabled) { - switch (_currentCharset) - { - case DispatchTypes::VTCharacterSets::DEC_LineDrawing: - return { s_decSpecialGraphicsTranslations.data(), s_decSpecialGraphicsTranslations.size() }; - } - return {}; + _grTranslationEnabled = enabled; + // We need to reapply the right locking shift to (de)activate the translation table. + LockingShiftRight(_grSetNumber); } wchar_t TerminalOutput::TranslateKey(const wchar_t wch) const noexcept { wchar_t wchFound = wch; - if (_currentCharset == DispatchTypes::VTCharacterSets::USASCII || - wch < '\x5f' || wch > '\x7f') // filter out the region we know is unchanged + if (!_ssTranslationTable.empty()) { - ; // do nothing, these are the same as default. + if (wch - 0x20u < _ssTranslationTable.size()) + { + wchFound = _ssTranslationTable.at(wch - 0x20u); + } + else if (wch - 0xA0u < _ssTranslationTable.size()) + { + wchFound = _ssTranslationTable.at(wch - 0xA0u); + } + _ssTranslationTable = {}; } else { - const auto translationTable = _GetTranslationTable(); - if (!translationTable.empty()) + if (wch - 0x20u < _glTranslationTable.size()) { - wchFound = translationTable.at(wch - '\x20'); + wchFound = _glTranslationTable.at(wch - 0x20u); + } + else if (wch - 0xA0u < _grTranslationTable.size()) + { + wchFound = _grTranslationTable.at(wch - 0xA0u); } } return wchFound; } + +bool TerminalOutput::_SetTranslationTable(const size_t gsetNumber, const std::wstring_view translationTable) +{ + _gsetTranslationTables.at(gsetNumber) = translationTable; + // We need to reapply the locking shifts in case the underlying G-sets have changed. + return LockingShift(_glSetNumber) && LockingShiftRight(_grSetNumber); +} diff --git a/src/terminal/adapter/terminalOutput.hpp b/src/terminal/adapter/terminalOutput.hpp index c0dbb89367d..a26f6d85a61 100644 --- a/src/terminal/adapter/terminalOutput.hpp +++ b/src/terminal/adapter/terminalOutput.hpp @@ -23,15 +23,26 @@ namespace Microsoft::Console::VirtualTerminal class TerminalOutput sealed { public: - TerminalOutput() = default; + TerminalOutput() noexcept; wchar_t TranslateKey(const wchar_t wch) const noexcept; - bool DesignateCharset(const wchar_t wchNewCharset) noexcept; + bool Designate94Charset(const size_t gsetNumber, const std::pair charset); + bool Designate96Charset(const size_t gsetNumber, const std::pair charset); + bool LockingShift(const size_t gsetNumber); + bool LockingShiftRight(const size_t gsetNumber); + bool SingleShift(const size_t gsetNumber); bool NeedToTranslate() const noexcept; + void EnableGrTranslation(boolean enabled); private: - wchar_t _currentCharset = DispatchTypes::VTCharacterSets::USASCII; - - const std::wstring_view _GetTranslationTable() const noexcept; + bool _SetTranslationTable(const size_t gsetNumber, const std::wstring_view translationTable); + + std::array _gsetTranslationTables; + size_t _glSetNumber = 0; + size_t _grSetNumber = 2; + std::wstring_view _glTranslationTable; + std::wstring_view _grTranslationTable; + mutable std::wstring_view _ssTranslationTable; + boolean _grTranslationEnabled = false; }; } diff --git a/src/terminal/adapter/ut_adapter/adapterTest.cpp b/src/terminal/adapter/ut_adapter/adapterTest.cpp index 49eaa3294f0..65e54786356 100644 --- a/src/terminal/adapter/ut_adapter/adapterTest.cpp +++ b/src/terminal/adapter/ut_adapter/adapterTest.cpp @@ -461,6 +461,12 @@ class TestGetSet final : public ConGetSet return FALSE; } + bool SetConsoleOutputCP(const unsigned int /*codepage*/) override + { + Log::Comment(L"SetConsoleOutputCP MOCK called..."); + return TRUE; + } + bool GetConsoleOutputCP(unsigned int& codepage) override { Log::Comment(L"GetConsoleOutputCP MOCK called..."); diff --git a/src/terminal/parser/IStateMachineEngine.hpp b/src/terminal/parser/IStateMachineEngine.hpp index 12904256428..9b024438d63 100644 --- a/src/terminal/parser/IStateMachineEngine.hpp +++ b/src/terminal/parser/IStateMachineEngine.hpp @@ -50,6 +50,7 @@ namespace Microsoft::Console::VirtualTerminal virtual bool ActionSs3Dispatch(const wchar_t wch, const std::basic_string_view parameters) = 0; + virtual bool ParseControlSequenceAfterSs3() const = 0; virtual bool FlushAtEndOfString() const = 0; virtual bool DispatchControlCharsFromEscape() const = 0; virtual bool DispatchIntermediatesFromEscape() const = 0; diff --git a/src/terminal/parser/InputStateMachineEngine.cpp b/src/terminal/parser/InputStateMachineEngine.cpp index 882495108ae..5013820de9d 100644 --- a/src/terminal/parser/InputStateMachineEngine.cpp +++ b/src/terminal/parser/InputStateMachineEngine.cpp @@ -1099,6 +1099,18 @@ bool InputStateMachineEngine::_GenerateKeyFromChar(const wchar_t wch, return true; } +// Method Description: +// - Returns true if the engine should attempt to parse a control sequence +// following an SS3 escape prefix. +// If this is false, an SS3 escape sequence should be dispatched as soon +// as it is encountered. +// Return Value: +// - True iff we should parse a control sequence following an SS3. +bool InputStateMachineEngine::ParseControlSequenceAfterSs3() const noexcept +{ + return true; +} + // Method Description: // - Returns true if the engine should dispatch on the last character of a string // always, even if the sequence hasn't normally dispatched. diff --git a/src/terminal/parser/InputStateMachineEngine.hpp b/src/terminal/parser/InputStateMachineEngine.hpp index 4d971b02eda..64d5de3a9b5 100644 --- a/src/terminal/parser/InputStateMachineEngine.hpp +++ b/src/terminal/parser/InputStateMachineEngine.hpp @@ -166,6 +166,7 @@ namespace Microsoft::Console::VirtualTerminal bool ActionSs3Dispatch(const wchar_t wch, const std::basic_string_view parameters) override; + bool ParseControlSequenceAfterSs3() const noexcept override; bool FlushAtEndOfString() const noexcept override; bool DispatchControlCharsFromEscape() const noexcept override; bool DispatchIntermediatesFromEscape() const noexcept override; diff --git a/src/terminal/parser/OutputStateMachineEngine.cpp b/src/terminal/parser/OutputStateMachineEngine.cpp index c2b13d27d04..a3c7a0f465a 100644 --- a/src/terminal/parser/OutputStateMachineEngine.cpp +++ b/src/terminal/parser/OutputStateMachineEngine.cpp @@ -70,8 +70,14 @@ bool OutputStateMachineEngine::ActionExecute(const wchar_t wch) // LF, FF, and VT are identical in function. _dispatch->LineFeed(DispatchTypes::LineFeedType::DependsOnMode); break; + case AsciiChars::SI: + _dispatch->LockingShift(0); + break; + case AsciiChars::SO: + _dispatch->LockingShift(1); + break; default: - _dispatch->Execute(wch); + _dispatch->Print(wch); break; } @@ -221,6 +227,34 @@ bool OutputStateMachineEngine::ActionEscDispatch(const wchar_t wch, success = _dispatch->HardReset(); TermTelemetry::Instance().Log(TermTelemetry::Codes::RIS); break; + case VTActionCodes::SS2_SingleShift: + success = _dispatch->SingleShift(2); + TermTelemetry::Instance().Log(TermTelemetry::Codes::SS2); + break; + case VTActionCodes::SS3_SingleShift: + success = _dispatch->SingleShift(3); + TermTelemetry::Instance().Log(TermTelemetry::Codes::SS3); + break; + case VTActionCodes::LS2_LockingShift: + success = _dispatch->LockingShift(2); + TermTelemetry::Instance().Log(TermTelemetry::Codes::LS2); + break; + case VTActionCodes::LS3_LockingShift: + success = _dispatch->LockingShift(3); + TermTelemetry::Instance().Log(TermTelemetry::Codes::LS3); + break; + case VTActionCodes::LS1R_LockingShift: + success = _dispatch->LockingShiftRight(1); + TermTelemetry::Instance().Log(TermTelemetry::Codes::LS1R); + break; + case VTActionCodes::LS2R_LockingShift: + success = _dispatch->LockingShiftRight(2); + TermTelemetry::Instance().Log(TermTelemetry::Codes::LS2R); + break; + case VTActionCodes::LS3R_LockingShift: + success = _dispatch->LockingShiftRight(3); + TermTelemetry::Instance().Log(TermTelemetry::Codes::LS3R); + break; default: // If no functions to call, overall dispatch was a failure. success = false; @@ -229,37 +263,13 @@ bool OutputStateMachineEngine::ActionEscDispatch(const wchar_t wch, } else if (intermediates.size() == 1) { - const auto value = til::at(intermediates, 0); - DesignateCharsetTypes designateType = DefaultDesignateCharsetType; - success = _GetDesignateType(value, designateType); - if (success) - { - switch (designateType) - { - case DesignateCharsetTypes::G0: - success = _dispatch->DesignateCharset(wch); - TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG0); - break; - case DesignateCharsetTypes::G1: - success = false; - TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG1); - break; - case DesignateCharsetTypes::G2: - success = false; - TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG2); - break; - case DesignateCharsetTypes::G3: - success = false; - TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG3); - break; - default: - // If no functions to call, overall dispatch was a failure. - success = false; - break; - } - } - else if (value == L'#') + switch (til::at(intermediates, 0)) { + case L'%': + success = _dispatch->DesignateCodingSystem(wch); + TermTelemetry::Instance().Log(TermTelemetry::Codes::DOCS); + break; + case L'#': switch (wch) { case VTActionCodes::DECALN_ScreenAlignmentPattern: @@ -271,8 +281,16 @@ bool OutputStateMachineEngine::ActionEscDispatch(const wchar_t wch, success = false; break; } + break; + default: + success = _IntermediateScsDispatch(wch, intermediates); + break; } } + else if (intermediates.size() == 2) + { + success = _IntermediateScsDispatch(wch, intermediates); + } // If we were unable to process the string, and there's a TTY attached to us, // trigger the state machine to flush the string to the terminal. @@ -320,10 +338,10 @@ bool OutputStateMachineEngine::ActionVt52EscDispatch(const wchar_t wch, success = _dispatch->CursorBackward(1); break; case Vt52ActionCodes::EnterGraphicsMode: - success = _dispatch->DesignateCharset(DispatchTypes::VTCharacterSets::DEC_LineDrawing); + success = _dispatch->Designate94Charset(0, DispatchTypes::CharacterSets::DecSpecialGraphics); break; case Vt52ActionCodes::ExitGraphicsMode: - success = _dispatch->DesignateCharset(DispatchTypes::VTCharacterSets::USASCII); + success = _dispatch->Designate94Charset(0, DispatchTypes::CharacterSets::ASCII); break; case Vt52ActionCodes::CursorToHome: success = _dispatch->CursorPosition(1, 1); @@ -369,6 +387,57 @@ bool OutputStateMachineEngine::ActionVt52EscDispatch(const wchar_t wch, return success; } +// Routine Description: +// - Handles SCS charset designation actions that can have one or two possible intermediates. +// Arguments: +// - wch - Character to dispatch. +// - intermediates - Intermediate characters in the sequence +// Return Value: +// - True if handled successfully. False otherwise. +bool OutputStateMachineEngine::_IntermediateScsDispatch(const wchar_t wch, + const std::basic_string_view intermediates) +{ + bool success = false; + + // If we have more than one intermediate, the second intermediate forms part of + // the charset identifier. Otherwise it's identified by just the final character. + const auto charset = intermediates.size() > 1 ? std::make_pair(intermediates.at(1), wch) : std::make_pair(wch, L'\0'); + + switch (intermediates.at(0)) + { + case L'(': + success = _dispatch->Designate94Charset(0, charset); + TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG0); + break; + case L')': + success = _dispatch->Designate94Charset(1, charset); + TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG1); + break; + case L'*': + success = _dispatch->Designate94Charset(2, charset); + TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG2); + break; + case L'+': + success = _dispatch->Designate94Charset(3, charset); + TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG3); + break; + case L'-': + success = _dispatch->Designate96Charset(1, charset); + TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG1); + break; + case L'.': + success = _dispatch->Designate96Charset(2, charset); + TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG2); + break; + case L'/': + success = _dispatch->Designate96Charset(3, charset); + TermTelemetry::Instance().Log(TermTelemetry::Codes::DesignateG3); + break; + } + + return success; +} + // Routine Description: // - Triggers the CsiDispatch action to indicate that the listener should handle // a control sequence. These sequences perform various API-type commands @@ -1345,43 +1414,16 @@ bool OutputStateMachineEngine::_GetTabClearType(const std::basic_string_view parameters) noexcept override; + bool ParseControlSequenceAfterSs3() const noexcept override; bool FlushAtEndOfString() const noexcept override; bool DispatchControlCharsFromEscape() const noexcept override; bool DispatchIntermediatesFromEscape() const noexcept override; @@ -71,6 +72,8 @@ namespace Microsoft::Console::VirtualTerminal std::function _pfnFlushToTerminal; wchar_t _lastPrintedChar; + bool _IntermediateScsDispatch(const wchar_t wch, + const std::basic_string_view intermediates); bool _IntermediateQuestionMarkDispatch(const wchar_t wchAction, const std::basic_string_view parameters); bool _IntermediateExclamationDispatch(const wchar_t wch); @@ -127,6 +130,13 @@ namespace Microsoft::Console::VirtualTerminal DECSCUSR_SetCursorStyle = L'q', // I believe we'll only ever implement DECSCUSR DTTERM_WindowManipulation = L't', REP_RepeatCharacter = L'b', + SS2_SingleShift = L'N', + SS3_SingleShift = L'O', + LS2_LockingShift = L'n', + LS3_LockingShift = L'o', + LS1R_LockingShift = L'~', + LS2R_LockingShift = L'}', + LS3R_LockingShift = L'|', DECALN_ScreenAlignmentPattern = L'8' }; @@ -164,14 +174,6 @@ namespace Microsoft::Console::VirtualTerminal ResetCursorColor = 112, }; - enum class DesignateCharsetTypes - { - G0, - G1, - G2, - G3 - }; - static constexpr DispatchTypes::GraphicsOptions DefaultGraphicsOption = DispatchTypes::GraphicsOptions::Off; bool _GetGraphicsOptions(const std::basic_string_view parameters, std::vector& options) const; @@ -225,10 +227,6 @@ namespace Microsoft::Console::VirtualTerminal bool _GetTabClearType(const std::basic_string_view parameters, size_t& clearType) const noexcept; - static constexpr DesignateCharsetTypes DefaultDesignateCharsetType = DesignateCharsetTypes::G0; - bool _GetDesignateType(const wchar_t intermediate, - DesignateCharsetTypes& designateType) const noexcept; - static constexpr DispatchTypes::WindowManipulationType DefaultWindowManipulationType = DispatchTypes::WindowManipulationType::Invalid; bool _GetWindowManipulationType(const std::basic_string_view parameters, unsigned int& function) const noexcept; diff --git a/src/terminal/parser/stateMachine.cpp b/src/terminal/parser/stateMachine.cpp index d39433da098..c49e52e3c78 100644 --- a/src/terminal/parser/stateMachine.cpp +++ b/src/terminal/parser/stateMachine.cpp @@ -190,10 +190,8 @@ static constexpr bool _isCsiInvalid(const wchar_t wch) noexcept } // Routine Description: -// - Determines if a character is "operating system control string" beginning -// indicator. -// This immediately follows an escape and signifies a signifies a varying -// length control sequence, quite similar to CSI. +// - Determines if a character is a "Single Shift Select" indicator. +// This immediately follows an escape and signifies a varying length control string. // Arguments: // - wch - Character to check. // Return Value: @@ -217,8 +215,10 @@ static constexpr bool _isVt52CursorAddress(const wchar_t wch) noexcept } // Routine Description: -// - Determines if a character is a "Single Shift Select" indicator. -// This immediately follows an escape and signifies a varying length control string. +// - Determines if a character is "operating system control string" beginning +// indicator. +// This immediately follows an escape and signifies a signifies a varying +// length control sequence, quite similar to CSI. // Arguments: // - wch - Character to check. // Return Value: @@ -838,7 +838,7 @@ void StateMachine::_EventEscape(const wchar_t wch) { _EnterOscParam(); } - else if (_isSs3Indicator(wch)) + else if (_isSs3Indicator(wch) && _engine->ParseControlSequenceAfterSs3()) { _EnterSs3Entry(); } diff --git a/src/terminal/parser/telemetry.cpp b/src/terminal/parser/telemetry.cpp index 068a9b90dd3..205efdbf298 100644 --- a/src/terminal/parser/telemetry.cpp +++ b/src/terminal/parser/telemetry.cpp @@ -250,6 +250,14 @@ void TermTelemetry::WriteFinalTraceLog() const TraceLoggingUInt32(_uiTimesUsed[DesignateG1], "DesignateG1"), TraceLoggingUInt32(_uiTimesUsed[DesignateG2], "DesignateG2"), TraceLoggingUInt32(_uiTimesUsed[DesignateG3], "DesignateG3"), + TraceLoggingUInt32(_uiTimesUsed[LS2], "LS2"), + TraceLoggingUInt32(_uiTimesUsed[LS3], "LS3"), + TraceLoggingUInt32(_uiTimesUsed[LS1R], "LS1R"), + TraceLoggingUInt32(_uiTimesUsed[LS2R], "LS2R"), + TraceLoggingUInt32(_uiTimesUsed[LS3R], "LS3R"), + TraceLoggingUInt32(_uiTimesUsed[SS2], "SS2"), + TraceLoggingUInt32(_uiTimesUsed[SS3], "SS3"), + TraceLoggingUInt32(_uiTimesUsed[DOCS], "DOCS"), TraceLoggingUInt32(_uiTimesUsed[HVP], "HVP"), TraceLoggingUInt32(_uiTimesUsed[DECSTR], "DECSTR"), TraceLoggingUInt32(_uiTimesUsed[RIS], "RIS"), diff --git a/src/terminal/parser/telemetry.hpp b/src/terminal/parser/telemetry.hpp index c6d5970f614..04396bdeb01 100644 --- a/src/terminal/parser/telemetry.hpp +++ b/src/terminal/parser/telemetry.hpp @@ -77,6 +77,14 @@ namespace Microsoft::Console::VirtualTerminal DesignateG1, DesignateG2, DesignateG3, + LS2, + LS3, + LS1R, + LS2R, + LS3R, + SS2, + SS3, + DOCS, HVP, DECSTR, RIS, diff --git a/src/terminal/parser/ut_parser/InputEngineTest.cpp b/src/terminal/parser/ut_parser/InputEngineTest.cpp index a114a84f145..c56544db105 100644 --- a/src/terminal/parser/ut_parser/InputEngineTest.cpp +++ b/src/terminal/parser/ut_parser/InputEngineTest.cpp @@ -7,6 +7,7 @@ #include "stateMachine.hpp" #include "InputStateMachineEngine.hpp" +#include "ascii.hpp" #include "../input/terminalInput.hpp" #include "../../inc/unicode.hpp" #include "../../types/inc/convert.hpp" @@ -263,6 +264,9 @@ class Microsoft::Console::VirtualTerminal::InputEngineTest TEST_METHOD(SGRMouseTest_Movement); TEST_METHOD(SGRMouseTest_Scroll); TEST_METHOD(CtrlAltZCtrlAltXTest); + TEST_METHOD(TestSs3Entry); + TEST_METHOD(TestSs3Immediate); + TEST_METHOD(TestSs3Param); friend class TestInteractDispatch; }; @@ -1310,3 +1314,87 @@ void InputEngineTest::CtrlAltZCtrlAltXTest() VerifyExpectedInputDrained(); } + +void InputEngineTest::TestSs3Entry() +{ + auto pfn = std::bind(&TestState::TestInputCallback, &testState, std::placeholders::_1); + auto dispatch = std::make_unique(pfn, &testState); + auto engine = std::make_unique(std::move(dispatch)); + StateMachine mach(std::move(engine)); + + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); + mach.ProcessCharacter(AsciiChars::ESC); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); + mach.ProcessCharacter(L'O'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); + mach.ProcessCharacter(L'm'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); +} + +void InputEngineTest::TestSs3Immediate() +{ + // Intermediates aren't supported by Ss3 - they just get dispatched + auto pfn = std::bind(&TestState::TestInputCallback, &testState, std::placeholders::_1); + auto dispatch = std::make_unique(pfn, &testState); + auto engine = std::make_unique(std::move(dispatch)); + StateMachine mach(std::move(engine)); + + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); + mach.ProcessCharacter(AsciiChars::ESC); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); + mach.ProcessCharacter(L'O'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); + mach.ProcessCharacter(L'$'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); + + mach.ProcessCharacter(AsciiChars::ESC); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); + mach.ProcessCharacter(L'O'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); + mach.ProcessCharacter(L'#'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); + + mach.ProcessCharacter(AsciiChars::ESC); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); + mach.ProcessCharacter(L'O'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); + mach.ProcessCharacter(L'%'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); + + mach.ProcessCharacter(AsciiChars::ESC); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); + mach.ProcessCharacter(L'O'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); + mach.ProcessCharacter(L'?'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); +} + +void InputEngineTest::TestSs3Param() +{ + auto pfn = std::bind(&TestState::TestInputCallback, &testState, std::placeholders::_1); + auto dispatch = std::make_unique(pfn, &testState); + auto engine = std::make_unique(std::move(dispatch)); + StateMachine mach(std::move(engine)); + + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); + mach.ProcessCharacter(AsciiChars::ESC); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); + mach.ProcessCharacter(L'O'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); + mach.ProcessCharacter(L';'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); + mach.ProcessCharacter(L'3'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); + mach.ProcessCharacter(L'2'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); + mach.ProcessCharacter(L'4'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); + mach.ProcessCharacter(L';'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); + mach.ProcessCharacter(L';'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); + mach.ProcessCharacter(L'8'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); + mach.ProcessCharacter(L'J'); + VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); +} diff --git a/src/terminal/parser/ut_parser/OutputEngineTest.cpp b/src/terminal/parser/ut_parser/OutputEngineTest.cpp index c6871a99c3e..09c116abb63 100644 --- a/src/terminal/parser/ut_parser/OutputEngineTest.cpp +++ b/src/terminal/parser/ut_parser/OutputEngineTest.cpp @@ -550,87 +550,6 @@ class Microsoft::Console::VirtualTerminal::OutputEngineTest final mach.ProcessCharacter(AsciiChars::BEL); VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); } - - TEST_METHOD(TestSs3Entry) - { - auto dispatch = std::make_unique(); - auto engine = std::make_unique(std::move(dispatch)); - StateMachine mach(std::move(engine)); - - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - mach.ProcessCharacter(AsciiChars::ESC); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); - mach.ProcessCharacter(L'O'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); - mach.ProcessCharacter(L'm'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - } - - TEST_METHOD(TestSs3Immediate) - { - // Intermediates aren't supported by Ss3 - they just get dispatched - auto dispatch = std::make_unique(); - auto engine = std::make_unique(std::move(dispatch)); - StateMachine mach(std::move(engine)); - - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - mach.ProcessCharacter(AsciiChars::ESC); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); - mach.ProcessCharacter(L'O'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); - mach.ProcessCharacter(L'$'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - - mach.ProcessCharacter(AsciiChars::ESC); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); - mach.ProcessCharacter(L'O'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); - mach.ProcessCharacter(L'#'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - - mach.ProcessCharacter(AsciiChars::ESC); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); - mach.ProcessCharacter(L'O'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); - mach.ProcessCharacter(L'%'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - - mach.ProcessCharacter(AsciiChars::ESC); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); - mach.ProcessCharacter(L'O'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); - mach.ProcessCharacter(L'?'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - } - - TEST_METHOD(TestSs3Param) - { - auto dispatch = std::make_unique(); - auto engine = std::make_unique(std::move(dispatch)); - StateMachine mach(std::move(engine)); - - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - mach.ProcessCharacter(AsciiChars::ESC); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Escape); - mach.ProcessCharacter(L'O'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Entry); - mach.ProcessCharacter(L';'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); - mach.ProcessCharacter(L'3'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); - mach.ProcessCharacter(L'2'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); - mach.ProcessCharacter(L'4'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); - mach.ProcessCharacter(L';'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); - mach.ProcessCharacter(L';'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); - mach.ProcessCharacter(L'8'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ss3Param); - mach.ProcessCharacter(L'J'); - VERIFY_ARE_EQUAL(mach._state, StateMachine::VTStates::Ground); - } }; class StatefulDispatch final : public TermDispatch diff --git a/src/terminal/parser/ut_parser/StateMachineTest.cpp b/src/terminal/parser/ut_parser/StateMachineTest.cpp index de6ce92b6eb..d82a46b87d6 100644 --- a/src/terminal/parser/ut_parser/StateMachineTest.cpp +++ b/src/terminal/parser/ut_parser/StateMachineTest.cpp @@ -76,6 +76,7 @@ class Microsoft::Console::VirtualTerminal::TestStateMachineEngine : public IStat bool ActionSs3Dispatch(const wchar_t /* wch */, const std::basic_string_view /* parameters */) override { return true; }; + bool ParseControlSequenceAfterSs3() const override { return false; } bool FlushAtEndOfString() const override { return false; }; bool DispatchControlCharsFromEscape() const override { return false; }; bool DispatchIntermediatesFromEscape() const override { return false; };