diff --git a/src/host/VtInputThread.cpp b/src/host/VtInputThread.cpp index 7abe5258c1d..e02ce62c86c 100644 --- a/src/host/VtInputThread.cpp +++ b/src/host/VtInputThread.cpp @@ -28,7 +28,7 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe, const bool inheritCursor) : _hFile{ std::move(hPipe) }, _hThread{}, - _utf8Parser{ CP_UTF8 }, + _u8State{}, _dwThreadId{ 0 }, _exitRequested{ false }, _exitResult{ S_OK } @@ -47,15 +47,14 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe, } // Method Description: -// - Processes a buffer of input characters. The characters should be utf-8 -// encoded, and will get converted to wchar_t's to be processed by the +// - Processes a string of input characters. The characters should be UTF-8 +// encoded, and will get converted to wstring to be processed by the // input state machine. // Arguments: -// - charBuffer - the UTF-8 characters recieved. -// - cch - number of UTF-8 characters in charBuffer +// - u8Str - the UTF-8 string received. // Return Value: // - S_OK on success, otherwise an appropriate failure. -[[nodiscard]] HRESULT VtInputThread::_HandleRunInput(_In_reads_(cch) const byte* const charBuffer, const int cch) +[[nodiscard]] HRESULT VtInputThread::_HandleRunInput(const std::string_view u8Str) { // Make sure to call the GLOBAL Lock/Unlock, not the gci's lock/unlock. // Only the global unlock attempts to dispatch ctrl events. If you use the @@ -67,16 +66,14 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe, try { - std::unique_ptr pwsSequence; - unsigned int cchConsumed; - unsigned int cchSequence; - auto hr = _utf8Parser.Parse(charBuffer, cch, cchConsumed, pwsSequence, cchSequence); + std::wstring wstr{}; + auto hr = til::u8u16(u8Str, wstr, _u8State); // If we hit a parsing error, eat it. It's bad utf-8, we can't do anything with it. if (FAILED(hr)) { return S_FALSE; } - _pInputStateMachine->ProcessString({ pwsSequence.get(), cchSequence }); + _pInputStateMachine->ProcessString(wstr); } CATCH_RETURN(); @@ -100,12 +97,12 @@ DWORD WINAPI VtInputThread::StaticVtInputThreadProc(_In_ LPVOID lpParameter) // failed, throw or log, depending on what the caller wants. // Arguments: // - throwOnFail: If true, throw an exception if there was an error processing -// the input recieved. Otherwise, log the error. +// the input received. Otherwise, log the error. // Return Value: // - void VtInputThread::DoReadInput(const bool throwOnFail) { - byte buffer[256]; + char buffer[256]; DWORD dwRead = 0; bool fSuccess = !!ReadFile(_hFile.get(), buffer, ARRAYSIZE(buffer), &dwRead, nullptr); @@ -120,7 +117,7 @@ void VtInputThread::DoReadInput(const bool throwOnFail) return; } - HRESULT hr = _HandleRunInput(buffer, dwRead); + HRESULT hr = _HandleRunInput({ buffer, gsl::narrow_cast(dwRead) }); if (FAILED(hr)) { if (throwOnFail) diff --git a/src/host/VtInputThread.hpp b/src/host/VtInputThread.hpp index 9f596d7096b..20b232e8798 100644 --- a/src/host/VtInputThread.hpp +++ b/src/host/VtInputThread.hpp @@ -15,7 +15,6 @@ Author(s): #pragma once #include "..\terminal\parser\StateMachine.hpp" -#include "utf8ToWideCharParser.hpp" namespace Microsoft::Console { @@ -29,7 +28,7 @@ namespace Microsoft::Console void DoReadInput(const bool throwOnFail); private: - [[nodiscard]] HRESULT _HandleRunInput(_In_reads_(cch) const byte* const charBuffer, const int cch); + [[nodiscard]] HRESULT _HandleRunInput(const std::string_view u8Str); DWORD _InputThread(); wil::unique_hfile _hFile; @@ -40,6 +39,6 @@ namespace Microsoft::Console HRESULT _exitResult; std::unique_ptr _pInputStateMachine; - Utf8ToWideCharParser _utf8Parser; + til::u8state _u8State; }; } diff --git a/src/host/_stream.cpp b/src/host/_stream.cpp index 88450fcc7be..3a5685b8102 100644 --- a/src/host/_stream.cpp +++ b/src/host/_stream.cpp @@ -14,7 +14,6 @@ #include "dbcs.h" #include "handle.h" #include "misc.h" -#include "utf8ToWidecharParser.hpp" #include "../types/inc/convert.hpp" #include "../types/inc/GlyphWidth.hpp" @@ -501,9 +500,9 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100; CursorPosition = cursor.GetPosition(); // Make sure we don't write past the end of the buffer. - if (i > (ULONG)coordScreenBufferSize.X - CursorPosition.X) + if (i > gsl::narrow_cast(coordScreenBufferSize.X) - CursorPosition.X) { - i = (ULONG)coordScreenBufferSize.X - CursorPosition.X; + i = gsl::narrow_cast(coordScreenBufferSize.X) - CursorPosition.X; } // line was wrapped if we're writing up to the end of the current row @@ -693,7 +692,7 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100; if (CheckBisectProcessW(screenInfo, pwchBufferBackupLimit, pwchBuffer + 1 - pwchBufferBackupLimit, - coordScreenBufferSize.X - sOriginalXPosition, + gsl::narrow_cast(coordScreenBufferSize.X) - sOriginalXPosition, sOriginalXPosition, dwFlags & WC_ECHO)) { @@ -711,7 +710,7 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100; } case UNICODE_TAB: { - const size_t TabSize = NUMBER_OF_SPACES_IN_TAB(cursor.GetPosition().X); + const size_t TabSize = gsl::narrow_cast(NUMBER_OF_SPACES_IN_TAB(cursor.GetPosition().X)); CursorPosition.X = (SHORT)(cursor.GetPosition().X + TabSize); // move cursor forward to next tab stop. fill space with blanks. @@ -1044,199 +1043,148 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100; { try { - const CONSOLE_INFORMATION& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); // Ensure output variables are initialized. read = 0; waiter.reset(); - bool fLeadByteCaptured = false; - bool fLeadByteConsumed = false; - - LockConsole(); - auto Unlock = wil::scope_exit([&] { UnlockConsole(); }); - - if (buffer.size() == 0) + if (buffer.empty()) { return S_OK; } - const auto codepage = gci.OutputCP; - - // Convert our input parameters to Unicode - std::unique_ptr wideCharBuffer{ nullptr }; - static Utf8ToWideCharParser parser{ gci.OutputCP }; + LockConsole(); + auto unlock{ wil::scope_exit([&] { UnlockConsole(); }) }; - // update current codepage in case it was changed from last time - // this was called. We do this outside the UTF-8 check because the parser drops its state - // when the codepage changes. - parser.SetCodePage(gci.OutputCP); + auto& screenInfo{ context.GetActiveBuffer() }; + const auto& consoleInfo{ ServiceLocator::LocateGlobals().getConsoleInformation() }; + const auto codepage{ consoleInfo.OutputCP }; + auto leadByteCaptured{ false }; + auto leadByteConsumed{ false }; + std::wstring wstr{}; + static til::u8state u8State{}; - SCREEN_INFORMATION& ScreenInfo = context.GetActiveBuffer(); - wchar_t* pwchBuffer; - size_t cchBuffer; + // Convert our input parameters to Unicode if (codepage == CP_UTF8) { - wideCharBuffer.release(); - unsigned int charCount; - unsigned int charsConsumed; - unsigned int charsGenerated; - RETURN_IF_FAILED(SizeTToUInt(buffer.size(), &charCount)); - RETURN_IF_FAILED(parser.Parse(reinterpret_cast(buffer.data()), - charCount, - charsConsumed, - wideCharBuffer, - charsGenerated)); - - pwchBuffer = reinterpret_cast(wideCharBuffer.get()); - cchBuffer = charsGenerated; - read = charsConsumed; + RETURN_IF_FAILED(til::u8u16(buffer, wstr, u8State)); + read = buffer.size(); } else { - NTSTATUS Status = STATUS_SUCCESS; - PWCHAR TransBuffer; - PWCHAR TransBufferOriginalLocation; - DWORD Length; - ULONG dbcsNumBytes = 0; - ULONG BufPtrNumBytes = 0; - const char* BufPtr = buffer.data(); - - // (cchTextBufferLength + 2) I think because we might be shoving another unicode char - // from ScreenInfo->WriteConsoleDbcsLeadByte in front - TransBuffer = new WCHAR[buffer.size() + 2]; - RETURN_IF_NULL_ALLOC(TransBuffer); - ZeroMemory(TransBuffer, sizeof(WCHAR) * (buffer.size() + 2)); - - TransBufferOriginalLocation = TransBuffer; - - unsigned int uiTextBufferLength; - RETURN_IF_FAILED(SizeTToUInt(buffer.size(), &uiTextBufferLength)); - - if (!ScreenInfo.WriteConsoleDbcsLeadByte[0] || *(PUCHAR)BufPtr < (UCHAR)' ') - { - dbcsNumBytes = 0; - BufPtrNumBytes = uiTextBufferLength; - } - else if (buffer.size()) + // In case the codepage changes from UTF-8 to another, + // we discard partials that might still be cached. + u8State.reset(); + + int mbPtrLength{}; + RETURN_IF_FAILED(SizeTToInt(buffer.size(), &mbPtrLength)); + + // (buffer.size() + 2) I think because we might be shoving another unicode char + // from screenInfo->WriteConsoleDbcsLeadByte in front + // because we previously checked that buffer.size() fits into an int, +2 won't cause an overflow of size_t + wstr.resize(buffer.size() + 2); + + wchar_t* wcPtr{ wstr.data() }; + auto mbPtr{ buffer.data() }; + size_t dbcsLength{}; + if (screenInfo.WriteConsoleDbcsLeadByte[0] != 0 && gsl::narrow_cast(*mbPtr) >= byte{ ' ' }) { // there was a portion of a dbcs character stored from a previous - // call so we take the 2nd half from BufPtr[0], put them together - // and write the wide char to TransBuffer[0] - ScreenInfo.WriteConsoleDbcsLeadByte[1] = *(PCHAR)BufPtr; + // call so we take the 2nd half from mbPtr[0], put them together + // and write the wide char to wcPtr[0] + screenInfo.WriteConsoleDbcsLeadByte[1] = gsl::narrow_cast(*mbPtr); try { - const std::string_view leadByte(reinterpret_cast(ScreenInfo.WriteConsoleDbcsLeadByte), - ARRAYSIZE(ScreenInfo.WriteConsoleDbcsLeadByte)); - - const std::wstring converted = ConvertToW(gci.OutputCP, leadByte); - - FAIL_FAST_IF(converted.size() != 1); - dbcsNumBytes = sizeof(wchar_t); - TransBuffer[0] = converted.at(0); - BufPtr++; + const auto wFromComplemented{ + ConvertToW(codepage, { reinterpret_cast(screenInfo.WriteConsoleDbcsLeadByte), ARRAYSIZE(screenInfo.WriteConsoleDbcsLeadByte) }) + }; + + FAIL_FAST_IF(wFromComplemented.size() != 1); + dbcsLength = sizeof(wchar_t); + wcPtr[0] = wFromComplemented.at(0); + mbPtr++; } catch (...) { - Status = STATUS_UNSUCCESSFUL; - dbcsNumBytes = 0; + dbcsLength = 0; } // this looks weird to be always incrementing even if the conversion failed, but this is the // original behavior so it's left unchanged. - TransBuffer++; - BufPtrNumBytes = uiTextBufferLength - 1; + wcPtr++; + mbPtrLength--; // Note that we used a stored lead byte from a previous call in order to complete this write // Use this to offset the "number of bytes consumed" calculation at the end by -1 to account // for using a byte we had internally, not off the stream. - fLeadByteConsumed = true; - } - else - { - // nothing in ScreenInfo->WriteConsoleDbcsLeadByte and nothing in BufPtr - BufPtrNumBytes = 0; + leadByteConsumed = true; } - ScreenInfo.WriteConsoleDbcsLeadByte[0] = 0; + screenInfo.WriteConsoleDbcsLeadByte[0] = 0; - // if the last byte in BufPtr is a lead byte for the current code page, + // if the last byte in mbPtr is a lead byte for the current code page, // save it for the next time this function is called and we can piece it // back together then - __analysis_assume(BufPtrNumBytes <= uiTextBufferLength); - if (BufPtrNumBytes && CheckBisectStringA((PCHAR)BufPtr, BufPtrNumBytes, &gci.OutputCPInfo)) + if (mbPtrLength != 0 && CheckBisectStringA(const_cast(mbPtr), mbPtrLength, &consoleInfo.OutputCPInfo)) { - ScreenInfo.WriteConsoleDbcsLeadByte[0] = *((PCHAR)BufPtr + BufPtrNumBytes - 1); - BufPtrNumBytes--; + screenInfo.WriteConsoleDbcsLeadByte[0] = gsl::narrow_cast(mbPtr[mbPtrLength - 1]); + mbPtrLength--; // Note that we captured a lead byte during this call, but won't actually draw it until later. // Use this to offset the "number of bytes consumed" calculation at the end by +1 to account // for taking a byte off the stream. - fLeadByteCaptured = true; + leadByteCaptured = true; } - if (BufPtrNumBytes != 0) + if (mbPtrLength != 0) { - // convert the remaining bytes in BufPtr to wide chars - Length = sizeof(WCHAR) * MultiByteToWideChar(gci.OutputCP, - 0, - (LPCCH)BufPtr, - BufPtrNumBytes, - TransBuffer, - BufPtrNumBytes); - - if (Length == 0) - { - Status = STATUS_UNSUCCESSFUL; - } - BufPtrNumBytes = Length; + // convert the remaining bytes in mbPtr to wide chars + mbPtrLength = sizeof(wchar_t) * MultiByteToWideChar(codepage, 0, mbPtr, mbPtrLength, wcPtr, mbPtrLength); } - pwchBuffer = TransBufferOriginalLocation; - cchBuffer = (dbcsNumBytes + BufPtrNumBytes) / sizeof(wchar_t); + wstr.resize((dbcsLength + mbPtrLength) / sizeof(wchar_t)); } - // Make the W version of the call - size_t cchBufferRead; - // Hold the specific version of the waiter locally so we can tinker with it if we must to store additional context. - std::unique_ptr writeDataWaiter; + std::unique_ptr writeDataWaiter{}; - HRESULT const hr = WriteConsoleWImplHelper(ScreenInfo, { pwchBuffer, cchBuffer }, cchBufferRead, writeDataWaiter); + // Make the W version of the call + size_t wcBufferWritten{}; + const auto hr{ WriteConsoleWImplHelper(screenInfo, wstr, wcBufferWritten, writeDataWaiter) }; // If there is no waiter, process the byte count now. if (nullptr == writeDataWaiter.get()) { - // Calculate how many bytes of the original A buffer were consumed in the W version of the call to satisfy pcchTextBufferRead. + // Calculate how many bytes of the original A buffer were consumed in the W version of the call to satisfy mbBufferRead. // For UTF-8 conversions, we've already returned this information above. if (CP_UTF8 != codepage) { - size_t cchTextBufferRead = 0; + size_t mbBufferRead{}; // Start by counting the number of A bytes we used in printing our W string to the screen. try { - cchTextBufferRead = GetALengthFromW(codepage, { pwchBuffer, cchBufferRead }); + mbBufferRead = GetALengthFromW(codepage, { wstr.data(), wcBufferWritten }); } CATCH_LOG(); // If we captured a byte off the string this time around up above, it means we didn't feed // it into the WriteConsoleW above, and therefore its consumption isn't accounted for // in the count we just made. Add +1 to compensate. - if (fLeadByteCaptured) + if (leadByteCaptured) { - cchTextBufferRead++; + mbBufferRead++; } // If we consumed an internally-stored lead byte this time around up above, it means that we // fed a byte into WriteConsoleW that wasn't a part of this particular call's request. // We need to -1 to compensate and tell the caller the right number of bytes consumed this request. - if (fLeadByteConsumed) + if (leadByteConsumed) { - cchTextBufferRead--; + mbBufferRead--; } - read = cchTextBufferRead; + read = mbBufferRead; } } else @@ -1247,7 +1195,7 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100; { // For non-UTF8 codepages, save the lead byte captured/consumed data so we can +1 or -1 the final decoded count // in the WaitData::Notify method later. - writeDataWaiter->SetLeadByteAdjustmentStatus(fLeadByteCaptured, fLeadByteConsumed); + writeDataWaiter->SetLeadByteAdjustmentStatus(leadByteCaptured, leadByteConsumed); } else { @@ -1256,12 +1204,6 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100; } } - // Free remaining data - if (codepage != CP_UTF8) - { - delete[] pwchBuffer; - } - // Give back the waiter now that we're done with tinkering with it. waiter.reset(writeDataWaiter.release()); @@ -1291,7 +1233,7 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100; try { LockConsole(); - auto Unlock = wil::scope_exit([&] { UnlockConsole(); }); + auto unlock = wil::scope_exit([&] { UnlockConsole(); }); std::unique_ptr writeDataWaiter; RETURN_IF_FAILED(WriteConsoleWImplHelper(context.GetActiveBuffer(), buffer, read, writeDataWaiter)); diff --git a/src/host/_stream.h b/src/host/_stream.h index 726bc26c96f..1a88e390384 100644 --- a/src/host/_stream.h +++ b/src/host/_stream.h @@ -91,6 +91,6 @@ Return Value: // NOTE: console lock must be held when calling this routine // String has been translated to unicode at this point. [[nodiscard]] NTSTATUS DoWriteConsole(_In_reads_bytes_(*pcbBuffer) PWCHAR pwchBuffer, - _In_ size_t* const pcbBuffer, + _Inout_ size_t* const pcbBuffer, SCREEN_INFORMATION& screenInfo, std::unique_ptr& waiter);