microsoft · DHowett-MSFT · Feb 4, 2020 · Jan 30, 2020 · Jan 31, 2020 · Jan 31, 2020
diff --git a/src/host/VtInputThread.cpp b/src/host/VtInputThread.cpp
@@ -28,7 +28,7 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe,
                              const bool inheritCursor) :
     _hFile{ std::move(hPipe) },
     _hThread{},
-    _utf8Parser{ CP_UTF8 },
+    _u8State{},
     _dwThreadId{ 0 },
     _exitRequested{ false },
     _exitResult{ S_OK }
@@ -47,15 +47,14 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe,
 }
 
 // Method Description:
-// - Processes a buffer of input characters. The characters should be utf-8
-//      encoded, and will get converted to wchar_t's to be processed by the
+// - Processes a string of input characters. The characters should be UTF-8
+//      encoded, and will get converted to a UTF-16 wstring to be processed by the
 //      input state machine.
 // Arguments:
-// - charBuffer - the UTF-8 characters recieved.
-// - cch - number of UTF-8 characters in charBuffer
+// - u8Str - the UTF-8 string received.
 // Return Value:
 // - S_OK on success, otherwise an appropriate failure.
-[[nodiscard]] HRESULT VtInputThread::_HandleRunInput(_In_reads_(cch) const byte* const charBuffer, const int cch)
+[[nodiscard]] HRESULT VtInputThread::_HandleRunInput(const std::string_view u8Str)
 {
     // Make sure to call the GLOBAL Lock/Unlock, not the gci's lock/unlock.
     // Only the global unlock attempts to dispatch ctrl events. If you use the
@@ -67,16 +66,14 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe,
 
     try
     {
-        std::unique_ptr<wchar_t[]> pwsSequence;
-        unsigned int cchConsumed;
-        unsigned int cchSequence;
-        auto hr = _utf8Parser.Parse(charBuffer, cch, cchConsumed, pwsSequence, cchSequence);
+        std::wstring wstr{};
+        auto hr = til::u8u16(u8Str, wstr, _u8State);
         // If we hit a parsing error, eat it. It's bad utf-8, we can't do anything with it.
         if (FAILED(hr))
         {
             return S_FALSE;
         }
-        _pInputStateMachine->ProcessString({ pwsSequence.get(), cchSequence });
+        _pInputStateMachine->ProcessString(wstr);
     }
     CATCH_RETURN();
 
@@ -100,12 +97,12 @@ DWORD WINAPI VtInputThread::StaticVtInputThreadProc(_In_ LPVOID lpParameter)
 //      failed, throw or log, depending on what the caller wants.
 // Arguments:
 // - throwOnFail: If true, throw an exception if there was an error processing
-//      the input recieved. Otherwise, log the error.
+//      the input received. Otherwise, log the error.
 // Return Value:
 // - <none>
 void VtInputThread::DoReadInput(const bool throwOnFail)
 {
-    byte buffer[256];
+    char buffer[256];
     DWORD dwRead = 0;
     bool fSuccess = !!ReadFile(_hFile.get(), buffer, ARRAYSIZE(buffer), &dwRead, nullptr);
 
@@ -120,7 +117,7 @@ void VtInputThread::DoReadInput(const bool throwOnFail)
         return;
     }
 
-    HRESULT hr = _HandleRunInput(buffer, dwRead);
+    HRESULT hr = _HandleRunInput({ buffer, gsl::narrow_cast<size_t>(dwRead) });
     if (FAILED(hr))
     {
         if (throwOnFail)

diff --git a/src/host/VtInputThread.hpp b/src/host/VtInputThread.hpp
@@ -15,7 +15,6 @@ Author(s):
 #pragma once
 
 #include "..\terminal\parser\StateMachine.hpp"
-#include "utf8ToWideCharParser.hpp"
 
 namespace Microsoft::Console
 {
@@ -29,7 +28,7 @@ namespace Microsoft::Console
         void DoReadInput(const bool throwOnFail);
 
     private:
-        [[nodiscard]] HRESULT _HandleRunInput(_In_reads_(cch) const byte* const charBuffer, const int cch);
+        [[nodiscard]] HRESULT _HandleRunInput(const std::string_view u8Str);
         DWORD _InputThread();
 
         wil::unique_hfile _hFile;
@@ -40,6 +39,6 @@ namespace Microsoft::Console
         HRESULT _exitResult;
 
         std::unique_ptr<Microsoft::Console::VirtualTerminal::StateMachine> _pInputStateMachine;
-        Utf8ToWideCharParser _utf8Parser;
+        til::u8state _u8State;
     };
 }
diff --git a/src/host/_stream.cpp b/src/host/_stream.cpp
@@ -14,7 +14,6 @@
 #include "dbcs.h"
 #include "handle.h"
 #include "misc.h"
-#include "utf8ToWidecharParser.hpp"
 
 #include "../types/inc/convert.hpp"
 #include "../types/inc/GlyphWidth.hpp"
@@ -491,9 +490,9 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100;
             CursorPosition = cursor.GetPosition();
 
             // Make sure we don't write past the end of the buffer.
-            if (i > (ULONG)coordScreenBufferSize.X - CursorPosition.X)
+            if (i > gsl::narrow_cast<size_t>(coordScreenBufferSize.X) - CursorPosition.X)
             {
-                i = (ULONG)coordScreenBufferSize.X - CursorPosition.X;
+                i = gsl::narrow_cast<size_t>(coordScreenBufferSize.X) - CursorPosition.X;
             }
 
             // line was wrapped if we're writing up to the end of the current row
@@ -683,7 +682,7 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100;
                 if (CheckBisectProcessW(screenInfo,
                                         pwchBufferBackupLimit,
                                         pwchBuffer + 1 - pwchBufferBackupLimit,
-                                        coordScreenBufferSize.X - sOriginalXPosition,
+                                        gsl::narrow_cast<size_t>(coordScreenBufferSize.X) - sOriginalXPosition,
                                         sOriginalXPosition,
                                         dwFlags & WC_ECHO))
                 {
@@ -701,7 +700,7 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100;
         }
         case UNICODE_TAB:
         {
-            const size_t TabSize = NUMBER_OF_SPACES_IN_TAB(cursor.GetPosition().X);
+            const size_t TabSize = gsl::narrow_cast<size_t>(NUMBER_OF_SPACES_IN_TAB(cursor.GetPosition().X));
             CursorPosition.X = (SHORT)(cursor.GetPosition().X + TabSize);
 
             // move cursor forward to next tab stop.  fill space with blanks.
@@ -1053,36 +1052,25 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100;
         const auto codepage = gci.OutputCP;
 
         // Convert our input parameters to Unicode
-        std::unique_ptr<wchar_t[]> wideCharBuffer{ nullptr };
-        static Utf8ToWideCharParser parser{ gci.OutputCP };
-
-        // update current codepage in case it was changed from last time
-        // this was called. We do this outside the UTF-8 check because the parser drops its state
-        // when the codepage changes.
-        parser.SetCodePage(gci.OutputCP);
+        std::wstring wstr{};
+        static til::u8state u8State{};
 
         SCREEN_INFORMATION& ScreenInfo = context.GetActiveBuffer();
         wchar_t* pwchBuffer;
         size_t cchBuffer;
         if (codepage == CP_UTF8)
         {
-            wideCharBuffer.release();
-            unsigned int charCount;
-            unsigned int charsConsumed;
-            unsigned int charsGenerated;
-            RETURN_IF_FAILED(SizeTToUInt(buffer.size(), &charCount));
-            RETURN_IF_FAILED(parser.Parse(reinterpret_cast<const byte*>(buffer.data()),
-                                          charCount,
-                                          charsConsumed,
-                                          wideCharBuffer,
-                                          charsGenerated));
-
-            pwchBuffer = reinterpret_cast<wchar_t*>(wideCharBuffer.get());
-            cchBuffer = charsGenerated;
-            read = charsConsumed;
+            RETURN_IF_FAILED(til::u8u16(buffer, wstr, u8State));
+            pwchBuffer = wstr.data();
+            cchBuffer = wstr.length();
+            read = buffer.size();
         }
         else
         {
+            // In case the codepage has changed from UTF-8 to another one,
+            // discard any partial UTF-8 sequence that might still be cached.
+            u8State.reset();
+
             NTSTATUS Status = STATUS_SUCCESS;
             PWCHAR TransBuffer;
             PWCHAR TransBufferOriginalLocation;
@@ -1183,7 +1171,7 @@ constexpr unsigned int LOCAL_BUFFER_SIZE = 100;
             }
 
             pwchBuffer = TransBufferOriginalLocation;
-            cchBuffer = (dbcsNumBytes + BufPtrNumBytes) / sizeof(wchar_t);
+            cchBuffer = (gsl::narrow_cast<size_t>(dbcsNumBytes) + BufPtrNumBytes) / sizeof(wchar_t);
         }
 
         // Make the W version of the call

diff --git a/src/host/_stream.h b/src/host/_stream.h
@@ -91,6 +91,6 @@ Return Value:
 // NOTE: console lock must be held when calling this routine
 // String has been translated to unicode at this point.
 [[nodiscard]] NTSTATUS DoWriteConsole(_In_reads_bytes_(*pcbBuffer) PWCHAR pwchBuffer,
-                                      _In_ size_t* const pcbBuffer,
+                                      _Inout_ size_t* const pcbBuffer,
                                       SCREEN_INFORMATION& screenInfo,
                                       std::unique_ptr<WriteData>& waiter);