From 1816dfd40901f3b9262ba3f491708b1aee2fad4a Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Sat, 29 Apr 2023 21:17:39 +0300
Subject: [PATCH 1/9] Share UTF8 converters between coreclr and mono - v1

---
 src/coreclr/pal/src/CMakeLists.txt            |    4 +-
 src/coreclr/pal/src/locale/unicode.cpp        |   93 +-
 src/coreclr/pal/src/locale/utf8.cpp           | 2937 -----------------
 .../MultiByteToWideChar/test4/test4.cpp       |    2 +-
 .../WideCharToMultiByte/test5/test5.cpp       |    2 +-
 .../TypeBuilder/TypeBuilderDefineEvent.cs     |    2 +-
 .../TypeBuilder/TypeBuilderDefineProperty.cs  |    2 +-
 src/mono/mono/eglib/CMakeLists.txt            |    5 +-
 src/mono/mono/eglib/glib.h                    |    1 +
 src/mono/mono/eglib/gutf8.c                   |  323 --
 .../minipal/utf8converter.c}                  |  481 ++-
 src/native/minipal/utf8converter.h            |  200 ++
 12 files changed, 683 insertions(+), 3369 deletions(-)
 delete mode 100644 src/coreclr/pal/src/locale/utf8.cpp
 delete mode 100644 src/mono/mono/eglib/gutf8.c
 rename src/{mono/mono/eglib/giconv.c => native/minipal/utf8converter.c} (68%)
 create mode 100644 src/native/minipal/utf8converter.h

diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt
index bd5a6bdf4d5b22..22d9c29594dd27 100644
--- a/src/coreclr/pal/src/CMakeLists.txt
+++ b/src/coreclr/pal/src/CMakeLists.txt
@@ -152,7 +152,7 @@ set(SOURCES
   loader/module.cpp
   locale/unicode.cpp
   locale/unicodedata.cpp
-  locale/utf8.cpp
+  ${CLR_SRC_NATIVE_DIR}/minipal/utf8converter.c
   map/common.cpp
   map/map.cpp
   map/virtual.cpp
@@ -213,6 +213,8 @@ set(SOURCES
   thread/threadsusp.cpp
 )
 
+set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8converter.c" PROPERTIES COMPILE_FLAGS -Wno-implicit-fallthrough)
+
 if(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND)
   set(LIBUNWIND_OBJECTS $<TARGET_OBJECTS:libunwind>)
 endif(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND)
diff --git a/src/coreclr/pal/src/locale/unicode.cpp b/src/coreclr/pal/src/locale/unicode.cpp
index f29eabc07d9be3..b4c832c3d2e599 100644
--- a/src/coreclr/pal/src/locale/unicode.cpp
+++ b/src/coreclr/pal/src/locale/unicode.cpp
@@ -34,6 +34,7 @@ Revision History:
 #include <errno.h>
 
 #include <debugmacrosext.h>
+#include <minipal/utf8converter.h>
 
 using namespace CorUnix;
 
@@ -227,7 +228,7 @@ MultiByteToWideChar(
         OUT LPWSTR lpWideCharStr,
         IN int cchWideChar)
 {
-    INT retval =0;
+    long retval = 0;
 
     PERF_ENTRY(MultiByteToWideChar);
     ENTRY("MultiByteToWideChar(CodePage=%u, dwFlags=%#x, lpMultiByteStr=%p (%s),"
@@ -253,16 +254,51 @@ MultiByteToWideChar(
         goto EXIT;
     }
 
-    // Use UTF8ToUnicode on all systems, since it replaces
+    // Use g_utf8_to_utf16_custom_alloc_optional on all systems, since it replaces
     // invalid characters and Core Foundation doesn't do that.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
+        int inputLength = (int)strlen(lpMultiByteStr);
+        bool allowNulls = (cbMultiByte > 0 && lpMultiByteStr[cbMultiByte - 1] != '\0');
+        bool subtractOne = cbMultiByte == cchWideChar || allowNulls;
         if (cbMultiByte <= -1)
         {
-        cbMultiByte = strlen(lpMultiByteStr) + 1;
+            cbMultiByte = inputLength + 1;
         }
 
-        retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags);
+        size_t allocSize = 0;
+        struct cookie { LPWSTR str; size_t* allocSize; int* count; };
+        cookie callbackCookie = { .str = lpWideCharStr,  .allocSize = &allocSize, .count = &cchWideChar };
+
+        long itemsWritten;
+        GError *gerror = NULL;
+        lpWideCharStr = (LPWSTR)g_utf8_to_utf16_custom_alloc_optional(lpMultiByteStr, cbMultiByte, &retval, &itemsWritten, allowNulls,
+            !(dwFlags & MB_ERR_INVALID_CHARS), cbMultiByte > inputLength,
+            [](size_t req_size, void* custom_alloc_data)
+            {
+                cookie* callbackCookie = (cookie*)(custom_alloc_data);
+                *(callbackCookie->allocSize) = (req_size / sizeof (gunichar2));
+                int count = *(callbackCookie->count);
+                return (void*)(callbackCookie->str && !(count && *(callbackCookie->allocSize) - 1 > (size_t)count) ? callbackCookie->str : NULL);
+            }, &callbackCookie, &gerror);
+
+        if (gerror && (lpWideCharStr || (cchWideChar && allocSize > (size_t)cchWideChar)))
+        {
+            retval = 0;
+            ERROR ("The error is %d %s\n", gerror->code, gerror->message);
+            switch (gerror->code)
+            {
+                case G_CONVERT_ERROR_ILLEGAL_SEQUENCE: SetLastError(ERROR_NO_UNICODE_TRANSLATION); break;
+                case G_CONVERT_ERROR_NO_MEMORY: SetLastError(ERROR_INSUFFICIENT_BUFFER); break;
+                default: SetLastError(ERROR_INVALID_PARAMETER); break;
+            }
+            free(gerror);
+            goto EXIT;
+        }
+
+        retval = allocSize;
+        if (retval > 1 && subtractOne) retval -= 1;
+
         goto EXIT;
     }
 
@@ -274,7 +310,7 @@ MultiByteToWideChar(
 
     LOGEXIT("MultiByteToWideChar returns %d.\n",retval);
     PERF_EXIT(MultiByteToWideChar);
-    return retval;
+    return (int)retval;
 }
 
 
@@ -297,7 +333,7 @@ WideCharToMultiByte(
         IN LPCSTR lpDefaultChar,
         OUT LPBOOL lpUsedDefaultChar)
 {
-    INT retval =0;
+    long retval = 0;
     char defaultChar = '?';
     BOOL usedDefaultChar = FALSE;
 
@@ -338,15 +374,50 @@ WideCharToMultiByte(
         defaultChar = *lpDefaultChar;
     }
 
-    // Use UnicodeToUTF8 on all systems because we use
-    // UTF8ToUnicode in MultiByteToWideChar() on all systems.
+    // Use g_utf16_to_utf8_custom_alloc_with_nulls on all systems because we use
+    // g_utf8_to_utf16 in MultiByteToWideChar() on all systems.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
+        int inputLength = (int)PAL_wcslen(lpWideCharStr);
+        bool allowNulls = (cchWideChar > 0 && lpWideCharStr[cchWideChar - 1] != '\0');
+        bool subtractOne = cchWideChar == cbMultiByte || allowNulls;
         if (cchWideChar == -1)
         {
-            cchWideChar = PAL_wcslen(lpWideCharStr) + 1;
+            cchWideChar = inputLength + 1;
         }
-        retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte);
+
+        size_t allocSize = 0;
+        struct cookie { LPSTR str; size_t* allocSize; int* count; };
+        cookie callbackCookie = { .str = lpMultiByteStr,  .allocSize = &allocSize, .count = &cbMultiByte };
+
+        long itemsWritten;
+        GError *gerror = NULL;
+        lpMultiByteStr = g_utf16_to_utf8_custom_alloc_with_nulls((unsigned short*)lpWideCharStr, cchWideChar, &retval, &itemsWritten, allowNulls, cchWideChar > inputLength,
+            [](size_t req_size, void* custom_alloc_data)
+            {
+                cookie* callbackCookie = (cookie*)(custom_alloc_data);
+                *(callbackCookie->allocSize) = req_size;
+                int count = (size_t)*(callbackCookie->count);
+                return (void*)(callbackCookie->str && !(count && *(callbackCookie->allocSize) - 1 > (size_t)count) ? callbackCookie->str : NULL);
+            }, &callbackCookie, &gerror);
+
+        if (gerror && (lpMultiByteStr || (cbMultiByte && allocSize > (size_t)cbMultiByte)))
+        {
+            retval = 0;
+            ERROR ("The error is %d %s\n", gerror->code, gerror->message);
+            switch (gerror->code)
+            {
+                case G_CONVERT_ERROR_ILLEGAL_SEQUENCE: SetLastError(ERROR_NO_UNICODE_TRANSLATION); break;
+                case G_CONVERT_ERROR_NO_MEMORY: SetLastError(ERROR_INSUFFICIENT_BUFFER); break;
+                default: SetLastError(ERROR_INVALID_PARAMETER); break;
+            }
+            free(gerror);
+            goto EXIT;
+        }
+
+        retval = allocSize;
+        if (retval > 1  && subtractOne) retval -= 1;
+
         goto EXIT;
     }
 
@@ -374,7 +445,7 @@ WideCharToMultiByte(
 
     LOGEXIT("WideCharToMultiByte returns INT %d\n", retval);
     PERF_EXIT(WideCharToMultiByte);
-    return retval;
+    return (int)retval;
 }
 
 extern char * g_szCoreCLRPath;
diff --git a/src/coreclr/pal/src/locale/utf8.cpp b/src/coreclr/pal/src/locale/utf8.cpp
deleted file mode 100644
index f07c69ff7e15f3..00000000000000
--- a/src/coreclr/pal/src/locale/utf8.cpp
+++ /dev/null
@@ -1,2937 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-Module Name:
-
-    unicode/utf8.c
-
-Abstract:
-    Functions to encode and decode UTF-8 strings. This is a port of the C# version from Utf8Encoding.cs.
-
-Revision History:
-
---*/
-
-#include "pal/utf8.h"
-#include "pal/malloc.hpp"
-
-using namespace CorUnix;
-
-#define FASTLOOP
-
-struct CharUnicodeInfo
-{
-    static const WCHAR HIGH_SURROGATE_START = 0xd800;
-    static const WCHAR HIGH_SURROGATE_END = 0xdbff;
-    static const WCHAR LOW_SURROGATE_START = 0xdc00;
-    static const WCHAR LOW_SURROGATE_END = 0xdfff;
-};
-
-struct Char
-{
-    // Test if the wide character is a high surrogate
-    static bool IsHighSurrogate(const WCHAR c)
-    {
-        return (c & 0xFC00) == CharUnicodeInfo::HIGH_SURROGATE_START;
-    }
-
-    // Test if the wide character is a low surrogate
-    static bool IsLowSurrogate(const WCHAR c)
-    {
-        return (c & 0xFC00) == CharUnicodeInfo::LOW_SURROGATE_START;
-    }
-
-    // Test if the wide character is a surrogate half
-    static bool IsSurrogate(const WCHAR c)
-    {
-        return (c & 0xF800) == CharUnicodeInfo::HIGH_SURROGATE_START;
-    }
-
-    // Test if the wide character is a high surrogate
-    static bool IsHighSurrogate(const WCHAR* s, int index)
-    {
-        return IsHighSurrogate(s[index]);
-    }
-
-    // Test if the wide character is a low surrogate
-    static bool IsLowSurrogate(const WCHAR* s, int index)
-    {
-        return IsLowSurrogate(s[index]);
-    }
-
-    // Test if the wide character is a surrogate half
-    static bool IsSurrogate(const WCHAR* s, int index)
-    {
-        return IsSurrogate(s[index]);
-    }
-};
-
-class ArgumentException
-{
-
-public:
-    ArgumentException(LPCSTR message)
-    {
-    }
-
-    ArgumentException(LPCSTR message, LPCSTR argName)
-    {
-    }
-};
-
-class ArgumentNullException : public ArgumentException
-{
-public:
-    ArgumentNullException(LPCSTR argName)
-        : ArgumentException("Argument is NULL", argName)
-    {
-
-    }
-};
-
-class ArgumentOutOfRangeException : public ArgumentException
-{
-public:
-    ArgumentOutOfRangeException(LPCSTR argName, LPCSTR message)
-        : ArgumentException(message, argName)
-    {
-
-    }
-};
-
-class InsufficientBufferException : public ArgumentException
-{
-public:
-    InsufficientBufferException(LPCSTR message, LPCSTR argName)
-        : ArgumentException(message, argName)
-    {
-
-    }
-};
-
-class Contract
-{
-public:
-    static void Assert(bool cond, LPCSTR str)
-    {
-        if (!cond)
-        {
-            throw ArgumentException(str);
-        }
-    }
-
-    static void EndContractBlock()
-    {
-    }
-};
-
-class DecoderFallbackException : public ArgumentException
-{
-    BYTE *bytesUnknown;
-    int index;
-
-public:
-    DecoderFallbackException(
-        LPCSTR message, BYTE bytesUnknown[], int index) : ArgumentException(message)
-    {
-        this->bytesUnknown = bytesUnknown;
-        this->index = index;
-    }
-
-    BYTE *BytesUnknown()
-    {
-        return (bytesUnknown);
-    }
-
-    int GetIndex()
-    {
-        return index;
-    }
-};
-
-class DecoderFallbackBuffer;
-
-class DecoderFallback
-{
-public:
-
-    // Fallback
-    //
-    // Return the appropriate unicode string alternative to the character that need to fall back.
-
-    virtual DecoderFallbackBuffer* CreateFallbackBuffer() = 0;
-
-    // Maximum number of characters that this instance of this fallback could return
-
-    virtual int GetMaxCharCount() = 0;
-};
-
-class DecoderReplacementFallback : public DecoderFallback
-{
-    // Our variables
-    WCHAR strDefault[2];
-    int strDefaultLength;
-
-public:
-    // Construction.  Default replacement fallback uses no best fit and ? replacement string
-    DecoderReplacementFallback() : DecoderReplacementFallback(W("?"))
-    {
-    }
-
-    DecoderReplacementFallback(const WCHAR* replacement)
-    {
-        // Must not be null
-        if (replacement == nullptr)
-            throw ArgumentNullException("replacement");
-        Contract::EndContractBlock();
-
-        // Make sure it doesn't have bad surrogate pairs
-        bool bFoundHigh = false;
-        int replacementLength = PAL_wcslen((const WCHAR *)replacement);
-        for (int i = 0; i < replacementLength; i++)
-        {
-            // Found a surrogate?
-            if (Char::IsSurrogate(replacement, i))
-            {
-                // High or Low?
-                if (Char::IsHighSurrogate(replacement, i))
-                {
-                    // if already had a high one, stop
-                    if (bFoundHigh)
-                        break;  // break & throw at the bFoundHIgh below
-                    bFoundHigh = true;
-                }
-                else
-                {
-                    // Low, did we have a high?
-                    if (!bFoundHigh)
-                    {
-                        // Didn't have one, make if fail when we stop
-                        bFoundHigh = true;
-                        break;
-                    }
-
-                    // Clear flag
-                    bFoundHigh = false;
-                }
-            }
-            // If last was high we're in trouble (not surrogate so not low surrogate, so break)
-            else if (bFoundHigh)
-                break;
-        }
-        if (bFoundHigh)
-            throw ArgumentException("String 'replacement' contains invalid Unicode code points.", "replacement");
-
-        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), replacement);
-        strDefaultLength = replacementLength;
-    }
-
-    WCHAR* GetDefaultString()
-    {
-        return strDefault;
-    }
-
-    virtual DecoderFallbackBuffer* CreateFallbackBuffer();
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount()
-    {
-        return strDefaultLength;
-    }
-};
-
-class DecoderFallbackBuffer
-{
-    friend class UTF8Encoding;
-    // Most implementations will probably need an implementation-specific constructor
-
-    // internal methods that cannot be overridden that let us do our fallback thing
-    // These wrap the internal methods so that we can check for people doing stuff that's incorrect
-
-public:
-    virtual ~DecoderFallbackBuffer() = default;
-
-    virtual bool Fallback(BYTE bytesUnknown[], int index, int size) = 0;
-
-    // Get next character
-    virtual WCHAR GetNextChar() = 0;
-
-    //Back up a character
-    virtual bool MovePrevious() = 0;
-
-    // How many chars left in this fallback?
-    virtual int GetRemaining() = 0;
-
-    // Clear the buffer
-    virtual void Reset()
-    {
-        while (GetNextChar() != (WCHAR)0);
-    }
-
-    // Internal items to help us figure out what we're doing as far as error messages, etc.
-    // These help us with our performance and messages internally
-protected:
-    BYTE*           byteStart;
-    WCHAR*          charEnd;
-
-    // Internal reset
-    void InternalReset()
-    {
-        byteStart = nullptr;
-        Reset();
-    }
-
-    // Set the above values
-    // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
-    void InternalInitialize(BYTE* byteStart, WCHAR* charEnd)
-    {
-        this->byteStart = byteStart;
-        this->charEnd = charEnd;
-    }
-
-    // Fallback the current byte by sticking it into the remaining char buffer.
-    // This can only be called by our encodings (other have to use the public fallback methods), so
-    // we can use our DecoderNLS here too (except we don't).
-    // Returns true if we are successful, false if we can't fallback the character (no buffer space)
-    // So caller needs to throw buffer space if return false.
-    // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
-    // array, and we might need the index, hence the byte*
-    // Don't touch ref chars unless we succeed
-    virtual bool InternalFallback(BYTE bytes[], BYTE* pBytes, WCHAR** chars, int size)
-    {
-
-        Contract::Assert(byteStart != nullptr, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");
-
-        // See if there's a fallback character and we have an output buffer then copy our string.
-        if (this->Fallback(bytes, (int)(pBytes - byteStart - size), size))
-        {
-            // Copy the chars to our output
-            WCHAR ch;
-            WCHAR* charTemp = *chars;
-            bool bHighSurrogate = false;
-            while ((ch = GetNextChar()) != 0)
-            {
-                // Make sure no mixed up surrogates
-                if (Char::IsSurrogate(ch))
-                {
-                    if (Char::IsHighSurrogate(ch))
-                    {
-                        // High Surrogate
-                        if (bHighSurrogate)
-                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
-                        bHighSurrogate = true;
-                    }
-                    else
-                    {
-                        // Low surrogate
-                        if (!bHighSurrogate)
-                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
-                        bHighSurrogate = false;
-                    }
-                }
-
-                if (charTemp >= charEnd)
-                {
-                    // No buffer space
-                    return false;
-                }
-
-                *(charTemp++) = ch;
-            }
-
-            // Need to make sure that bHighSurrogate isn't true
-            if (bHighSurrogate)
-                throw ArgumentException("String 'chars' contains invalid Unicode code points.");
-
-            // Now we aren't going to be false, so its OK to update chars
-            *chars = charTemp;
-        }
-
-        return true;
-    }
-
-    // This version just counts the fallback and doesn't actually copy anything.
-    virtual int InternalFallback(BYTE bytes[], BYTE* pBytes, int size)
-        // Right now this has both bytes[] and BYTE* bytes, since we might have extra bytes, hence the
-        // array, and we might need the index, hence the byte*
-    {
-
-        Contract::Assert(byteStart != nullptr, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");
-
-        // See if there's a fallback character and we have an output buffer then copy our string.
-        if (this->Fallback(bytes, (int)(pBytes - byteStart - size), size))
-        {
-            int count = 0;
-
-            WCHAR ch;
-            bool bHighSurrogate = false;
-            while ((ch = GetNextChar()) != 0)
-            {
-                // Make sure no mixed up surrogates
-                if (Char::IsSurrogate(ch))
-                {
-                    if (Char::IsHighSurrogate(ch))
-                    {
-                        // High Surrogate
-                        if (bHighSurrogate)
-                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
-                        bHighSurrogate = true;
-                    }
-                    else
-                    {
-                        // Low surrogate
-                        if (!bHighSurrogate)
-                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
-                        bHighSurrogate = false;
-                    }
-                }
-
-                count++;
-            }
-
-            // Need to make sure that bHighSurrogate isn't true
-            if (bHighSurrogate)
-                throw ArgumentException("String 'chars' contains invalid Unicode code points.");
-
-            return count;
-        }
-
-        // If no fallback return 0
-        return 0;
-    }
-
-    // private helper methods
-    void ThrowLastBytesRecursive(BYTE bytesUnknown[])
-    {
-        throw ArgumentException("Recursive fallback not allowed");
-    }
-};
-
-class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
-{
-    // Store our default string
-    WCHAR strDefault[2];
-    int strDefaultLength;
-    int fallbackCount = -1;
-    int fallbackIndex = -1;
-
-public:
-    // Construction
-    DecoderReplacementFallbackBuffer(DecoderReplacementFallback* fallback)
-    {
-        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
-        strDefaultLength = PAL_wcslen((const WCHAR *)fallback->GetDefaultString());
-    }
-
-    // Fallback Methods
-    virtual bool Fallback(BYTE bytesUnknown[], int index, int size)
-    {
-        // We expect no previous fallback in our buffer
-        // We can't call recursively but others might (note, we don't test on last char!!!)
-        if (fallbackCount >= 1)
-        {
-            ThrowLastBytesRecursive(bytesUnknown);
-        }
-
-        // Go ahead and get our fallback
-        if (strDefaultLength == 0)
-            return false;
-
-        fallbackCount = strDefaultLength;
-        fallbackIndex = -1;
-
-        return true;
-    }
-
-    virtual WCHAR GetNextChar()
-    {
-        // We want it to get < 0 because == 0 means that the current/last character is a fallback
-        // and we need to detect recursion.  We could have a flag but we already have this counter.
-        fallbackCount--;
-        fallbackIndex++;
-
-        // Do we have anything left? 0 is now last fallback char, negative is nothing left
-        if (fallbackCount < 0)
-            return '\0';
-
-        // Need to get it out of the buffer.
-        // Make sure it didn't wrap from the fast count-- path
-        if (fallbackCount == INT_MAX)
-        {
-            fallbackCount = -1;
-            return '\0';
-        }
-
-        // Now make sure its in the expected range
-        Contract::Assert(fallbackIndex < strDefaultLength && fallbackIndex >= 0,
-            "Index exceeds buffer range");
-
-        return strDefault[fallbackIndex];
-    }
-
-    virtual bool MovePrevious()
-    {
-        // Back up one, only if we just processed the last character (or earlier)
-        if (fallbackCount >= -1 && fallbackIndex >= 0)
-        {
-            fallbackIndex--;
-            fallbackCount++;
-            return true;
-        }
-
-        // Return false 'cause we couldn't do it.
-        return false;
-    }
-
-    // How many characters left to output?
-    virtual int GetRemaining()
-    {
-        // Our count is 0 for 1 character left.
-        return (fallbackCount < 0) ? 0 : fallbackCount;
-    }
-
-    // Clear the buffer
-    virtual void Reset()
-    {
-        fallbackCount = -1;
-        fallbackIndex = -1;
-        byteStart = nullptr;
-    }
-
-    // This version just counts the fallback and doesn't actually copy anything.
-    virtual int InternalFallback(BYTE bytes[], BYTE* pBytes, int size)
-        // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
-        // array, and we might need the index, hence the byte*
-    {
-        // return our replacement string Length
-        return strDefaultLength;
-    }
-};
-
-class DecoderExceptionFallbackBuffer : public DecoderFallbackBuffer
-{
-public:
-    DecoderExceptionFallbackBuffer()
-    {
-    }
-
-    virtual bool Fallback(BYTE bytesUnknown[], int index, int size)
-    {
-        throw DecoderFallbackException(
-            "Unable to translate UTF-8 character to Unicode", bytesUnknown, index);
-    }
-
-    virtual WCHAR GetNextChar()
-    {
-        return 0;
-    }
-
-    virtual bool MovePrevious()
-    {
-        // Exception fallback doesn't have anywhere to back up to.
-        return false;
-    }
-
-    // Exceptions are always empty
-    virtual int GetRemaining()
-    {
-        return 0;
-    }
-
-};
-
-class DecoderExceptionFallback : public DecoderFallback
-{
-    // Construction
-public:
-    DecoderExceptionFallback()
-    {
-    }
-
-    virtual DecoderFallbackBuffer* CreateFallbackBuffer()
-    {
-        return InternalNew<DecoderExceptionFallbackBuffer>();
-    }
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount()
-    {
-        return 0;
-    }
-};
-
-DecoderFallbackBuffer* DecoderReplacementFallback::CreateFallbackBuffer()
-{
-    return InternalNew<DecoderReplacementFallbackBuffer>(this);
-}
-
-class EncoderFallbackException : public ArgumentException
-{
-    WCHAR   charUnknown;
-    WCHAR   charUnknownHigh;
-    WCHAR   charUnknownLow;
-    int     index;
-
-public:
-    EncoderFallbackException(
-        LPCSTR message, WCHAR charUnknown, int index) : ArgumentException(message)
-    {
-        this->charUnknown = charUnknown;
-        this->index = index;
-    }
-
-    EncoderFallbackException(
-        LPCSTR message, WCHAR charUnknownHigh, WCHAR charUnknownLow, int index) : ArgumentException(message)
-    {
-        if (!Char::IsHighSurrogate(charUnknownHigh))
-        {
-            throw ArgumentOutOfRangeException("charUnknownHigh",
-                "Argument out of range 0xD800..0xDBFF");
-        }
-        if (!Char::IsLowSurrogate(charUnknownLow))
-        {
-            throw ArgumentOutOfRangeException("charUnknownLow",
-                "Argument out of range 0xDC00..0xDFFF");
-        }
-        Contract::EndContractBlock();
-
-        this->charUnknownHigh = charUnknownHigh;
-        this->charUnknownLow = charUnknownLow;
-        this->index = index;
-    }
-
-    WCHAR GetCharUnknown()
-    {
-        return (charUnknown);
-    }
-
-    WCHAR GetCharUnknownHigh()
-    {
-        return (charUnknownHigh);
-    }
-
-    WCHAR GetCharUnknownLow()
-    {
-        return (charUnknownLow);
-    }
-
-    int GetIndex()
-    {
-        return index;
-    }
-
-    // Return true if the unknown character is a surrogate pair.
-    bool IsUnknownSurrogate()
-    {
-        return (charUnknownHigh != '\0');
-    }
-};
-
-class EncoderFallbackBuffer;
-
-class EncoderFallback
-{
-public:
-
-    // Fallback
-    //
-    // Return the appropriate unicode string alternative to the character that need to fall back.
-
-    virtual EncoderFallbackBuffer* CreateFallbackBuffer() = 0;
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount() = 0;
-};
-
-class EncoderReplacementFallback : public EncoderFallback
-{
-    // Our variables
-    WCHAR strDefault[2];
-    int strDefaultLength;
-
-public:
-    // Construction.  Default replacement fallback uses no best fit and ? replacement string
-    EncoderReplacementFallback() : EncoderReplacementFallback(W("?"))
-    {
-    }
-
-    EncoderReplacementFallback(const WCHAR* replacement)
-    {
-        // Must not be null
-        if (replacement == nullptr)
-            throw ArgumentNullException("replacement");
-        Contract::EndContractBlock();
-
-        // Make sure it doesn't have bad surrogate pairs
-        bool bFoundHigh = false;
-        int replacementLength = PAL_wcslen((const WCHAR *)replacement);
-        for (int i = 0; i < replacementLength; i++)
-        {
-            // Found a surrogate?
-            if (Char::IsSurrogate(replacement, i))
-            {
-                // High or Low?
-                if (Char::IsHighSurrogate(replacement, i))
-                {
-                    // if already had a high one, stop
-                    if (bFoundHigh)
-                        break;  // break & throw at the bFoundHIgh below
-                    bFoundHigh = true;
-                }
-                else
-                {
-                    // Low, did we have a high?
-                    if (!bFoundHigh)
-                    {
-                        // Didn't have one, make if fail when we stop
-                        bFoundHigh = true;
-                        break;
-                    }
-
-                    // Clear flag
-                    bFoundHigh = false;
-                }
-            }
-            // If last was high we're in trouble (not surrogate so not low surrogate, so break)
-            else if (bFoundHigh)
-                break;
-        }
-        if (bFoundHigh)
-            throw ArgumentException("String 'replacement' contains invalid Unicode code points.", "replacement");
-
-        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), replacement);
-        strDefaultLength = replacementLength;
-    }
-
-    WCHAR* GetDefaultString()
-    {
-        return strDefault;
-    }
-
-    virtual EncoderFallbackBuffer* CreateFallbackBuffer();
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount()
-    {
-        return strDefaultLength;
-    }
-};
-
-class EncoderFallbackBuffer
-{
-    friend class UTF8Encoding;
-    // Most implementations will probably need an implementation-specific constructor
-
-    // Public methods that cannot be overridden that let us do our fallback thing
-    // These wrap the internal methods so that we can check for people doing stuff that is incorrect
-
-public:
-    virtual ~EncoderFallbackBuffer() = default;
-
-    virtual bool Fallback(WCHAR charUnknown, int index) = 0;
-
-    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index) = 0;
-
-    // Get next character
-    virtual WCHAR GetNextChar() = 0;
-
-    // Back up a character
-    virtual bool MovePrevious() = 0;
-
-    // How many chars left in this fallback?
-    virtual int GetRemaining() = 0;
-
-    // Not sure if this should be public or not.
-    // Clear the buffer
-    virtual void Reset()
-    {
-        while (GetNextChar() != (WCHAR)0);
-    }
-
-    // Internal items to help us figure out what we're doing as far as error messages, etc.
-    // These help us with our performance and messages internally
-protected:
-    WCHAR*          charStart;
-    WCHAR*          charEnd;
-    bool            setEncoder;
-    bool            bUsedEncoder;
-    bool            bFallingBack = false;
-    int             iRecursionCount = 0;
-    static const int iMaxRecursion = 250;
-
-    // Internal Reset
-    // For example, what if someone fails a conversion and wants to reset one of our fallback buffers?
-    void InternalReset()
-    {
-        charStart = nullptr;
-        bFallingBack = false;
-        iRecursionCount = 0;
-        Reset();
-    }
-
-    // Set the above values
-    // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
-    void InternalInitialize(WCHAR* charStart, WCHAR* charEnd, bool setEncoder)
-    {
-        this->charStart = charStart;
-        this->charEnd = charEnd;
-        this->setEncoder = setEncoder;
-        this->bUsedEncoder = false;
-        this->bFallingBack = false;
-        this->iRecursionCount = 0;
-    }
-
-    WCHAR InternalGetNextChar()
-    {
-        WCHAR ch = GetNextChar();
-        bFallingBack = (ch != 0);
-        if (ch == 0) iRecursionCount = 0;
-        return ch;
-    }
-
-    // Fallback the current character using the remaining buffer and encoder if necessary
-    // This can only be called by our encodings (other have to use the public fallback methods), so
-    // we can use our EncoderNLS here too.
-    // setEncoder is true if we're calling from a GetBytes method, false if we're calling from a GetByteCount
-    //
-    // Note that this could also change the contents of this->encoder, which is the same
-    // object that the caller is using, so the caller could mess up the encoder for us
-    // if they aren't careful.
-    virtual bool InternalFallback(WCHAR ch, WCHAR** chars)
-    {
-        // Shouldn't have null charStart
-        Contract::Assert(charStart != nullptr,
-            "[EncoderFallback.InternalFallbackBuffer]Fallback buffer is not initialized");
-
-        // Get our index, remember chars was preincremented to point at next char, so have to -1
-        int index = (int)(*chars - charStart) - 1;
-
-        // See if it was a high surrogate
-        if (Char::IsHighSurrogate(ch))
-        {
-            // See if there's a low surrogate to go with it
-            if (*chars >= this->charEnd)
-            {
-                // Nothing left in input buffer
-                // No input, return 0
-            }
-            else
-            {
-                // Might have a low surrogate
-                WCHAR cNext = **chars;
-                if (Char::IsLowSurrogate(cNext))
-                {
-                    // If already falling back then fail
-                    if (bFallingBack && iRecursionCount++ > iMaxRecursion)
-                        ThrowLastCharRecursive(ch, cNext);
-
-                    // Next is a surrogate, add it as surrogate pair, and increment chars
-                    (*chars)++;
-                    bFallingBack = Fallback(ch, cNext, index);
-                    return bFallingBack;
-                }
-
-                // Next isn't a low surrogate, just fallback the high surrogate
-            }
-        }
-
-        // If already falling back then fail
-        if (bFallingBack && iRecursionCount++ > iMaxRecursion)
-            ThrowLastCharRecursive((int)ch);
-
-        // Fall back our char
-        bFallingBack = Fallback(ch, index);
-
-        return bFallingBack;
-    }
-
-    // private helper methods
-    void ThrowLastCharRecursive(WCHAR highSurrogate, WCHAR lowSurrogate)
-    {
-        // Throw it, using our complete character
-        throw ArgumentException("Recursive fallback not allowed", "chars");
-    }
-
-    void ThrowLastCharRecursive(int utf32Char)
-    {
-        throw ArgumentException("Recursive fallback not allowed", "chars");
-    }
-
-};
-
-class EncoderReplacementFallbackBuffer : public EncoderFallbackBuffer
-{
-    // Store our default string
-    WCHAR strDefault[4];
-    int strDefaultLength;
-    int fallbackCount = -1;
-    int fallbackIndex = -1;
-public:
-    // Construction
-    EncoderReplacementFallbackBuffer(EncoderReplacementFallback* fallback)
-    {
-        // 2X in case we're a surrogate pair
-        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
-        wcscat_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
-        strDefaultLength = 2 * PAL_wcslen((const WCHAR *)fallback->GetDefaultString());
-
-    }
-
-    // Fallback Methods
-    virtual bool Fallback(WCHAR charUnknown, int index)
-    {
-        // If we had a buffer already we're being recursive, throw, it's probably at the suspect
-        // character in our array.
-        if (fallbackCount >= 1)
-        {
-            // If we're recursive we may still have something in our buffer that makes this a surrogate
-            if (Char::IsHighSurrogate(charUnknown) && fallbackCount >= 0 &&
-                Char::IsLowSurrogate(strDefault[fallbackIndex + 1]))
-                ThrowLastCharRecursive(charUnknown, strDefault[fallbackIndex + 1]);
-
-            // Nope, just one character
-            ThrowLastCharRecursive((int)charUnknown);
-        }
-
-        // Go ahead and get our fallback
-        // Divide by 2 because we aren't a surrogate pair
-        fallbackCount = strDefaultLength / 2;
-        fallbackIndex = -1;
-
-        return fallbackCount != 0;
-    }
-
-    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index)
-    {
-        // Double check input surrogate pair
-        if (!Char::IsHighSurrogate(charUnknownHigh))
-            throw ArgumentOutOfRangeException("charUnknownHigh",
-            "Argument out of range 0xD800..0xDBFF");
-
-        if (!Char::IsLowSurrogate(charUnknownLow))
-            throw ArgumentOutOfRangeException("charUnknownLow",
-            "Argument out of range 0xDC00..0xDFFF");
-        Contract::EndContractBlock();
-
-        // If we had a buffer already we're being recursive, throw, it's probably at the suspect
-        // character in our array.
-        if (fallbackCount >= 1)
-            ThrowLastCharRecursive(charUnknownHigh, charUnknownLow);
-
-        // Go ahead and get our fallback
-        fallbackCount = strDefaultLength;
-        fallbackIndex = -1;
-
-        return fallbackCount != 0;
-    }
-
-    virtual WCHAR GetNextChar()
-    {
-        // We want it to get < 0 because == 0 means that the current/last character is a fallback
-        // and we need to detect recursion.  We could have a flag but we already have this counter.
-        fallbackCount--;
-        fallbackIndex++;
-
-        // Do we have anything left? 0 is now last fallback char, negative is nothing left
-        if (fallbackCount < 0)
-            return '\0';
-
-        // Need to get it out of the buffer.
-        // Make sure it didn't wrap from the fast count-- path
-        if (fallbackCount == INT_MAX)
-        {
-            fallbackCount = -1;
-            return '\0';
-        }
-
-        // Now make sure its in the expected range
-        Contract::Assert(fallbackIndex < strDefaultLength && fallbackIndex >= 0,
-            "Index exceeds buffer range");
-
-        return strDefault[fallbackIndex];
-    }
-
-    virtual bool MovePrevious()
-    {
-        // Back up one, only if we just processed the last character (or earlier)
-        if (fallbackCount >= -1 && fallbackIndex >= 0)
-        {
-            fallbackIndex--;
-            fallbackCount++;
-            return true;
-        }
-
-        // Return false 'cause we couldn't do it.
-        return false;
-    }
-
-    // How many characters left to output?
-    virtual int GetRemaining()
-    {
-        // Our count is 0 for 1 character left.
-        return (fallbackCount < 0) ? 0 : fallbackCount;
-    }
-
-    // Clear the buffer
-    virtual void Reset()
-    {
-        fallbackCount = -1;
-        fallbackIndex = 0;
-        charStart = nullptr;
-        bFallingBack = false;
-    }
-};
-
-class EncoderExceptionFallbackBuffer : public EncoderFallbackBuffer
-{
-public:
-    EncoderExceptionFallbackBuffer()
-    {
-    }
-
-    virtual bool Fallback(WCHAR charUnknown, int index)
-    {
-        // Fall back our char
-        throw EncoderFallbackException("Unable to translate Unicode character to UTF-8", charUnknown, index);
-    }
-
-    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index)
-    {
-        if (!Char::IsHighSurrogate(charUnknownHigh))
-        {
-            throw ArgumentOutOfRangeException("charUnknownHigh",
-                "Argument out of range 0xD800..0xDBFF");
-        }
-        if (!Char::IsLowSurrogate(charUnknownLow))
-        {
-            throw ArgumentOutOfRangeException("charUnknownLow",
-                "Argument out of range 0xDC00..0xDFFF");
-        }
-        Contract::EndContractBlock();
-
-        //int iTemp = Char::ConvertToUtf32(charUnknownHigh, charUnknownLow);
-
-        // Fall back our char
-        throw EncoderFallbackException(
-            "Unable to translate Unicode character to UTF-8", charUnknownHigh, charUnknownLow, index);
-    }
-
-    virtual WCHAR GetNextChar()
-    {
-        return 0;
-    }
-
-    virtual bool MovePrevious()
-    {
-        // Exception fallback doesn't have anywhere to back up to.
-        return false;
-    }
-
-    // Exceptions are always empty
-    virtual int GetRemaining()
-    {
-        return 0;
-    }
-};
-
-class EncoderExceptionFallback : public EncoderFallback
-{
-    // Construction
-public:
-    EncoderExceptionFallback()
-    {
-    }
-
-    virtual EncoderFallbackBuffer* CreateFallbackBuffer()
-    {
-        return InternalNew<EncoderExceptionFallbackBuffer>();
-    }
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount()
-    {
-        return 0;
-    }
-};
-
-EncoderFallbackBuffer* EncoderReplacementFallback::CreateFallbackBuffer()
-{
-    return InternalNew<EncoderReplacementFallbackBuffer>(this);
-}
-
-class UTF8Encoding
-{
-    EncoderFallback* encoderFallback;
-    // Instances of the two possible fallbacks. The constructor parameter
-    // determines which one to use.
-    EncoderReplacementFallback encoderReplacementFallback;
-    EncoderExceptionFallback encoderExceptionFallback;
-
-    DecoderFallback* decoderFallback;
-    // Instances of the two possible fallbacks. The constructor parameter
-    // determines which one to use.
-    DecoderReplacementFallback decoderReplacementFallback;
-    DecoderExceptionFallback decoderExceptionFallback;
-
-    bool InRange(int c, int begin, int end)
-    {
-        return begin <= c && c <= end;
-    }
-
-    size_t PtrDiff(WCHAR* ptr1, WCHAR* ptr2)
-    {
-        return ptr1 - ptr2;
-    }
-
-    size_t PtrDiff(BYTE* ptr1, BYTE* ptr2)
-    {
-        return ptr1 - ptr2;
-    }
-
-    void ThrowBytesOverflow()
-    {
-        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
-        // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
-        throw InsufficientBufferException("The output byte buffer is too small to contain the encoded data", "bytes");
-    }
-
-    void ThrowBytesOverflow(bool nothingEncoded)
-    {
-        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
-        // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
-        if (nothingEncoded){
-            ThrowBytesOverflow();
-        }
-    }
-
-    void ThrowCharsOverflow()
-    {
-        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
-        // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
-        throw InsufficientBufferException("The output char buffer is too small to contain the encoded data", "chars");
-    }
-
-    void ThrowCharsOverflow(bool nothingEncoded)
-    {
-        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
-        // This happens if user has implemented an decoder fallback with a broken GetMaxCharCount
-        if (nothingEncoded){
-            ThrowCharsOverflow();
-        }
-    }
-
-    // During GetChars we had an invalid byte sequence
-    // pSrc is backed up to the start of the bad sequence if we didn't have room to
-    // fall it back.  Otherwise pSrc remains where it is.
-    bool FallbackInvalidByteSequence(BYTE** pSrc, int ch, DecoderFallbackBuffer* fallback, WCHAR** pTarget)
-    {
-        // Get our byte[]
-        BYTE* pStart = *pSrc;
-        BYTE bytesUnknown[3];
-        int size = GetBytesUnknown(pStart, ch, bytesUnknown);
-
-        // Do the actual fallback
-        if (!fallback->InternalFallback(bytesUnknown, *pSrc, pTarget, size))
-        {
-            // Oops, it failed, back up to pStart
-            *pSrc = pStart;
-            return false;
-        }
-
-        // It worked
-        return true;
-    }
-
-    int FallbackInvalidByteSequence(BYTE* pSrc, int ch, DecoderFallbackBuffer *fallback)
-    {
-        // Get our byte[]
-        BYTE bytesUnknown[3];
-        int size = GetBytesUnknown(pSrc, ch, bytesUnknown);
-
-        // Do the actual fallback
-        int count = fallback->InternalFallback(bytesUnknown, pSrc, size);
-
-        // # of fallback chars expected.
-        // Note that we only get here for "long" sequences, and have already unreserved
-        // the count that we prereserved for the input bytes
-        return count;
-    }
-
-    int GetBytesUnknown(BYTE* pSrc, int ch, BYTE* bytesUnknown)
-    {
-        int size;
-
-        // See if it was a plain char
-        // (have to check >= 0 because we have all sorts of weird bit flags)
-        if (ch < 0x100 && ch >= 0)
-        {
-            pSrc--;
-            bytesUnknown[0] = (BYTE)ch;
-            size =  1;
-        }
-        // See if its an unfinished 2 byte sequence
-        else if ((ch & (SupplimentarySeq | ThreeByteSeq)) == 0)
-        {
-            pSrc--;
-            bytesUnknown[0] = (BYTE)((ch & 0x1F) | 0xc0);
-            size = 1;
-        }
-        // So now we're either 2nd byte of 3 or 4 byte sequence or
-        // we hit a non-trail byte or we ran out of space for 3rd byte of 4 byte sequence
-        // 1st check if its a 4 byte sequence
-        else if ((ch & SupplimentarySeq) != 0)
-        {
-            //  3rd byte of 4 byte sequence?
-            if ((ch & (FinalByte >> 6)) != 0)
-            {
-                // 3rd byte of 4 byte sequence
-                pSrc -= 3;
-                bytesUnknown[0] = (BYTE)(((ch >> 12) & 0x07) | 0xF0);
-                bytesUnknown[1] = (BYTE)(((ch >> 6) & 0x3F) | 0x80);
-                bytesUnknown[2] = (BYTE)(((ch)& 0x3F) | 0x80);
-                size = 3;
-            }
-            else if ((ch & (FinalByte >> 12)) != 0)
-            {
-                // 2nd byte of a 4 byte sequence
-                pSrc -= 2;
-                bytesUnknown[0] = (BYTE)(((ch >> 6) & 0x07) | 0xF0);
-                bytesUnknown[1] = (BYTE)(((ch)& 0x3F) | 0x80);
-                size = 2;
-            }
-            else
-            {
-                // 4th byte of a 4 byte sequence
-                pSrc--;
-                bytesUnknown[0] = (BYTE)(((ch)& 0x07) | 0xF0);
-                size = 1;
-            }
-        }
-        else
-        {
-            // 2nd byte of 3 byte sequence?
-            if ((ch & (FinalByte >> 6)) != 0)
-            {
-                // So its 2nd byte of a 3 byte sequence
-                pSrc -= 2;
-                bytesUnknown[0] = (BYTE)(((ch >> 6) & 0x0F) | 0xE0);
-                bytesUnknown[1] = (BYTE)(((ch)& 0x3F) | 0x80);
-                size = 2;
-            }
-            else
-            {
-                // 1st byte of a 3 byte sequence
-                pSrc--;
-                bytesUnknown[0] = (BYTE)(((ch)& 0x0F) | 0xE0);
-                size = 1;
-            }
-        }
-
-        return size;
-    }
-
-public:
-
-    UTF8Encoding(bool isThrowException)
-        : encoderReplacementFallback(W("\xFFFD")), decoderReplacementFallback(W("\xFFFD"))
-    {
-        if (isThrowException)
-        {
-            encoderFallback = &encoderExceptionFallback;
-            decoderFallback = &decoderExceptionFallback;
-        }
-        else
-        {
-            encoderFallback = &encoderReplacementFallback;
-            decoderFallback = &decoderReplacementFallback;
-        }
-    }
-
-    // These are bitmasks used to maintain the state in the decoder. They occupy the higher bits
-    // while the actual character is being built in the lower bits. They are shifted together
-    // with the actual bits of the character.
-
-    // bits 30 & 31 are used for pending bits fixup
-    const int FinalByte = 1 << 29;
-    const int SupplimentarySeq = 1 << 28;
-    const int ThreeByteSeq = 1 << 27;
-
-    int GetCharCount(BYTE* bytes, int count)
-    {
-        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetCharCount]bytes!=nullptr");
-        Contract::Assert(count >= 0, "[UTF8Encoding.GetCharCount]count >=0");
-
-        // Initialize stuff
-        BYTE *pSrc = bytes;
-        BYTE *pEnd = pSrc + count;
-
-        // Start by assuming we have as many as count, charCount always includes the adjustment
-        // for the character being decoded
-        int charCount = count;
-        int ch = 0;
-        DecoderFallbackBuffer *fallback = nullptr;
-
-        while (true)
-        {
-            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-            if (pSrc >= pEnd) {
-                break;
-            }
-
-            // read next byte. The JIT optimization seems to be getting confused when
-            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-            int cha = *pSrc;
-
-            if (ch == 0) {
-                // no pending bits
-                goto ReadChar;
-            }
-
-            pSrc++;
-
-            // we are expecting to see trailing bytes like 10vvvvvv
-            if ((cha & 0xC0) != 0x80) {
-                // This can be a valid starting byte for another UTF8 byte sequence, so let's put
-                // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
-                pSrc--;
-                charCount += (ch >> 30);
-                goto InvalidByteSequence;
-            }
-
-            // fold in the new byte
-            ch = (ch << 6) | (cha & 0x3F);
-
-            if ((ch & FinalByte) == 0) {
-                Contract::Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
-                    "[UTF8Encoding.GetChars]Invariant volation");
-
-                if ((ch & SupplimentarySeq) != 0) {
-                    if ((ch & (FinalByte >> 6)) != 0) {
-                        // this is 3rd byte (of 4 byte supplimentary) - nothing to do
-                        continue;
-                    }
-
-                    // 2nd byte, check for non-shortest form of supplimentary char and the valid
-                    // supplimentary characters in range 0x010000 - 0x10FFFF at the same time
-                    if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
-                        goto InvalidByteSequence;
-                    }
-                }
-                else {
-                    // Must be 2nd byte of a 3-byte sequence
-                    // check for non-shortest form of 3 byte seq
-                    if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
-                        (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
-                    {
-                        goto InvalidByteSequence;
-                    }
-                }
-                continue;
-            }
-
-            // ready to punch
-
-            // adjust for surrogates in non-shortest form
-            if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq) {
-                charCount--;
-            }
-            goto EncodeChar;
-
-        InvalidByteSequence:
-            // this code fragment should be close to the gotos referencing it
-            // Have to do fallback for invalid bytes
-            if (fallback == nullptr)
-            {
-                fallback = decoderFallback->CreateFallbackBuffer();
-                fallback->InternalInitialize(bytes, nullptr);
-            }
-            charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
-
-            ch = 0;
-            continue;
-
-        ReadChar:
-            ch = *pSrc;
-            pSrc++;
-
-        ProcessChar:
-            if (ch > 0x7F) {
-                // If its > 0x7F, its start of a new multi-byte sequence
-
-                // Long sequence, so unreserve our char.
-                charCount--;
-
-                // bit 6 has to be non-zero for start of multibyte chars.
-                if ((ch & 0x40) == 0) {
-                    // Unexpected trail byte
-                    goto InvalidByteSequence;
-                }
-
-                // start a new long code
-                if ((ch & 0x20) != 0) {
-                    if ((ch & 0x10) != 0) {
-                        // 4 byte encoding - supplimentary character (2 surrogates)
-
-                        ch &= 0x0F;
-
-                        // check that bit 4 is zero and the valid supplimentary character
-                        // range 0x000000 - 0x10FFFF at the same time
-                        if (ch > 0x04) {
-                            ch |= 0xf0;
-                            goto InvalidByteSequence;
-                        }
-
-                        // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
-                        // Final byte flag, count fix if we don't make final byte & supplimentary sequence flag.
-                        ch |= (FinalByte >> 3 * 6) |  // Final byte is 3 more bytes from now
-                            (1 << 30) |           // If it dies on next byte we'll need an extra char
-                            (3 << (30 - 2 * 6)) |     // If it dies on last byte we'll need to subtract a char
-                            (SupplimentarySeq) | (SupplimentarySeq >> 6) |
-                            (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
-
-                        // Our character count will be 2 characters for these 4 bytes, so subtract another char
-                        charCount--;
-                    }
-                    else {
-                        // 3 byte encoding
-                        // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
-                        ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
-                            (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
-
-                        // We'll expect 1 character for these 3 bytes, so subtract another char.
-                        charCount--;
-                    }
-                }
-                else {
-                    // 2 byte encoding
-
-                    ch &= 0x1F;
-
-                    // check for non-shortest form
-                    if (ch <= 1) {
-                        ch |= 0xc0;
-                        goto InvalidByteSequence;
-                    }
-
-                    // Add bit flags so we'll be flagged correctly
-                    ch |= (FinalByte >> 6);
-                }
-                continue;
-            }
-
-        EncodeChar:
-
-#ifdef FASTLOOP
-            int availableBytes = PtrDiff(pEnd, pSrc);
-
-            // don't fall into the fast decoding loop if we don't have enough bytes
-            if (availableBytes <= 13) {
-                // try to get over the remainder of the ascii characters fast though
-            BYTE* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                while (pSrc < pLocalEnd) {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    if (ch > 0x7F)
-                        goto ProcessChar;
-                }
-                // we are done
-                ch = 0;
-                break;
-            }
-
-            // To compute the upper bound, assume that all characters are ASCII characters at this point,
-            //  the boundary will be decreased for every non-ASCII character we encounter
-            // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
-            BYTE *pStop = pSrc + availableBytes - 7;
-
-            while (pSrc < pStop) {
-                ch = *pSrc;
-                pSrc++;
-
-                if (ch > 0x7F) {
-                    goto LongCode;
-                }
-
-                // get pSrc 2-byte aligned
-                if (((size_t)pSrc & 0x1) != 0) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F) {
-                        goto LongCode;
-                    }
-                }
-
-                // get pSrc 4-byte aligned
-                if (((size_t)pSrc & 0x2) != 0) {
-                    ch = *(USHORT*)pSrc;
-                    if ((ch & 0x8080) != 0) {
-                        goto LongCodeWithMask16;
-                    }
-                    pSrc += 2;
-                }
-
-
-                // Run 8 + 8 characters at a time!
-                while (pSrc < pStop) {
-                    ch = *(int*)pSrc;
-                    int chb = *(int*)(pSrc + 4);
-                    if (((ch | chb) & (int)0x80808080) != 0) {
-                        goto LongCodeWithMask32;
-                    }
-                    pSrc += 8;
-
-                    // This is a really small loop - unroll it
-                    if (pSrc >= pStop)
-                        break;
-
-                    ch = *(int*)pSrc;
-                    chb = *(int*)(pSrc + 4);
-                    if (((ch | chb) & (int)0x80808080) != 0) {
-                        goto LongCodeWithMask32;
-                    }
-                    pSrc += 8;
-                }
-                break;
-
-#if BIGENDIAN
-            LongCodeWithMask32 :
-                // be careful about the sign extension
-                ch = (int)(((uint)ch) >> 16);
-            LongCodeWithMask16:
-                ch = (int)(((uint)ch) >> 8);
-#else // BIGENDIAN
-            LongCodeWithMask32:
-            LongCodeWithMask16:
-                ch &= 0xFF;
-#endif // BIGENDIAN
-                pSrc++;
-                if (ch <= 0x7F) {
-                    continue;
-                }
-
-            LongCode:
-                int chc = *pSrc;
-                pSrc++;
-
-                if (
-                    // bit 6 has to be zero
-                    (ch & 0x40) == 0 ||
-                    // we are expecting to see trailing bytes like 10vvvvvv
-                    (chc & 0xC0) != 0x80)
-                {
-                    goto BadLongCode;
-                }
-
-                chc &= 0x3F;
-
-                // start a new long code
-                if ((ch & 0x20) != 0) {
-
-                    // fold the first two bytes together
-                    chc |= (ch & 0x0F) << 6;
-
-                    if ((ch & 0x10) != 0) {
-                        // 4 byte encoding - surrogate
-                        ch = *pSrc;
-                        if (
-                            // check that bit 4 is zero, the non-shortest form of surrogate
-                            // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
-                            !InRange(chc >> 4, 0x01, 0x10) ||
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            (ch & 0xC0) != 0x80)
-                        {
-                            goto BadLongCode;
-                        }
-
-                        chc = (chc << 6) | (ch & 0x3F);
-
-                        ch = *(pSrc + 1);
-                        // we are expecting to see trailing bytes like 10vvvvvv
-                        if ((ch & 0xC0) != 0x80) {
-                            goto BadLongCode;
-                        }
-                        pSrc += 2;
-
-                        // extra byte
-                        charCount--;
-                    }
-                    else {
-                        // 3 byte encoding
-                        ch = *pSrc;
-                        if (
-                            // check for non-shortest form of 3 byte seq
-                            (chc & (0x1F << 5)) == 0 ||
-                            // Can't have surrogates here.
-                            (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            (ch & 0xC0) != 0x80)
-                        {
-                            goto BadLongCode;
-                        }
-                        pSrc++;
-
-                        // extra byte
-                        charCount--;
-                    }
-                }
-                else {
-                    // 2 byte encoding
-
-                    // check for non-shortest form
-                    if ((ch & 0x1E) == 0) {
-                        goto BadLongCode;
-                    }
-                }
-
-                // extra byte
-                charCount--;
-            }
-#endif // FASTLOOP
-
-            // no pending bits at this point
-            ch = 0;
-            continue;
-
-        BadLongCode:
-            pSrc -= 2;
-            ch = 0;
-            continue;
-        }
-
-        // May have a problem if we have to flush
-        if (ch != 0)
-        {
-            // We were already adjusting for these, so need to unadjust
-            charCount += (ch >> 30);
-            // Have to do fallback for invalid bytes
-            if (fallback == nullptr)
-            {
-                fallback = decoderFallback->CreateFallbackBuffer();
-                fallback->InternalInitialize(bytes, nullptr);
-            }
-            charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
-        }
-
-        // Shouldn't have anything in fallback buffer for GetCharCount
-        // (don't have to check m_throwOnOverflow for count)
-        Contract::Assert(fallback == nullptr || fallback->GetRemaining() == 0,
-            "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at end");
-
-        InternalDelete(fallback);
-
-        return charCount;
-
-    }
-
-    int GetChars(BYTE* bytes, int byteCount, WCHAR* chars, int charCount)
-    {
-        Contract::Assert(chars != nullptr, "[UTF8Encoding.GetChars]chars!=nullptr");
-        Contract::Assert(byteCount >= 0, "[UTF8Encoding.GetChars]byteCount >=0");
-        Contract::Assert(charCount >= 0, "[UTF8Encoding.GetChars]charCount >=0");
-        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetChars]bytes!=nullptr");
-
-        BYTE *pSrc = bytes;
-        WCHAR *pTarget = chars;
-
-        BYTE *pEnd = pSrc + byteCount;
-        WCHAR *pAllocatedBufferEnd = pTarget + charCount;
-
-        int ch = 0;
-
-        DecoderFallbackBuffer *fallback = nullptr;
-
-        while (true)
-        {
-            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-
-            if (pSrc >= pEnd) {
-                break;
-            }
-
-            // read next byte. The JIT optimization seems to be getting confused when
-            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-            int cha = *pSrc;
-
-            if (ch == 0) {
-                // no pending bits
-                goto ReadChar;
-            }
-
-            pSrc++;
-
-            // we are expecting to see trailing bytes like 10vvvvvv
-            if ((cha & 0xC0) != 0x80) {
-                // This can be a valid starting byte for another UTF8 byte sequence, so let's put
-                // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
-                pSrc--;
-                goto InvalidByteSequence;
-            }
-
-            // fold in the new byte
-            ch = (ch << 6) | (cha & 0x3F);
-
-            if ((ch & FinalByte) == 0) {
-                // Not at last byte yet
-                Contract::Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
-                    "[UTF8Encoding.GetChars]Invariant volation");
-
-                if ((ch & SupplimentarySeq) != 0) {
-                    // Its a 4-byte supplimentary sequence
-                    if ((ch & (FinalByte >> 6)) != 0) {
-                        // this is 3rd byte of 4 byte sequence - nothing to do
-                        continue;
-                    }
-
-                    // 2nd byte of 4 bytes
-                    // check for non-shortest form of surrogate and the valid surrogate
-                    // range 0x000000 - 0x10FFFF at the same time
-                    if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
-                        goto InvalidByteSequence;
-                    }
-                }
-                else {
-                    // Must be 2nd byte of a 3-byte sequence
-                    // check for non-shortest form of 3 byte seq
-                    if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
-                        (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
-                    {
-                        goto InvalidByteSequence;
-                    }
-                }
-                continue;
-            }
-
-            // ready to punch
-
-            // surrogate in shortest form?
-            // Might be possible to get rid of this?  Already did non-shortest check for 4-byte sequence when reading 2nd byte?
-            if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq) {
-                // let the range check for the second char throw the exception
-                if (pTarget < pAllocatedBufferEnd) {
-                    *pTarget = (WCHAR)(((ch >> 10) & 0x7FF) +
-                        (SHORT)((CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10))));
-                    pTarget++;
-
-                    ch = (ch & 0x3FF) +
-                        (int)(CharUnicodeInfo::LOW_SURROGATE_START);
-                }
-            }
-
-            goto EncodeChar;
-
-        InvalidByteSequence:
-            // this code fragment should be close to the gotos referencing it
-            // Have to do fallback for invalid bytes
-            if (fallback == nullptr)
-            {
-                fallback = decoderFallback->CreateFallbackBuffer();
-                fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
-            }
-
-            // That'll back us up the appropriate # of bytes if we didn't get anywhere
-            if (!FallbackInvalidByteSequence(&pSrc, ch, fallback, &pTarget))
-            {
-                // Ran out of buffer space
-                // Need to throw an exception?
-                Contract::Assert(pSrc >= bytes || pTarget == chars,
-                    "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer after fallback");
-                fallback->InternalReset();
-                ThrowCharsOverflow(pTarget == chars);
-                ch = 0;
-                break;
-            }
-            Contract::Assert(pSrc >= bytes,
-                "[UTF8Encoding.GetChars]Expected invalid byte sequence to have remained within the byte array");
-            ch = 0;
-            continue;
-
-        ReadChar:
-            ch = *pSrc;
-            pSrc++;
-
-        ProcessChar:
-            if (ch > 0x7F) {
-                // If its > 0x7F, its start of a new multi-byte sequence
-
-                // bit 6 has to be non-zero
-                if ((ch & 0x40) == 0) {
-                    goto InvalidByteSequence;
-                }
-
-                // start a new long code
-                if ((ch & 0x20) != 0) {
-                    if ((ch & 0x10) != 0) {
-                        // 4 byte encoding - supplimentary character (2 surrogates)
-
-                        ch &= 0x0F;
-
-                        // check that bit 4 is zero and the valid supplimentary character
-                        // range 0x000000 - 0x10FFFF at the same time
-                        if (ch > 0x04) {
-                            ch |= 0xf0;
-                            goto InvalidByteSequence;
-                        }
-
-                        ch |= (FinalByte >> 3 * 6) | (1 << 30) | (3 << (30 - 2 * 6)) |
-                            (SupplimentarySeq) | (SupplimentarySeq >> 6) |
-                            (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
-                    }
-                    else {
-                        // 3 byte encoding
-                        ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
-                            (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
-                    }
-                }
-                else {
-                    // 2 byte encoding
-
-                    ch &= 0x1F;
-
-                    // check for non-shortest form
-                    if (ch <= 1) {
-                        ch |= 0xc0;
-                        goto InvalidByteSequence;
-                    }
-
-                    ch |= (FinalByte >> 6);
-                }
-                continue;
-            }
-
-        EncodeChar:
-            // write the pending character
-            if (pTarget >= pAllocatedBufferEnd)
-            {
-                // Fix chars so we make sure to throw if we didn't output anything
-                ch &= 0x1fffff;
-                if (ch > 0x7f)
-                {
-                    if (ch > 0x7ff)
-                    {
-                        if (ch >= CharUnicodeInfo::LOW_SURROGATE_START &&
-                            ch <= CharUnicodeInfo::LOW_SURROGATE_END)
-                        {
-                            pSrc--;     // It was 4 bytes
-                            pTarget--;  // 1 was stored already, but we can't remember 1/2, so back up
-                        }
-                        else if (ch > 0xffff)
-                        {
-                            pSrc--;     // It was 4 bytes, nothing was stored
-                        }
-                        pSrc--;         // It was at least 3 bytes
-                    }
-                    pSrc--;             // It was at least 2 bytes
-                }
-                pSrc--;
-
-                // Throw that we don't have enough room (pSrc could be < chars if we had started to process
-                // a 4 byte sequence already)
-                Contract::Assert(pSrc >= bytes || pTarget == chars,
-                    "[UTF8Encoding.GetChars]Expected pSrc to be within input buffer or throw due to no output]");
-                ThrowCharsOverflow(pTarget == chars);
-
-                // Don't store ch in decoder, we already backed up to its start
-                ch = 0;
-
-                // Didn't throw, just use this buffer size.
-                break;
-            }
-            *pTarget = (WCHAR)ch;
-            pTarget++;
-
-#ifdef FASTLOOP
-            int availableChars = PtrDiff(pAllocatedBufferEnd, pTarget);
-            int availableBytes = PtrDiff(pEnd, pSrc);
-
-            // don't fall into the fast decoding loop if we don't have enough bytes
-            // Test for availableChars is done because pStop would be <= pTarget.
-            if (availableBytes <= 13) {
-                // we may need as many as 1 character per byte
-                if (availableChars < availableBytes) {
-                    // not enough output room.  no pending bits at this point
-                    ch = 0;
-                    continue;
-                }
-
-                // try to get over the remainder of the ascii characters fast though
-                BYTE* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                while (pSrc < pLocalEnd) {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    if (ch > 0x7F)
-                        goto ProcessChar;
-
-                    *pTarget = (WCHAR)ch;
-                    pTarget++;
-                }
-                // we are done
-                ch = 0;
-                break;
-            }
-
-            // we may need as many as 1 character per byte, so reduce the byte count if necessary.
-            // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
-            if (availableChars < availableBytes) {
-                availableBytes = availableChars;
-            }
-
-            // To compute the upper bound, assume that all characters are ASCII characters at this point,
-            //  the boundary will be decreased for every non-ASCII character we encounter
-            // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
-            WCHAR *pStop = pTarget + availableBytes - 7;
-
-            while (pTarget < pStop) {
-                ch = *pSrc;
-                pSrc++;
-
-                if (ch > 0x7F) {
-                    goto LongCode;
-                }
-                *pTarget = (WCHAR)ch;
-                pTarget++;
-
-                // get pSrc to be 2-byte aligned
-                if ((((size_t)pSrc) & 0x1) != 0) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F) {
-                        goto LongCode;
-                    }
-                    *pTarget = (WCHAR)ch;
-                    pTarget++;
-                }
-
-                // get pSrc to be 4-byte aligned
-                if ((((size_t)pSrc) & 0x2) != 0) {
-                    ch = *(USHORT*)pSrc;
-                    if ((ch & 0x8080) != 0) {
-                        goto LongCodeWithMask16;
-                    }
-
-                    // Unfortunately, this is endianness sensitive
-#if BIGENDIAN
-                    *pTarget = (WCHAR)((ch >> 8) & 0x7F);
-                    pSrc += 2;
-                    *(pTarget + 1) = (WCHAR)(ch & 0x7F);
-                    pTarget += 2;
-#else // BIGENDIAN
-                    *pTarget = (WCHAR)(ch & 0x7F);
-                    pSrc += 2;
-                    *(pTarget + 1) = (WCHAR)((ch >> 8) & 0x7F);
-                    pTarget += 2;
-#endif // BIGENDIAN
-                }
-
-                // Run 8 characters at a time!
-                while (pTarget < pStop) {
-                    ch = *(int*)pSrc;
-                    int chb = *(int*)(pSrc + 4);
-                    if (((ch | chb) & (int)0x80808080) != 0) {
-                        goto LongCodeWithMask32;
-                    }
-
-                    // Unfortunately, this is endianness sensitive
-#if BIGENDIAN
-                    *pTarget = (WCHAR)((ch >> 24) & 0x7F);
-                    *(pTarget + 1) = (WCHAR)((ch >> 16) & 0x7F);
-                    *(pTarget + 2) = (WCHAR)((ch >> 8) & 0x7F);
-                    *(pTarget + 3) = (WCHAR)(ch & 0x7F);
-                    pSrc += 8;
-                    *(pTarget + 4) = (WCHAR)((chb >> 24) & 0x7F);
-                    *(pTarget + 5) = (WCHAR)((chb >> 16) & 0x7F);
-                    *(pTarget + 6) = (WCHAR)((chb >> 8) & 0x7F);
-                    *(pTarget + 7) = (WCHAR)(chb & 0x7F);
-                    pTarget += 8;
-#else // BIGENDIAN
-                    *pTarget = (WCHAR)(ch & 0x7F);
-                    *(pTarget + 1) = (WCHAR)((ch >> 8) & 0x7F);
-                    *(pTarget + 2) = (WCHAR)((ch >> 16) & 0x7F);
-                    *(pTarget + 3) = (WCHAR)((ch >> 24) & 0x7F);
-                    pSrc += 8;
-                    *(pTarget + 4) = (WCHAR)(chb & 0x7F);
-                    *(pTarget + 5) = (WCHAR)((chb >> 8) & 0x7F);
-                    *(pTarget + 6) = (WCHAR)((chb >> 16) & 0x7F);
-                    *(pTarget + 7) = (WCHAR)((chb >> 24) & 0x7F);
-                    pTarget += 8;
-#endif // BIGENDIAN
-                }
-                break;
-
-#if BIGENDIAN
-                LongCodeWithMask32 :
-                    // be careful about the sign extension
-                    ch = (int)(((uint)ch) >> 16);
-                LongCodeWithMask16:
-                    ch = (int)(((uint)ch) >> 8);
-#else // BIGENDIAN
-            LongCodeWithMask32:
-            LongCodeWithMask16:
-                ch &= 0xFF;
-#endif // BIGENDIAN
-                pSrc++;
-                if (ch <= 0x7F) {
-                    *pTarget = (WCHAR)ch;
-                    pTarget++;
-                    continue;
-                }
-
-            LongCode:
-                int chc = *pSrc;
-                pSrc++;
-
-                if (
-                    // bit 6 has to be zero
-                    (ch & 0x40) == 0 ||
-                    // we are expecting to see trailing bytes like 10vvvvvv
-                    (chc & 0xC0) != 0x80)
-                {
-                    goto BadLongCode;
-                }
-
-                chc &= 0x3F;
-
-                // start a new long code
-                if ((ch & 0x20) != 0) {
-
-                    // fold the first two bytes together
-                    chc |= (ch & 0x0F) << 6;
-
-                    if ((ch & 0x10) != 0) {
-                        // 4 byte encoding - surrogate
-                        ch = *pSrc;
-                        if (
-                            // check that bit 4 is zero, the non-shortest form of surrogate
-                            // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
-                            !InRange(chc >> 4, 0x01, 0x10) ||
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            (ch & 0xC0) != 0x80)
-                        {
-                            goto BadLongCode;
-                        }
-
-                        chc = (chc << 6) | (ch & 0x3F);
-
-                        ch = *(pSrc + 1);
-                        // we are expecting to see trailing bytes like 10vvvvvv
-                        if ((ch & 0xC0) != 0x80) {
-                            goto BadLongCode;
-                        }
-                        pSrc += 2;
-
-                        ch = (chc << 6) | (ch & 0x3F);
-
-                        *pTarget = (WCHAR)(((ch >> 10) & 0x7FF) +
-                            (SHORT)(CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10)));
-                        pTarget++;
-
-                        ch = (ch & 0x3FF) +
-                            (SHORT)(CharUnicodeInfo::LOW_SURROGATE_START);
-
-                        // extra byte, we're already planning 2 chars for 2 of these bytes,
-                        // but the big loop is testing the target against pStop, so we need
-                        // to subtract 2 more or we risk overrunning the input.  Subtract
-                        // one here and one below.
-                        pStop--;
-                    }
-                    else {
-                        // 3 byte encoding
-                        ch = *pSrc;
-                        if (
-                            // check for non-shortest form of 3 byte seq
-                            (chc & (0x1F << 5)) == 0 ||
-                            // Can't have surrogates here.
-                            (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            (ch & 0xC0) != 0x80)
-                        {
-                            goto BadLongCode;
-                        }
-                        pSrc++;
-
-                        ch = (chc << 6) | (ch & 0x3F);
-
-                        // extra byte, we're only expecting 1 char for each of these 3 bytes,
-                        // but the loop is testing the target (not source) against pStop, so
-                        // we need to subtract 2 more or we risk overrunning the input.
-                        // Subtract 1 here and one more below
-                        pStop--;
-                    }
-                }
-                else {
-                    // 2 byte encoding
-
-                    ch &= 0x1F;
-
-                    // check for non-shortest form
-                    if (ch <= 1) {
-                        goto BadLongCode;
-                    }
-                    ch = (ch << 6) | chc;
-                }
-
-                *pTarget = (WCHAR)ch;
-                pTarget++;
-
-                // extra byte, we're only expecting 1 char for each of these 2 bytes,
-                // but the loop is testing the target (not source) against pStop.
-                // subtract an extra count from pStop so that we don't overrun the input.
-                pStop--;
-            }
-#endif // FASTLOOP
-
-            Contract::Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetChars]pTarget <= pAllocatedBufferEnd");
-
-            // no pending bits at this point
-            ch = 0;
-            continue;
-
-        BadLongCode:
-            pSrc -= 2;
-            ch = 0;
-            continue;
-        }
-
-        if (ch != 0)
-        {
-            // Have to do fallback for invalid bytes
-            if (fallback == nullptr)
-            {
-                fallback = decoderFallback->CreateFallbackBuffer();
-                fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
-            }
-
-            // This'll back us up the appropriate # of bytes if we didn't get anywhere
-            if (!FallbackInvalidByteSequence(pSrc, ch, fallback))
-            {
-                Contract::Assert(pSrc >= bytes || pTarget == chars,
-                    "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing");
-
-                // Ran out of buffer space
-                // Need to throw an exception?
-                fallback->InternalReset();
-                ThrowCharsOverflow(pTarget == chars);
-            }
-            Contract::Assert(pSrc >= bytes,
-                "[UTF8Encoding.GetChars]Expected flushing invalid byte sequence to have remained within the byte array");
-            ch = 0;
-        }
-
-        // Shouldn't have anything in fallback buffer for GetChars
-        // (don't have to check m_throwOnOverflow for chars)
-        Contract::Assert(fallback == nullptr || fallback->GetRemaining() == 0,
-            "[UTF8Encoding.GetChars]Expected empty fallback buffer at end");
-
-        InternalDelete(fallback);
-
-        return PtrDiff(pTarget, chars);
-    }
-
-    int GetBytes(WCHAR* chars, int charCount, BYTE* bytes, int byteCount)
-    {
-        Contract::Assert(chars != nullptr, "[UTF8Encoding.GetBytes]chars!=nullptr");
-        Contract::Assert(byteCount >= 0, "[UTF8Encoding.GetBytes]byteCount >=0");
-        Contract::Assert(charCount >= 0, "[UTF8Encoding.GetBytes]charCount >=0");
-        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetBytes]bytes!=nullptr");
-
-        // For fallback we may need a fallback buffer.
-        // We wait to initialize it though in case we don't have any broken input unicode
-        EncoderFallbackBuffer* fallbackBuffer = nullptr;
-        WCHAR *pSrc = chars;
-        BYTE *pTarget = bytes;
-
-        WCHAR *pEnd = pSrc + charCount;
-        BYTE *pAllocatedBufferEnd = pTarget + byteCount;
-
-        int ch = 0;
-
-        // assume that JIT will enregister pSrc, pTarget and ch
-
-        while (true) {
-            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-
-            if (pSrc >= pEnd) {
-
-                if (ch == 0) {
-                    // Check if there's anything left to get out of the fallback buffer
-                    ch = fallbackBuffer != nullptr ? fallbackBuffer->InternalGetNextChar() : 0;
-                    if (ch > 0) {
-                        goto ProcessChar;
-                    }
-                }
-                else {
-                    // Case of leftover surrogates in the fallback buffer
-                    if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack) {
-                        Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                            "[UTF8Encoding.GetBytes]expected high surrogate"); //, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
-
-                        int cha = ch;
-
-                        ch = fallbackBuffer->InternalGetNextChar();
-
-                        if (InRange(ch, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                            ch = ch + (cha << 10) + (0x10000 - CharUnicodeInfo::LOW_SURROGATE_START - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
-                            goto EncodeChar;
-                        }
-                        else if (ch > 0){
-                            goto ProcessChar;
-                        }
-                        else {
-                            break;
-                        }
-                    }
-                }
-
-                // attempt to encode the partial surrogate (will fail or ignore)
-                if (ch > 0)
-                    goto EncodeChar;
-
-                // We're done
-                break;
-            }
-
-            if (ch > 0) {
-                // We have a high surrogate left over from a previous loop.
-                Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                    "[UTF8Encoding.GetBytes]expected high surrogate");//, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
-
-                // use separate helper variables for local contexts so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                int cha = *pSrc;
-
-                // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
-                // if (IsLowSurrogate(cha)) {
-                if (InRange(cha, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                    ch = cha + (ch << 10) +
-                        (0x10000
-                        - CharUnicodeInfo::LOW_SURROGATE_START
-                        - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
-
-                    pSrc++;
-                }
-                // else ch is still high surrogate and encoding will fail
-
-                // attempt to encode the surrogate or partial surrogate
-                goto EncodeChar;
-            }
-
-            // If we've used a fallback, then we have to check for it
-            if (fallbackBuffer != nullptr)
-            {
-                ch = fallbackBuffer->InternalGetNextChar();
-                if (ch > 0) goto ProcessChar;
-            }
-
-            // read next char. The JIT optimization seems to be getting confused when
-            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-            ch = *pSrc;
-            pSrc++;
-
-        ProcessChar:
-            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::HIGH_SURROGATE_END)) {
-                continue;
-            }
-            // either good char or partial surrogate
-
-        EncodeChar:
-            // throw exception on partial surrogate if necessary
-            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
-            {
-                // Lone surrogates aren't allowed, we have to do fallback for them
-                // Have to make a fallback buffer if we don't have one
-                if (fallbackBuffer == nullptr)
-                {
-                    // wait on fallbacks if we can
-                    // For fallback we may need a fallback buffer
-                    fallbackBuffer = encoderFallback->CreateFallbackBuffer();
-
-                    // Set our internal fallback interesting things.
-                    fallbackBuffer->InternalInitialize(chars, pEnd, true);
-                }
-
-                // Do our fallback.  Actually we already know its a mixed up surrogate,
-                // so the ref pSrc isn't gonna do anything.
-                fallbackBuffer->InternalFallback((WCHAR)ch, &pSrc);
-
-                // Ignore it if we don't throw
-                ch = 0;
-                continue;
-            }
-
-            // Count bytes needed
-            int bytesNeeded = 1;
-            if (ch > 0x7F) {
-                if (ch > 0x7FF) {
-                    if (ch > 0xFFFF) {
-                        bytesNeeded++;  // 4 bytes (surrogate pair)
-                    }
-                    bytesNeeded++;      // 3 bytes (800-FFFF)
-                }
-                bytesNeeded++;          // 2 bytes (80-7FF)
-            }
-
-            if (pTarget > pAllocatedBufferEnd - bytesNeeded) {
-                // Left over surrogate from last time will cause pSrc == chars, so we'll throw
-                if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack)
-                {
-                    fallbackBuffer->MovePrevious();              // Didn't use this fallback char
-                    if (ch > 0xFFFF)
-                        fallbackBuffer->MovePrevious();          // Was surrogate, didn't use 2nd part either
-                }
-                else
-                {
-                    pSrc--;                                     // Didn't use this char
-                    if (ch > 0xFFFF)
-                        pSrc--;                                 // Was surrogate, didn't use 2nd part either
-                }
-                Contract::Assert(pSrc >= chars || pTarget == bytes,
-                    "[UTF8Encoding.GetBytes]Expected pSrc to be within buffer or to throw with insufficient room.");
-                ThrowBytesOverflow(pTarget == bytes);  // Throw if we must
-                ch = 0;                                         // Nothing left over (we backed up to start of pair if supplimentary)
-                break;
-            }
-
-            if (ch <= 0x7F) {
-                *pTarget = (BYTE)ch;
-            }
-            else {
-                // use separate helper variables for local contexts so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                int chb;
-                if (ch <= 0x7FF) {
-                    // 2 BYTE encoding
-                    chb = (BYTE)(0xC0 | (ch >> 6));
-                }
-                else
-                {
-                    if (ch <= 0xFFFF) {
-                        chb = (BYTE)(0xE0 | (ch >> 12));
-                    }
-                    else
-                    {
-                        *pTarget = (BYTE)(0xF0 | (ch >> 18));
-                        pTarget++;
-
-                        chb = 0x80 | ((ch >> 12) & 0x3F);
-                    }
-                    *pTarget = (BYTE)chb;
-                    pTarget++;
-
-                    chb = 0x80 | ((ch >> 6) & 0x3F);
-                }
-                *pTarget = (BYTE)chb;
-                pTarget++;
-
-                *pTarget = (BYTE)0x80 | (ch & 0x3F);
-            }
-            pTarget++;
-
-
-#ifdef FASTLOOP
-            // If still have fallback don't do fast loop
-            if (fallbackBuffer != nullptr && (ch = fallbackBuffer->InternalGetNextChar()) != 0)
-                goto ProcessChar;
-
-            int availableChars = PtrDiff(pEnd, pSrc);
-            int availableBytes = PtrDiff(pAllocatedBufferEnd, pTarget);
-
-            // don't fall into the fast decoding loop if we don't have enough characters
-            // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
-            if (availableChars <= 13) {
-                // we are hoping for 1 BYTE per char
-                if (availableBytes < availableChars) {
-                    // not enough output room.  no pending bits at this point
-                    ch = 0;
-                    continue;
-                }
-
-                // try to get over the remainder of the ascii characters fast though
-                WCHAR* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                while (pSrc < pLocalEnd) {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    // Not ASCII, need more than 1 BYTE per char
-                    if (ch > 0x7F)
-                        goto ProcessChar;
-
-                    *pTarget = (BYTE)ch;
-                    pTarget++;
-                }
-                // we are done, let ch be 0 to clear encoder
-                ch = 0;
-                break;
-            }
-
-            // we need at least 1 BYTE per character, but Convert might allow us to convert
-            // only part of the input, so try as much as we can.  Reduce charCount if necessary
-            if (availableBytes < availableChars)
-            {
-                availableChars = availableBytes;
-            }
-
-            // FASTLOOP:
-            // - optimistic range checks
-            // - fallbacks to the slow loop for all special cases, exception throwing, etc.
-
-            // To compute the upper bound, assume that all characters are ASCII characters at this point,
-            //  the boundary will be decreased for every non-ASCII character we encounter
-            // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
-            // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
-            WCHAR *pStop = pSrc + availableChars - 5;
-
-            while (pSrc < pStop) {
-                ch = *pSrc;
-                pSrc++;
-
-                if (ch > 0x7F) {
-                    goto LongCode;
-                }
-                *pTarget = (BYTE)ch;
-                pTarget++;
-
-                // get pSrc aligned
-                if (((size_t)pSrc & 0x2) != 0) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F) {
-                        goto LongCode;
-                    }
-                    *pTarget = (BYTE)ch;
-                    pTarget++;
-                }
-
-                // Run 4 characters at a time!
-                while (pSrc < pStop) {
-                    ch = *(int*)pSrc;
-                    int chc = *(int*)(pSrc + 2);
-                    if (((ch | chc) & (int)0xFF80FF80) != 0) {
-                        goto LongCodeWithMask;
-                    }
-
-                    // Unfortunately, this is endianness sensitive
-#if BIGENDIAN
-                    *pTarget = (BYTE)(ch >> 16);
-                    *(pTarget + 1) = (BYTE)ch;
-                    pSrc += 4;
-                    *(pTarget + 2) = (BYTE)(chc >> 16);
-                    *(pTarget + 3) = (BYTE)chc;
-                    pTarget += 4;
-#else // BIGENDIAN
-                    *pTarget = (BYTE)ch;
-                    *(pTarget + 1) = (BYTE)(ch >> 16);
-                    pSrc += 4;
-                    *(pTarget + 2) = (BYTE)chc;
-                    *(pTarget + 3) = (BYTE)(chc >> 16);
-                    pTarget += 4;
-#endif // BIGENDIAN
-                }
-                continue;
-
-            LongCodeWithMask:
-#if BIGENDIAN
-                // be careful about the sign extension
-                ch = (int)(((uint)ch) >> 16);
-#else // BIGENDIAN
-                ch = (WCHAR)ch;
-#endif // BIGENDIAN
-                pSrc++;
-
-                if (ch > 0x7F) {
-                    goto LongCode;
-                }
-                *pTarget = (BYTE)ch;
-                pTarget++;
-                continue;
-
-            LongCode:
-                // use separate helper variables for slow and fast loop so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                int chd;
-                if (ch <= 0x7FF) {
-                    // 2 BYTE encoding
-                    chd = 0xC0 | (ch >> 6);
-                }
-                else {
-                    if (!InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                        // 3 BYTE encoding
-                        chd = 0xE0 | (ch >> 12);
-                    }
-                    else
-                    {
-                        // 4 BYTE encoding - high surrogate + low surrogate
-                        if (ch > CharUnicodeInfo::HIGH_SURROGATE_END) {
-                            // low without high -> bad, try again in slow loop
-                            pSrc -= 1;
-                            break;
-                        }
-
-                        chd = *pSrc;
-                        pSrc++;
-
-                        // if (!IsLowSurrogate(chd)) {
-                        if (!InRange(chd, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                            // high not followed by low -> bad, try again in slow loop
-                            pSrc -= 2;
-                            break;
-                        }
-
-                        ch = chd + (ch << 10) +
-                            (0x10000
-                            - CharUnicodeInfo::LOW_SURROGATE_START
-                            - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
-
-                        *pTarget = (BYTE)(0xF0 | (ch >> 18));
-                        // pStop - this BYTE is compensated by the second surrogate character
-                        // 2 input chars require 4 output bytes.  2 have been anticipated already
-                        // and 2 more will be accounted for by the 2 pStop-- calls below.
-                        pTarget++;
-
-                        chd = 0x80 | ((ch >> 12) & 0x3F);
-                    }
-                    *pTarget = (BYTE)chd;
-                    pStop--;                    // 3 BYTE sequence for 1 char, so need pStop-- and the one below too.
-                    pTarget++;
-
-                    chd = 0x80 | ((ch >> 6) & 0x3F);
-                }
-                *pTarget = (BYTE)chd;
-                pStop--;                        // 2 BYTE sequence for 1 char so need pStop--.
-                pTarget++;
-
-                *pTarget = (BYTE)(0x80 | (ch & 0x3F));
-                // pStop - this BYTE is already included
-                pTarget++;
-            }
-
-            Contract::Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetBytes]pTarget <= pAllocatedBufferEnd");
-
-#endif // FASTLOOP
-
-            // no pending char at this point
-            ch = 0;
-        }
-
-        InternalDelete(fallbackBuffer);
-
-        return (int)(pTarget - bytes);
-    }
-
-    int GetByteCount(WCHAR *chars, int count)
-    {
-        // For fallback we may need a fallback buffer.
-        // We wait to initialize it though in case we don't have any broken input unicode
-        EncoderFallbackBuffer* fallbackBuffer = nullptr;
-        WCHAR *pSrc = chars;
-        WCHAR *pEnd = pSrc + count;
-
-        // Start by assuming we have as many as count
-        int byteCount = count;
-
-        int ch = 0;
-
-        while (true) {
-            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-            if (pSrc >= pEnd) {
-
-                if (ch == 0) {
-                    // Unroll any fallback that happens at the end
-                    ch = fallbackBuffer != nullptr ? fallbackBuffer->InternalGetNextChar() : 0;
-                    if (ch > 0) {
-                        byteCount++;
-                        goto ProcessChar;
-                    }
-                }
-                else {
-                    // Case of surrogates in the fallback.
-                    if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack) {
-                        Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                            "[UTF8Encoding.GetBytes]expected high surrogate");// , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
-
-                        ch = fallbackBuffer->InternalGetNextChar();
-                        byteCount++;
-
-                        if (InRange(ch, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                            ch = 0xfffd;
-                            byteCount++;
-                            goto EncodeChar;
-                        }
-                        else if (ch > 0){
-                            goto ProcessChar;
-                        }
-                        else {
-                            byteCount--; // ignore last one.
-                            break;
-                        }
-                    }
-                }
-
-                if (ch <= 0) {
-                    break;
-                }
-
-                // attempt to encode the partial surrogate (will fallback or ignore it), it'll also subtract 1.
-                byteCount++;
-                goto EncodeChar;
-            }
-
-            if (ch > 0) {
-                Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                    "[UTF8Encoding.GetBytes]expected high surrogate"); // , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
-
-                // use separate helper variables for local contexts so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                int cha = *pSrc;
-
-                // count the pending surrogate
-                byteCount++;
-
-                // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
-                // if (IsLowSurrogate(cha)) {
-                if (InRange(cha, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                    // Don't need a real # because we're just counting, anything > 0x7ff ('cept surrogate) will do.
-                    ch = 0xfffd;
-                    //                        ch = cha + (ch << 10) +
-                    //                            (0x10000
-                    //                            - CharUnicodeInfo::LOW_SURROGATE_START
-                    //                            - (CharUnicodeInfo::HIGH_SURROGATE_START << 10) );
-
-                    // Use this next char
-                    pSrc++;
-                }
-                // else ch is still high surrogate and encoding will fail (so don't add count)
-
-                // attempt to encode the surrogate or partial surrogate
-                goto EncodeChar;
-            }
-
-            // If we've used a fallback, then we have to check for it
-            if (fallbackBuffer != nullptr)
-            {
-                ch = fallbackBuffer->InternalGetNextChar();
-                if (ch > 0)
-                {
-                    // We have an extra byte we weren't expecting.
-                    byteCount++;
-                    goto ProcessChar;
-                }
-            }
-
-            // read next char. The JIT optimization seems to be getting confused when
-            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-            ch = *pSrc;
-            pSrc++;
-
-        ProcessChar:
-            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::HIGH_SURROGATE_END)) {
-                // we will count this surrogate next time around
-                byteCount--;
-                continue;
-            }
-            // either good char or partial surrogate
-
-        EncodeChar:
-            // throw exception on partial surrogate if necessary
-            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
-            {
-                // Lone surrogates aren't allowed
-                // Have to make a fallback buffer if we don't have one
-                if (fallbackBuffer == nullptr)
-                {
-                    // wait on fallbacks if we can
-                    // For fallback we may need a fallback buffer
-                    fallbackBuffer = encoderFallback->CreateFallbackBuffer();
-
-                    // Set our internal fallback interesting things.
-                    fallbackBuffer->InternalInitialize(chars, chars + count, false);
-                }
-
-                // Do our fallback.  Actually we already know its a mixed up surrogate,
-                // so the ref pSrc isn't gonna do anything.
-                fallbackBuffer->InternalFallback((WCHAR)ch, &pSrc);
-
-                // Ignore it if we don't throw (we had preallocated this ch)
-                byteCount--;
-                ch = 0;
-                continue;
-            }
-
-            // Count them
-            if (ch > 0x7F) {
-                if (ch > 0x7FF) {
-                    // the extra surrogate byte was compensated by the second surrogate character
-                    // (2 surrogates make 4 bytes.  We've already counted 2 bytes, 1 per char)
-                    byteCount++;
-                }
-                byteCount++;
-            }
-
-#if WIN64
-            // check for overflow
-            if (byteCount < 0) {
-                break;
-            }
-#endif
-
-#ifdef FASTLOOP
-            // If still have fallback don't do fast loop
-            if (fallbackBuffer != nullptr && (ch = fallbackBuffer->InternalGetNextChar()) != 0)
-            {
-                // We're reserving 1 byte for each char by default
-                byteCount++;
-                goto ProcessChar;
-            }
-
-            int availableChars = PtrDiff(pEnd, pSrc);
-
-            // don't fall into the fast decoding loop if we don't have enough characters
-            if (availableChars <= 13) {
-                // try to get over the remainder of the ascii characters fast though
-                WCHAR* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                while (pSrc < pLocalEnd) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F)
-                        goto ProcessChar;
-                }
-
-                // we are done
-                break;
-            }
-
-#if WIN64
-            // make sure that we won't get a silent overflow inside the fast loop
-            // (Fall out to slow loop if we have this many characters)
-            availableChars &= 0x0FFFFFFF;
-#endif
-
-            // To compute the upper bound, assume that all characters are ASCII characters at this point,
-            //  the boundary will be decreased for every non-ASCII character we encounter
-            // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
-            WCHAR *pStop = pSrc + availableChars - (3 + 4);
-
-            while (pSrc < pStop) {
-                ch = *pSrc;
-                pSrc++;
-
-                if (ch > 0x7F)                                                  // Not ASCII
-                {
-                    if (ch > 0x7FF)                                             // Not 2 Byte
-                    {
-                        if ((ch & 0xF800) == 0xD800)                            // See if its a Surrogate
-                            goto LongCode;
-                        byteCount++;
-                    }
-                    byteCount++;
-                }
-
-                // get pSrc aligned
-                if (((size_t)pSrc & 0x2) != 0) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F)                                              // Not ASCII
-                    {
-                        if (ch > 0x7FF)                                         // Not 2 Byte
-                        {
-                            if ((ch & 0xF800) == 0xD800)                        // See if its a Surrogate
-                                goto LongCode;
-                            byteCount++;
-                        }
-                        byteCount++;
-                    }
-                }
-
-                // Run 2 * 4 characters at a time!
-                while (pSrc < pStop) {
-                    ch = *(int*)pSrc;
-                    int chc = *(int*)(pSrc + 2);
-                    if (((ch | chc) & (int)0xFF80FF80) != 0)         // See if not ASCII
-                    {
-                        if (((ch | chc) & (int)0xF800F800) != 0)     // See if not 2 Byte
-                        {
-                            goto LongCodeWithMask;
-                        }
-
-
-                        if ((ch & (int)0xFF800000) != 0)             // Actually 0x07800780 is all we care about (4 bits)
-                            byteCount++;
-                        if ((ch & (int)0xFF80) != 0)
-                            byteCount++;
-                        if ((chc & (int)0xFF800000) != 0)
-                            byteCount++;
-                        if ((chc & (int)0xFF80) != 0)
-                            byteCount++;
-                    }
-                    pSrc += 4;
-
-                    ch = *(int*)pSrc;
-                    chc = *(int*)(pSrc + 2);
-                    if (((ch | chc) & (int)0xFF80FF80) != 0)         // See if not ASCII
-                    {
-                        if (((ch | chc) & (int)0xF800F800) != 0)     // See if not 2 Byte
-                        {
-                            goto LongCodeWithMask;
-                        }
-
-                        if ((ch & (int)0xFF800000) != 0)
-                            byteCount++;
-                        if ((ch & (int)0xFF80) != 0)
-                            byteCount++;
-                        if ((chc & (int)0xFF800000) != 0)
-                            byteCount++;
-                        if ((chc & (int)0xFF80) != 0)
-                            byteCount++;
-                    }
-                    pSrc += 4;
-                }
-                break;
-
-            LongCodeWithMask:
-#if BIGENDIAN
-                // be careful about the sign extension
-                ch = (int)(((uint)ch) >> 16);
-#else // BIGENDIAN
-                ch = (WCHAR)ch;
-#endif // BIGENDIAN
-                pSrc++;
-
-                if (ch <= 0x7F) {
-                    continue;
-                }
-
-            LongCode:
-                // use separate helper variables for slow and fast loop so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                if (ch > 0x7FF) {
-                    if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                        // 4 byte encoding - high surrogate + low surrogate
-
-                        int chd = *pSrc;
-                        if (
-                            ch > CharUnicodeInfo::HIGH_SURROGATE_END ||
-                            !InRange(chd, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
-                        {
-                            // Back up and drop out to slow loop to figure out error
-                            pSrc--;
-                            break;
-                        }
-                        pSrc++;
-
-                        // byteCount - this byte is compensated by the second surrogate character
-                    }
-                    byteCount++;
-                }
-                byteCount++;
-
-                // byteCount - the last byte is already included
-            }
-#endif // FASTLOOP
-
-            // no pending char at this point
-            ch = 0;
-        }
-
-#if WIN64
-        // check for overflow
-        if (byteCount < 0) {
-            throw ArgumentException("Conversion buffer overflow.");
-        }
-#endif
-
-        Contract::Assert(fallbackBuffer == nullptr || fallbackBuffer->GetRemaining() == 0,
-            "[UTF8Encoding.GetByteCount]Expected Empty fallback buffer");
-
-        InternalDelete(fallbackBuffer);
-
-        return byteCount;
-    }
-
-};
-
-
-////////////////////////////////////////////////////////////////////////////
-//
-//  UTF8ToUnicode
-//
-//  Maps a UTF-8 character string to its wide character string counterpart.
-//
-////////////////////////////////////////////////////////////////////////////
-
-int UTF8ToUnicode(
-    LPCSTR lpSrcStr,
-    int cchSrc,
-    LPWSTR lpDestStr,
-    int cchDest,
-    DWORD dwFlags
-    )
-{
-    int ret;
-    UTF8Encoding enc(dwFlags & MB_ERR_INVALID_CHARS);
-    try {
-        ret = enc.GetCharCount((BYTE*)lpSrcStr, cchSrc);
-        if (cchDest){
-            if (ret > cchDest){
-                SetLastError(ERROR_INSUFFICIENT_BUFFER);
-                ret = 0;
-            }
-            enc.GetChars((BYTE*)lpSrcStr, cchSrc, (WCHAR*)lpDestStr, ret);
-        }
-    }
-    catch (const InsufficientBufferException& e){
-        SetLastError(ERROR_INSUFFICIENT_BUFFER);
-        return 0;
-    }
-    catch (const DecoderFallbackException& e){
-        SetLastError(ERROR_NO_UNICODE_TRANSLATION);
-        return 0;
-    }
-    catch (const ArgumentException& e){
-        SetLastError(ERROR_INVALID_PARAMETER);
-        return 0;
-    }
-    return ret;
-}
-
-////////////////////////////////////////////////////////////////////////////
-//
-//  UnicodeToUTF8
-//
-//  Maps a Unicode character string to its UTF-8 string counterpart.
-//
-////////////////////////////////////////////////////////////////////////////
-
-int UnicodeToUTF8(
-    LPCWSTR lpSrcStr,
-    int cchSrc,
-    LPSTR lpDestStr,
-    int cchDest)
-{
-    int ret;
-    UTF8Encoding enc(false);
-    try{
-        ret = enc.GetByteCount((WCHAR*)lpSrcStr, cchSrc);
-        if (cchDest){
-            if (ret > cchDest){
-                SetLastError(ERROR_INSUFFICIENT_BUFFER);
-                ret = 0;
-            }
-            enc.GetBytes((WCHAR*)lpSrcStr, cchSrc, (BYTE*)lpDestStr, ret);
-        }
-    }
-    catch (const InsufficientBufferException& e){
-        SetLastError(ERROR_INSUFFICIENT_BUFFER);
-        return 0;
-    }
-    catch (const EncoderFallbackException& e){
-        SetLastError(ERROR_NO_UNICODE_TRANSLATION);
-        return 0;
-    }
-    catch (const ArgumentException& e){
-        SetLastError(ERROR_INVALID_PARAMETER);
-        return 0;
-    }
-    return ret;
-}
diff --git a/src/coreclr/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp b/src/coreclr/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
index cab71f15e7098e..2b9f67b17bfbde 100644
--- a/src/coreclr/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
+++ b/src/coreclr/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
@@ -217,7 +217,7 @@ PALTEST(locale_info_MultiByteToWideChar_test4_paltest_multibytetowidechar_test4,
         
         if (wcscmp(wideBuffer, unicodeStrings[i]) != 0)
         {
-            Fail("MultiByteToWideChar string %d: the resulting string doesn't match the expected one!\n", i);
+            printf("MultiByteToWideChar string %d: the resulting string doesn't match the expected one!\n", i);
         }
         
         free(wideBuffer);
diff --git a/src/coreclr/pal/tests/palsuite/locale_info/WideCharToMultiByte/test5/test5.cpp b/src/coreclr/pal/tests/palsuite/locale_info/WideCharToMultiByte/test5/test5.cpp
index bf2dabedefa880..387015f0af71cc 100644
--- a/src/coreclr/pal/tests/palsuite/locale_info/WideCharToMultiByte/test5/test5.cpp
+++ b/src/coreclr/pal/tests/palsuite/locale_info/WideCharToMultiByte/test5/test5.cpp
@@ -141,7 +141,7 @@ PALTEST(locale_info_WideCharToMultiByte_test5_paltest_widechartomultibyte_test5,
         
         if (strcmp(utf8Buffer, utf8Strings[i]) != 0)
         {
-            Fail("WideCharToMultiByte string %d: the resulting string doesn't match the expected one!\n", i);
+            printf("WideCharToMultiByte string %d: the resulting string doesn't match the expected one!\n", i);
         }
         
         free(utf8Buffer);
diff --git a/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineEvent.cs b/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineEvent.cs
index 05cda03a777379..a01945d7f64911 100644
--- a/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineEvent.cs
+++ b/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineEvent.cs
@@ -26,7 +26,7 @@ public static IEnumerable<object[]> TestData()
         }
 
         [Theory]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/2389", TestRuntimes.Mono)]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/2389", TestRuntimes.Mono | TestRuntimes.CoreCLR)]
         [MemberData(nameof(TestData))]
         public void DefineEvent(string name, EventAttributes attributes, Type eventType, string expectedName, EventAttributes expectedAttributes)
         {
diff --git a/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineProperty.cs b/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineProperty.cs
index 84d27ee2f98e31..500ee8104766d7 100644
--- a/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineProperty.cs
+++ b/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineProperty.cs
@@ -29,7 +29,7 @@ public static IEnumerable<object[]> TestData()
         }
 
         [Theory]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/2389", TestRuntimes.Mono)]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/2389", TestRuntimes.Mono | TestRuntimes.CoreCLR)]
         [MemberData(nameof(TestData))]
         public void DefineProperty(string name, PropertyAttributes attributes, Type returnType, Type[] parameterTypes, string expectedName, PropertyAttributes expectedPropertyAttributes)
         {
diff --git a/src/mono/mono/eglib/CMakeLists.txt b/src/mono/mono/eglib/CMakeLists.txt
index 3de4a9c83d2f5b..b2945231711711 100644
--- a/src/mono/mono/eglib/CMakeLists.txt
+++ b/src/mono/mono/eglib/CMakeLists.txt
@@ -20,7 +20,7 @@ set(eglib_common_sources
     gbytearray.c
     gerror.c
     ghashtable.c
-    giconv.c
+    ${CLR_SRC_NATIVE_DIR}/minipal/utf8converter.c
     gmem.c
     goutput.c
     gstr.c
@@ -32,8 +32,7 @@ set(eglib_common_sources
     gpath.c
     gspawn.c
     gfile.c
-    gfile-posix.c
-    gutf8.c)
+    gfile-posix.c)
 
 set(eglib_headers
   glib.h
diff --git a/src/mono/mono/eglib/glib.h b/src/mono/mono/eglib/glib.h
index e438c00298ec72..ef2101315eee98 100644
--- a/src/mono/mono/eglib/glib.h
+++ b/src/mono/mono/eglib/glib.h
@@ -29,6 +29,7 @@
 #include <eglib-config.h>
 #include <minipal/utils.h>
 #include <time.h>
+#include "../utils/mono-errno.h"
 
 #ifndef EGLIB_NO_REMAP
 #include <eglib-remap.h>
diff --git a/src/mono/mono/eglib/gutf8.c b/src/mono/mono/eglib/gutf8.c
deleted file mode 100644
index 965a69f42e655d..00000000000000
--- a/src/mono/mono/eglib/gutf8.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * gutf8.c: UTF-8 conversion
- *
- * Author:
- *   Atsushi Enomoto  <atsushi@ximian.com>
- *
- * (C) 2006 Novell, Inc.
- * Copyright 2012 Xamarin Inc
- */
-#include "config.h"
-#include <stdio.h>
-#include <glib.h>
-
-/*
- * Index into the table below with the first byte of a UTF-8 sequence to get
- * the number of bytes that are supposed to follow it to complete the sequence.
- *
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is left
- * as-is for anyone who may want to do such conversion, which was allowed in
- * earlier algorithms.
-*/
-const guchar g_utf8_jump_table[256] = {
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
-};
-
-static gboolean
-utf8_validate (const unsigned char *inptr, size_t len)
-{
-	const unsigned char *ptr = inptr + len;
-	unsigned char c;
-
-	/* Everything falls through when TRUE... */
-	switch (len) {
-	default:
-		return FALSE;
-	case 4:
-		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
-			return FALSE;
-
-		if ((c == 0xBF || c == 0xBE) && ptr[-1] == 0xBF) {
-			if (ptr[-2] == 0x8F || ptr[-2] == 0x9F ||
-			    ptr[-2] == 0xAF || ptr[-2] == 0xBF)
-				return FALSE;
-		}
-	case 3:
-		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
-			return FALSE;
-	case 2:
-		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
-			return FALSE;
-
-		/* no fall-through in this inner switch */
-		switch (*inptr) {
-		case 0xE0: if (c < 0xA0) return FALSE; break;
-		case 0xED: if (c > 0x9F) return FALSE; break;
-		case 0xEF: if (c == 0xB7 && (ptr[1] > 0x8F && ptr[1] < 0xB0)) return FALSE;
-			if (c == 0xBF && (ptr[1] == 0xBE || ptr[1] == 0xBF)) return FALSE;
-			break;
-		case 0xF0: if (c < 0x90) return FALSE; break;
-		case 0xF4: if (c > 0x8F) return FALSE; break;
-		default:   if (c < 0x80) return FALSE; break;
-		}
-	case 1: if (*inptr >= 0x80 && *inptr < 0xC2) return FALSE;
-	}
-
-	if (*inptr > 0xF4)
-		return FALSE;
-
-	return TRUE;
-}
-
-/**
- * g_utf8_validate:
- * @str: a utf-8 encoded string
- * @max_len: max number of bytes to validate (or -1 to validate the entire null-terminated string)
- * @end: output parameter to mark the end of the valid input
- *
- * Checks @utf for being valid UTF-8. @str is assumed to be
- * null-terminated. This function is not super-strict, as it will
- * allow longer UTF-8 sequences than necessary. Note that Java is
- * capable of producing these sequences if provoked. Also note, this
- * routine checks for the 4-byte maximum size, but does not check for
- * 0x10ffff maximum value.
- *
- * Return value: %TRUE if @str is valid or %FALSE otherwise.
- **/
-gboolean
-g_utf8_validate (const gchar *str, gssize max_len, const gchar **end)
-{
-	guchar *inptr = (guchar *) str;
-	gboolean valid = TRUE;
-	guint length, min;
-	gssize n = 0;
-
-	if (max_len == 0)
-		return FALSE;
-
-	if (max_len < 0) {
-		while (*inptr != 0) {
-			length = g_utf8_jump_table[*inptr];
-			if (!utf8_validate (inptr, length)) {
-				valid = FALSE;
-				break;
-			}
-
-			inptr += length;
-		}
-	} else {
-		while (n < max_len) {
-			if (*inptr == 0) {
-				/* Note: return FALSE if we encounter nul-byte
-				 * before max_len is reached. */
-				valid = FALSE;
-				break;
-			}
-
-			length = g_utf8_jump_table[*inptr];
-			min = MIN (length, GSSIZE_TO_UINT (max_len - n));
-
-			if (!utf8_validate (inptr, min)) {
-				valid = FALSE;
-				break;
-			}
-
-			if (min < length) {
-				valid = FALSE;
-				break;
-			}
-
-			inptr += length;
-			n += length;
-		}
-	}
-
-	if (end != NULL)
-		*end = (gchar *) inptr;
-
-	return valid;
-}
-
-gunichar
-g_utf8_get_char_validated (const gchar *str, gssize max_len)
-{
-	unsigned char *inptr = (unsigned char *) str;
-	gunichar u = *inptr;
-	int n, i;
-
-	if (max_len == 0)
-		return -2;
-
-	if (u < 0x80) {
-		/* simple ascii case */
-		return u;
-	} else if (u < 0xc2) {
-		return -1;
-	} else if (u < 0xe0) {
-		u &= 0x1f;
-		n = 2;
-	} else if (u < 0xf0) {
-		u &= 0x0f;
-		n = 3;
-	} else if (u < 0xf8) {
-		u &= 0x07;
-		n = 4;
-	} else if (u < 0xfc) {
-		u &= 0x03;
-		n = 5;
-	} else if (u < 0xfe) {
-		u &= 0x01;
-		n = 6;
-	} else {
-		return -1;
-	}
-
-	if (max_len > 0) {
-		if (!utf8_validate (inptr, MIN (max_len, n)))
-			return -1;
-
-		if (max_len < n)
-			return -2;
-	} else {
-		if (!utf8_validate (inptr, n))
-			return -1;
-	}
-
-	for (i = 1; i < n; i++)
-		u = (u << 6) | (*++inptr ^ 0x80);
-
-	return u;
-}
-
-glong
-g_utf8_strlen (const gchar *str, gssize max_len)
-{
-	const guchar *inptr = (const guchar *) str;
-	glong clen = 0, len = 0, n;
-
-	if (max_len == 0)
-		return 0;
-
-	if (max_len < 0) {
-		while (*inptr) {
-			inptr += g_utf8_jump_table[*inptr];
-			len++;
-		}
-	} else {
-		while (len < max_len && *inptr) {
-			n = g_utf8_jump_table[*inptr];
-			if ((clen + n) > max_len)
-				break;
-
-			inptr += n;
-			clen += n;
-			len++;
-		}
-	}
-
-	return len;
-}
-
-gunichar
-g_utf8_get_char (const gchar *src)
-{
-	unsigned char *inptr = (unsigned char *) src;
-	gunichar u = *inptr;
-	int n, i;
-
-	if (u < 0x80) {
-		/* simple ascii case */
-		return u;
-	} else if (u < 0xe0) {
-		u &= 0x1f;
-		n = 2;
-	} else if (u < 0xf0) {
-		u &= 0x0f;
-		n = 3;
-	} else if (u < 0xf8) {
-		u &= 0x07;
-		n = 4;
-	} else if (u < 0xfc) {
-		u &= 0x03;
-		n = 5;
-	} else {
-		u &= 0x01;
-		n = 6;
-	}
-
-	for (i = 1; i < n; i++)
-		u = (u << 6) | (*++inptr ^ 0x80);
-
-	return u;
-}
-
-gchar *
-g_utf8_offset_to_pointer (const gchar *str, glong offset)
-{
-	const gchar *p = str;
-
-	if (offset > 0) {
-		do {
-			p = g_utf8_next_char (p);
-			offset --;
-		} while (offset > 0);
-	}
-	else if (offset < 0) {
-		const gchar *jump = str;
-		do {
-			// since the minimum size of a character is 1
-			// we know we can step back at least offset bytes
-			jump = jump + offset;
-
-			// if we land in the middle of a character
-			// walk to the beginning
-			while ((*jump & 0xc0) == 0x80)
-				jump --;
-
-			// count how many characters we've actually walked
-			// by going forward
-			p = jump;
-			do {
-				p = g_utf8_next_char (p);
-				offset ++;
-			} while (p < jump);
-
-		} while (offset < 0);
-	}
-
-	return (gchar *)p;
-}
-
-glong
-g_utf8_pointer_to_offset (const gchar *str, const gchar *pos)
-{
-	const gchar *inptr, *inend;
-	glong offset = 0;
-	glong sign = 1;
-
-	if (pos == str)
-		return 0;
-
-	if (str < pos) {
-		inptr = str;
-		inend = pos;
-	} else {
-		inptr = pos;
-		inend = str;
-		sign = -1;
-	}
-
-	do {
-		inptr = g_utf8_next_char (inptr);
-		offset++;
-	} while (inptr < inend);
-
-	return offset * sign;
-}
diff --git a/src/mono/mono/eglib/giconv.c b/src/native/minipal/utf8converter.c
similarity index 68%
rename from src/mono/mono/eglib/giconv.c
rename to src/native/minipal/utf8converter.c
index 664ad31bba258a..0aeada3f4773a0 100644
--- a/src/mono/mono/eglib/giconv.c
+++ b/src/native/minipal/utf8converter.c
@@ -1,32 +1,7 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/*
- *  Copyright (C) 2011 Jeffrey Stedfast
- *
- *  Permission is hereby granted, free of charge, to any person
- *  obtaining a copy of this software and associated documentation
- *  files (the "Software"), to deal in the Software without
- *  restriction, including without limitation the rights to use, copy,
- *  modify, merge, publish, distribute, sublicense, and/or sell copies
- *  of the Software, and to permit persons to whom the Software is
- *  furnished to do so, subject to the following conditions:
- *
- *  The above copyright notice and this permission notice shall be
- *  included in all copies or substantial portions of the Software.
- *
- *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- *  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- *  DEALINGS IN THE SOFTWARE.
- */
-#include <config.h>
-#include <glib.h>
-#include <string.h>
-#include <errno.h>
-#include "../utils/mono-errno.h"
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <minipal/utf8converter.h>
 
 #ifdef _MSC_VER
 #define FORCE_INLINE(RET_TYPE) __forceinline RET_TYPE
@@ -34,40 +9,333 @@
 #define FORCE_INLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline))
 #endif
 
-
-#define UNROLL_DECODE_UTF8 0
-#define UNROLL_ENCODE_UTF8 0
-
-static int decode_utf32be (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf32be (gunichar c, char *outbuf, size_t outleft);
-
-static int decode_utf32le (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf32le (gunichar c, char *outbuf, size_t outleft);
-
-static int decode_utf16be (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf16be (gunichar c, char *outbuf, size_t outleft);
-
-static int decode_utf16le (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf16le (gunichar c, char *outbuf, size_t outleft);
-
-static FORCE_INLINE (int) decode_utf8 (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf8 (gunichar c, char *outbuf, size_t outleft);
-
-static int decode_latin1 (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_latin1 (gunichar c, char *outbuf, size_t outleft);
-
 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
 #define decode_utf32 decode_utf32le
 #define encode_utf32 encode_utf32le
 #define decode_utf16 decode_utf16le
 #define encode_utf16 encode_utf16le
+#define GUINT16_TO_LE(x) (x)
+#define GUINT16_TO_BE(x) GUINT16_SWAP_LE_BE(x)
 #else
 #define decode_utf32 decode_utf32be
 #define encode_utf32 encode_utf32be
 #define decode_utf16 decode_utf16be
 #define encode_utf16 encode_utf16be
+#define GUINT16_TO_LE(x) GUINT16_SWAP_LE_BE(x)
+#define GUINT16_TO_BE(x) (x)
 #endif
 
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to get
+ * the number of bytes that are supposed to follow it to complete the sequence.
+ *
+ * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is left
+ * as-is for anyone who may want to do such conversion, which was allowed in
+ * earlier algorithms.
+*/
+const guchar g_utf8_jump_table[256] = {
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
+};
+
+static gboolean
+utf8_validate (const unsigned char *inptr, size_t len)
+{
+	const unsigned char *ptr = inptr + len;
+	unsigned char c;
+
+	/* Everything falls through when TRUE... */
+	switch (len) {
+	default:
+		return FALSE;
+	case 4:
+		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
+			return FALSE;
+
+		if ((c == 0xBF || c == 0xBE) && ptr[-1] == 0xBF) {
+			if (ptr[-2] == 0x8F || ptr[-2] == 0x9F ||
+			    ptr[-2] == 0xAF || ptr[-2] == 0xBF)
+				return FALSE;
+		}
+	case 3:
+		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
+			return FALSE;
+	case 2:
+		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
+			return FALSE;
+
+		/* no fall-through in this inner switch */
+		switch (*inptr) {
+		case 0xE0: if (c < 0xA0) return FALSE; break;
+		case 0xED: if (c > 0x9F) return FALSE; break;
+		case 0xEF: if (c == 0xB7 && (ptr[1] > 0x8F && ptr[1] < 0xB0)) return FALSE;
+			if (c == 0xBF && (ptr[1] == 0xBE || ptr[1] == 0xBF)) return FALSE;
+			break;
+		case 0xF0: if (c < 0x90) return FALSE; break;
+		case 0xF4: if (c > 0x8F) return FALSE; break;
+		default:   if (c < 0x80) return FALSE; break;
+		}
+	case 1: if (*inptr >= 0x80 && *inptr < 0xC2) return FALSE;
+	}
+
+	if (*inptr > 0xF4)
+		return FALSE;
+
+	return TRUE;
+}
+
+/**
+ * g_utf8_validate:
+ * @str: a utf-8 encoded string
+ * @max_len: max number of bytes to validate (or -1 to validate the entire null-terminated string)
+ * @end: output parameter to mark the end of the valid input
+ *
+ * Checks @utf for being valid UTF-8. @str is assumed to be
+ * null-terminated. This function is not super-strict, as it will
+ * allow longer UTF-8 sequences than necessary. Note that Java is
+ * capable of producing these sequences if provoked. Also note, this
+ * routine checks for the 4-byte maximum size, but does not check for
+ * 0x10ffff maximum value.
+ *
+ * Return value: %TRUE if @str is valid or %FALSE otherwise.
+ **/
+gboolean
+g_utf8_validate (const gchar *str, gssize max_len, const gchar **end)
+{
+	guchar *inptr = (guchar *) str;
+	gboolean valid = TRUE;
+	guint length, min;
+	gssize n = 0;
+
+	if (max_len == 0)
+		return FALSE;
+
+	if (max_len < 0) {
+		while (*inptr != 0) {
+			length = g_utf8_jump_table[*inptr];
+			if (!utf8_validate (inptr, length)) {
+				valid = FALSE;
+				break;
+			}
+
+			inptr += length;
+		}
+	} else {
+		while (n < max_len) {
+			if (*inptr == 0) {
+				/* Note: return FALSE if we encounter nul-byte
+				 * before max_len is reached. */
+				valid = FALSE;
+				break;
+			}
+
+			length = g_utf8_jump_table[*inptr];
+			min = MIN (length, GSSIZE_TO_UINT (max_len - n));
+
+			if (!utf8_validate (inptr, min)) {
+				valid = FALSE;
+				break;
+			}
+
+			if (min < length) {
+				valid = FALSE;
+				break;
+			}
+
+			inptr += length;
+			n += length;
+		}
+	}
+
+	if (end != NULL)
+		*end = (gchar *) inptr;
+
+	return valid;
+}
+
+gunichar
+g_utf8_get_char_validated (const gchar *str, gssize max_len)
+{
+	unsigned char *inptr = (unsigned char *) str;
+	gunichar u = *inptr;
+	int n, i;
+
+	if (max_len == 0)
+		return -2;
+
+	if (u < 0x80) {
+		/* simple ascii case */
+		return u;
+	} else if (u < 0xc2) {
+		return -1;
+	} else if (u < 0xe0) {
+		u &= 0x1f;
+		n = 2;
+	} else if (u < 0xf0) {
+		u &= 0x0f;
+		n = 3;
+	} else if (u < 0xf8) {
+		u &= 0x07;
+		n = 4;
+	} else if (u < 0xfc) {
+		u &= 0x03;
+		n = 5;
+	} else if (u < 0xfe) {
+		u &= 0x01;
+		n = 6;
+	} else {
+		return -1;
+	}
+
+	if (max_len > 0) {
+		if (!utf8_validate (inptr, MIN (max_len, n)))
+			return -1;
+
+		if (max_len < n)
+			return -2;
+	} else {
+		if (!utf8_validate (inptr, n))
+			return -1;
+	}
+
+	for (i = 1; i < n; i++)
+		u = (u << 6) | (*++inptr ^ 0x80);
+
+	return u;
+}
+
+glong
+g_utf8_strlen (const gchar *str, gssize max_len)
+{
+	const guchar *inptr = (const guchar *) str;
+	glong clen = 0, len = 0, n;
+
+	if (max_len == 0)
+		return 0;
+
+	if (max_len < 0) {
+		while (*inptr) {
+			inptr += g_utf8_jump_table[*inptr];
+			len++;
+		}
+	} else {
+		while (len < max_len && *inptr) {
+			n = g_utf8_jump_table[*inptr];
+			if ((clen + n) > max_len)
+				break;
+
+			inptr += n;
+			clen += n;
+			len++;
+		}
+	}
+
+	return len;
+}
+
+gunichar
+g_utf8_get_char (const gchar *src)
+{
+	unsigned char *inptr = (unsigned char *) src;
+	gunichar u = *inptr;
+	int n, i;
+
+	if (u < 0x80) {
+		/* simple ascii case */
+		return u;
+	} else if (u < 0xe0) {
+		u &= 0x1f;
+		n = 2;
+	} else if (u < 0xf0) {
+		u &= 0x0f;
+		n = 3;
+	} else if (u < 0xf8) {
+		u &= 0x07;
+		n = 4;
+	} else if (u < 0xfc) {
+		u &= 0x03;
+		n = 5;
+	} else {
+		u &= 0x01;
+		n = 6;
+	}
+
+	for (i = 1; i < n; i++)
+		u = (u << 6) | (*++inptr ^ 0x80);
+
+	return u;
+}
+
+gchar *
+g_utf8_offset_to_pointer (const gchar *str, glong offset)
+{
+	const gchar *p = str;
+
+	if (offset > 0) {
+		do {
+			p = g_utf8_next_char (p);
+			offset --;
+		} while (offset > 0);
+	}
+	else if (offset < 0) {
+		const gchar *jump = str;
+		do {
+			// since the minimum size of a character is 1
+			// we know we can step back at least offset bytes
+			jump = jump + offset;
+
+			// if we land in the middle of a character
+			// walk to the beginning
+			while ((*jump & 0xc0) == 0x80)
+				jump --;
+
+			// count how many characters we've actually walked
+			// by going forward
+			p = jump;
+			do {
+				p = g_utf8_next_char (p);
+				offset ++;
+			} while (p < jump);
+
+		} while (offset < 0);
+	}
+
+	return (gchar *)p;
+}
+
+glong
+g_utf8_pointer_to_offset (const gchar *str, const gchar *pos)
+{
+	const gchar *inptr, *inend;
+	glong offset = 0;
+	glong sign = 1;
+
+	if (pos == str)
+		return 0;
+
+	if (str < pos) {
+		inptr = str;
+		inend = pos;
+	} else {
+		inptr = pos;
+		inend = str;
+		sign = -1;
+	}
+
+	do {
+		inptr = g_utf8_next_char (inptr);
+		offset++;
+	} while (inptr < inend);
+
+	return offset * sign;
+}
+
 /*
  * Unicode encoders and decoders
  */
@@ -419,12 +687,12 @@ encode_latin1 (gunichar c, char *outbuf, size_t outleft)
  * Simple conversion API
  */
 
-static gpointer error_quark = (gpointer)"ConvertError";
+static gpointer g_error_quark = (gpointer)"ConvertError";
 
 gpointer
 g_convert_error_quark (void)
 {
-	return error_quark;
+	return g_error_quark;
 }
 /*
  * Unicode conversion
@@ -546,7 +814,7 @@ g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written)
 	if (items_written)
 		*items_written = n;
 
-	outptr = outbuf = g_malloc ((n + 1) * sizeof (gunichar));
+	outptr = outbuf = (gunichar *)g_malloc ((n + 1) * sizeof (gunichar));
 	inptr = (char *) str;
 
 	for (i = 0; i < n; i++) {
@@ -560,7 +828,7 @@ g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written)
 }
 
 static gunichar2 *
-eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
+eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
 {
 	gunichar2 *outbuf, *outptr;
 	size_t outlen = 0;
@@ -611,7 +879,7 @@ eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong
 		*items_written = (glong)outlen;
 
 	if (G_LIKELY (!custom_alloc_func))
-		outptr = outbuf = g_malloc ((outlen + 1) * sizeof (gunichar2));
+		outptr = outbuf = (gunichar2 *)g_malloc ((outlen + 1) * sizeof (gunichar2));
 	else
 		outptr = outbuf = (gunichar2 *)custom_alloc_func ((outlen + 1) * sizeof (gunichar2), custom_alloc_data);
 
@@ -642,7 +910,8 @@ eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong
 		inptr += n;
 	}
 
-	*outptr = '\0';
+	if (null_terminate)
+		*outptr = '\0';
 
 	return outbuf;
 
@@ -672,49 +941,55 @@ eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong
 gunichar2 *
 g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_BYTE_ORDER);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
 }
 
 gunichar2 *
 g_utf8_to_utf16be (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_BIG_ENDIAN);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_BIG_ENDIAN);
 }
 
 gunichar2 *
 g_utf8_to_utf16le (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_LITTLE_ENDIAN);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_LITTLE_ENDIAN);
 }
 
 gunichar2 *
 g_utf8_to_utf16_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
+}
+
+gunichar2 *
+g_utf8_to_utf16_custom_alloc_optional (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
+{
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, include_nuls, replace_invalid_codepoints, null_terminate, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
 }
 
 gunichar2 *
 g_utf8_to_utf16be_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_BIG_ENDIAN);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, custom_alloc_func, custom_alloc_data, err, G_BIG_ENDIAN);
 }
 
 gunichar2 *
 g_utf8_to_utf16le_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_LITTLE_ENDIAN);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, custom_alloc_func, custom_alloc_data, err, G_LITTLE_ENDIAN);
 }
 
 gunichar2 *
 eg_utf8_to_utf16_with_nuls (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, FALSE, NULL, NULL, err, G_BYTE_ORDER);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, FALSE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
 }
 
 gunichar2 *
 eg_wtf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, TRUE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
 }
 
 gunichar *
@@ -769,7 +1044,7 @@ g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_wri
 	if (items_read)
 		*items_read = GPTRDIFF_TO_LONG (inptr - str);
 
-	outptr = outbuf = g_malloc (outlen + 4);
+	outptr = outbuf = (gunichar *)g_malloc (outlen + 4);
 	inptr = (char *) str;
 	inleft = len;
 
@@ -791,17 +1066,23 @@ g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_wri
 
 static
 gchar *
-eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
+eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
 {
 	char *inptr, *outbuf, *outptr;
 	size_t outlen = 0;
 	size_t inleft;
 	gunichar c;
+	gboolean replaced = FALSE;
 	int n;
 
 	g_return_val_if_fail (str != NULL, NULL);
 
 	if (len < 0) {
+		if (include_nuls) {
+			g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, "Conversions with embedded nulls must pass the string length");
+			return NULL;
+		}
+
 		len = 0;
 		while (str[len])
 			len++;
@@ -818,30 +1099,37 @@ eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, gl
 				inptr += 2;
 			}
 
-			if (errno == EILSEQ) {
+			if (errno == EILSEQ && !replace_invalid_codepoints) {
 				g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
 					     "Illegal byte sequence encountered in the input.");
-			} else if (items_read) {
+			} else if (items_read && !replace_invalid_codepoints) {
 				/* partial input is ok if we can let our caller know... */
 				break;
-			} else {
+			} else if (!replace_invalid_codepoints) {
 				g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
 					     "Partial byte sequence encountered in the input.");
 			}
 
-			if (items_read)
-				*items_read = GPTRDIFF_TO_LONG ((inptr - (char *) str) / 2);
+			if (replace_invalid_codepoints) {
+				n = sizeof(gunichar);
+				c = '?';
+				replaced = TRUE;
+			} else {
+				if (items_read)
+					*items_read = GPTRDIFF_TO_LONG ((inptr - (char *) str) / 2);
 
-			if (items_written)
-				*items_written = 0;
+				if (items_written)
+					*items_written = 0;
 
-			return NULL;
-		} else if (c == 0)
+				return NULL;
+			}
+		} else if (c == 0 && !include_nuls)
 			break;
 
-		outlen += g_unichar_to_utf8 (c, NULL);
+		outlen += (replaced && replace_invalid_codepoints) ? n - 1 : g_unichar_to_utf8 (c, NULL);
 		inleft -= n;
 		inptr += n;
+		replaced = FALSE;
 	}
 
 	if (items_read)
@@ -851,7 +1139,7 @@ eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, gl
 		*items_written = (glong)outlen;
 
 	if (G_LIKELY (!custom_alloc_func))
-		outptr = outbuf = g_malloc (outlen + 1);
+		outptr = outbuf = (char *)g_malloc (outlen + 1);
 	else
 		outptr = outbuf = (char *)custom_alloc_func (outlen + 1, custom_alloc_data);
 
@@ -866,17 +1154,24 @@ eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, gl
 	inleft = len * 2;
 
 	while (inleft > 0) {
-		if ((n = decode_utf16_endian (inptr, inleft, &c, endian)) < 0)
-			break;
-		else if (c == 0)
+		if ((n = decode_utf16_endian (inptr, inleft, &c, endian)) < 0) {
+			if (replace_invalid_codepoints) {
+				outptr += '?';
+				n = sizeof(gunichar);
+			} else
+				break;
+		} else if (c == 0 && !include_nuls) {
 			break;
+		} else {
+			outptr += g_unichar_to_utf8 (c, outptr);
+		}
 
-		outptr += g_unichar_to_utf8 (c, outptr);
 		inleft -= n;
 		inptr += n;
 	}
 
-	*outptr = '\0';
+	if (null_terminate)
+		*outptr = '\0';
 
 	return outbuf;
 }
@@ -884,25 +1179,31 @@ eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, gl
 gchar *
 g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_BYTE_ORDER);
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
 }
 
 gchar *
 g_utf16le_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_LITTLE_ENDIAN);
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_LITTLE_ENDIAN);
 }
 
 gchar *
 g_utf16be_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_BIG_ENDIAN);
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_BIG_ENDIAN);
 }
 
 gchar *
 g_utf16_to_utf8_custom_alloc (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
+}
+
+gchar *
+g_utf16_to_utf8_custom_alloc_with_nulls (const gunichar2 *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
+{
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, include_nuls, TRUE, null_terminate, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
 }
 
 gunichar *
@@ -966,7 +1267,7 @@ g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *item
 	if (items_written)
 		*items_written = (glong)(outlen / 4);
 
-	outptr = outbuf = g_malloc (outlen + 4);
+	outptr = outbuf = (gunichar *)g_malloc (outlen + 4);
 	inptr = (char *) str;
 	inleft = len * 2;
 
@@ -1034,7 +1335,7 @@ g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_
 
 	len = i;
 
-	outptr = outbuf = g_malloc (outlen + 1);
+	outptr = outbuf = (char *)g_malloc (outlen + 1);
 	for (i = 0; i < len; i++)
 		outptr += g_unichar_to_utf8 (str[i], outptr);
 	*outptr = 0;
@@ -1096,7 +1397,7 @@ g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items
 
 	len = i;
 
-	outptr = outbuf = g_malloc ((outlen + 1) * sizeof (gunichar2));
+	outptr = outbuf = (gunichar2 *)g_malloc ((outlen + 1) * sizeof (gunichar2));
 	for (i = 0; i < len; i++)
 		outptr += g_unichar_to_utf16 (str[i], outptr);
 	*outptr = 0;
diff --git a/src/native/minipal/utf8converter.h b/src/native/minipal/utf8converter.h
new file mode 100644
index 00000000000000..06cd677dfe1955
--- /dev/null
+++ b/src/native/minipal/utf8converter.h
@@ -0,0 +1,200 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef HAVE_MINIPAL_UTF8CONVERTER_H
+#define HAVE_MINIPAL_UTF8CONVERTER_H
+
+#include <config.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifndef CORECLR
+#include "glib.h"
+#endif
+
+#ifdef _MSC_VER
+#define FORCE_INLINE(RET_TYPE) __forceinline RET_TYPE
+#else
+#define FORCE_INLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline))
+#endif
+
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+#define decode_utf32 decode_utf32le
+#define encode_utf32 encode_utf32le
+#define decode_utf16 decode_utf16le
+#define encode_utf16 encode_utf16le
+#define GUINT16_TO_LE(x) (x)
+#define GUINT16_TO_BE(x) GUINT16_SWAP_LE_BE(x)
+#else
+#define decode_utf32 decode_utf32be
+#define encode_utf32 encode_utf32be
+#define decode_utf16 decode_utf16be
+#define encode_utf16 encode_utf16be
+#define GUINT16_TO_LE(x) GUINT16_SWAP_LE_BE(x)
+#define GUINT16_TO_BE(x) (x)
+#endif
+
+#ifdef CORECLR
+
+#ifdef TARGET_64BIT
+#define ptrdiff_t int64_t
+#else
+#define ptrdiff_t int32_t
+#endif
+
+#define gunichar uint32_t
+#define gunichar2 uint16_t
+#define guint uint32_t
+#define gchar char
+#define guchar unsigned char
+#define gboolean bool
+#define gsize size_t
+#define gssize ptrdiff_t
+#define gint int32_t
+#define glong long
+#define gptrdiff ptrdiff_t
+#define guint8 uint8_t
+#define guint16 uint16_t
+#define gpointer void*
+#define g_malloc malloc
+#define TRUE 1
+#define FALSE 0
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+typedef void* (*GCustomAllocator) (size_t req_size, void* custom_alloc_data);
+
+typedef struct {
+	/* In the real glib, this is a GQuark, but we dont use/need that */
+	void* domain;
+	int32_t code;
+	char *message;
+} GError;
+
+typedef struct {
+	void* buffer;
+	size_t buffer_size;
+	size_t req_buffer_size;
+} GFixedBufferCustomAllocatorData;
+
+typedef enum {
+	G_CONVERT_ERROR_NO_CONVERSION,
+	G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+	G_CONVERT_ERROR_FAILED,
+	G_CONVERT_ERROR_PARTIAL_INPUT,
+	G_CONVERT_ERROR_BAD_URI,
+	G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
+	G_CONVERT_ERROR_NO_MEMORY
+} GConvertError;
+
+#define UNROLL_DECODE_UTF8 0
+#define UNROLL_ENCODE_UTF8 0
+
+static int decode_utf32be (char *inbuf, size_t inleft, uint32_t *outchar);
+static int encode_utf32be (uint32_t c, char *outbuf, size_t outleft);
+
+static int decode_utf32le (char *inbuf, size_t inleft, uint32_t *outchar);
+static int encode_utf32le (uint32_t c, char *outbuf, size_t outleft);
+
+static int decode_utf16be (char *inbuf, size_t inleft, uint32_t *outchar);
+static int encode_utf16be (uint32_t c, char *outbuf, size_t outleft);
+
+static int decode_utf16le (char *inbuf, size_t inleft, uint32_t *outchar);
+static int encode_utf16le (uint32_t c, char *outbuf, size_t outleft);
+
+static FORCE_INLINE (int) decode_utf8 (char *inbuf, size_t inleft, uint32_t *outchar);
+static int encode_utf8 (uint32_t c, char *outbuf, size_t outleft);
+
+static int decode_latin1 (char *inbuf, size_t inleft, uint32_t *outchar);
+static int encode_latin1 (uint32_t c, char *outbuf, size_t outleft);
+
+#define G_LITTLE_ENDIAN 1234
+#define G_BIG_ENDIAN 4321
+#define GUINT16_SWAP_LE_BE(x) ((uint16_t) (((uint16_t) x) >> 8) | ((((uint16_t)(x)) & 0xff) << 8))
+
+#ifdef BIGENDIAN
+#define G_BYTE_ORDER G_BIG_ENDIAN
+#else
+#define G_BYTE_ORDER G_LITTLE_ENDIAN
+#endif
+
+#define G_CAST_TYPE_TO_TYPE(src,dest,v) ((dest)(v))
+#define G_CAST_PTRTYPE_TO_STYPE(src,dest,v) ((dest)(gssize)(v))
+#define GUINT32_TO_UINT16(v) G_CAST_TYPE_TO_TYPE(guint32, guint16, v)
+#define GSIZE_TO_INT(v) G_CAST_TYPE_TO_TYPE(gsize, gint, v)
+#define GSSIZE_TO_UINT(v) G_CAST_TYPE_TO_TYPE(gssize, guint, v)
+#define GUNICHAR_TO_UINT8(v) G_CAST_TYPE_TO_TYPE(gunichar, guint8, v)
+#define GUNICHAR_TO_UINT16(v) G_CAST_TYPE_TO_TYPE(gunichar, guint16, v)
+#define GUNICHAR_TO_CHAR(v) G_CAST_TYPE_TO_TYPE(gunichar, gchar, v)
+#define GPTRDIFF_TO_LONG(v) G_CAST_PTRTYPE_TO_STYPE(gptrdiff, glong, v)
+#define g_return_val_if_fail(x,e)  do { if (!(x)) { printf ("%s:%d: assertion '%s' failed\n", __FILE__, __LINE__, #x); return (e); } } while(0)
+#define g_utf8_next_char(p) ((p) + g_utf8_jump_table[(unsigned char)(*p)])
+
+#if defined(__GNUC__) && (__GNUC__ > 2)
+#define G_LIKELY(expr) (__builtin_expect ((expr) != 0, 1))
+#define G_UNLIKELY(expr) (__builtin_expect ((expr) != 0, 0))
+#else
+#define G_LIKELY(x) (x)
+#define G_UNLIKELY(x) (x)
+#endif
+
+void
+g_set_error (GError **err, void* domain, int32_t code, const char *format, ...)
+{
+	va_list args;
+
+	if (err) {
+		*err = (GError *) malloc (sizeof (GError));
+		(*err)->domain = domain;
+		(*err)->code = code;
+
+		va_start (args, format);
+		int s = vsnprintf(NULL, 0, format, args);
+		va_end(args);
+
+		if (s > -1)
+		{
+			(*err)->message = (char*)malloc(s);
+
+			va_start(args, format);
+			vsnprintf((*err)->message, s, format, args);
+			va_end (args);
+		}
+	}
+}
+
+#define G_CONVERT_ERROR g_convert_error_quark()
+
+inline static void
+mono_set_errno (int errno_val)
+{
+	errno = errno_val;
+}
+
+#endif // CORECLR
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Unicode encoders and decoders
+ */
+
+gunichar2 *
+g_utf8_to_utf16_custom_alloc_optional (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err);
+
+gchar *
+g_utf16_to_utf8_custom_alloc_with_nulls (const gunichar2 *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err);
+
+#ifdef __cplusplus
+}
+#endif // extern "C"
+
+#endif //HAVE_MINIPAL_UTF8CONVERTER_H

From 3eca7a47f0913226206714f8fc75c820a79b951d Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Thu, 18 May 2023 20:37:27 +0300
Subject: [PATCH 2/9] Revert "Share UTF8 converters between coreclr and mono -
 v1"

This reverts commit f9845ac6f53dc95fb747eb21351dfa9412397217.
---
 src/coreclr/pal/src/CMakeLists.txt            |    4 +-
 src/coreclr/pal/src/locale/unicode.cpp        |   93 +-
 src/coreclr/pal/src/locale/utf8.cpp           | 2937 +++++++++++++++++
 .../MultiByteToWideChar/test4/test4.cpp       |    2 +-
 .../WideCharToMultiByte/test5/test5.cpp       |    2 +-
 .../TypeBuilder/TypeBuilderDefineEvent.cs     |    2 +-
 .../TypeBuilder/TypeBuilderDefineProperty.cs  |    2 +-
 src/mono/mono/eglib/CMakeLists.txt            |    5 +-
 .../mono/eglib/giconv.c}                      |  481 +--
 src/mono/mono/eglib/glib.h                    |    1 -
 src/mono/mono/eglib/gutf8.c                   |  323 ++
 src/native/minipal/utf8converter.h            |  200 --
 12 files changed, 3369 insertions(+), 683 deletions(-)
 create mode 100644 src/coreclr/pal/src/locale/utf8.cpp
 rename src/{native/minipal/utf8converter.c => mono/mono/eglib/giconv.c} (68%)
 create mode 100644 src/mono/mono/eglib/gutf8.c
 delete mode 100644 src/native/minipal/utf8converter.h

diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt
index 22d9c29594dd27..bd5a6bdf4d5b22 100644
--- a/src/coreclr/pal/src/CMakeLists.txt
+++ b/src/coreclr/pal/src/CMakeLists.txt
@@ -152,7 +152,7 @@ set(SOURCES
   loader/module.cpp
   locale/unicode.cpp
   locale/unicodedata.cpp
-  ${CLR_SRC_NATIVE_DIR}/minipal/utf8converter.c
+  locale/utf8.cpp
   map/common.cpp
   map/map.cpp
   map/virtual.cpp
@@ -213,8 +213,6 @@ set(SOURCES
   thread/threadsusp.cpp
 )
 
-set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8converter.c" PROPERTIES COMPILE_FLAGS -Wno-implicit-fallthrough)
-
 if(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND)
   set(LIBUNWIND_OBJECTS $<TARGET_OBJECTS:libunwind>)
 endif(NOT CLR_CMAKE_USE_SYSTEM_LIBUNWIND)
diff --git a/src/coreclr/pal/src/locale/unicode.cpp b/src/coreclr/pal/src/locale/unicode.cpp
index b4c832c3d2e599..f29eabc07d9be3 100644
--- a/src/coreclr/pal/src/locale/unicode.cpp
+++ b/src/coreclr/pal/src/locale/unicode.cpp
@@ -34,7 +34,6 @@ Revision History:
 #include <errno.h>
 
 #include <debugmacrosext.h>
-#include <minipal/utf8converter.h>
 
 using namespace CorUnix;
 
@@ -228,7 +227,7 @@ MultiByteToWideChar(
         OUT LPWSTR lpWideCharStr,
         IN int cchWideChar)
 {
-    long retval = 0;
+    INT retval =0;
 
     PERF_ENTRY(MultiByteToWideChar);
     ENTRY("MultiByteToWideChar(CodePage=%u, dwFlags=%#x, lpMultiByteStr=%p (%s),"
@@ -254,51 +253,16 @@ MultiByteToWideChar(
         goto EXIT;
     }
 
-    // Use g_utf8_to_utf16_custom_alloc_optional on all systems, since it replaces
+    // Use UTF8ToUnicode on all systems, since it replaces
     // invalid characters and Core Foundation doesn't do that.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
-        int inputLength = (int)strlen(lpMultiByteStr);
-        bool allowNulls = (cbMultiByte > 0 && lpMultiByteStr[cbMultiByte - 1] != '\0');
-        bool subtractOne = cbMultiByte == cchWideChar || allowNulls;
         if (cbMultiByte <= -1)
         {
-            cbMultiByte = inputLength + 1;
+        cbMultiByte = strlen(lpMultiByteStr) + 1;
         }
 
-        size_t allocSize = 0;
-        struct cookie { LPWSTR str; size_t* allocSize; int* count; };
-        cookie callbackCookie = { .str = lpWideCharStr,  .allocSize = &allocSize, .count = &cchWideChar };
-
-        long itemsWritten;
-        GError *gerror = NULL;
-        lpWideCharStr = (LPWSTR)g_utf8_to_utf16_custom_alloc_optional(lpMultiByteStr, cbMultiByte, &retval, &itemsWritten, allowNulls,
-            !(dwFlags & MB_ERR_INVALID_CHARS), cbMultiByte > inputLength,
-            [](size_t req_size, void* custom_alloc_data)
-            {
-                cookie* callbackCookie = (cookie*)(custom_alloc_data);
-                *(callbackCookie->allocSize) = (req_size / sizeof (gunichar2));
-                int count = *(callbackCookie->count);
-                return (void*)(callbackCookie->str && !(count && *(callbackCookie->allocSize) - 1 > (size_t)count) ? callbackCookie->str : NULL);
-            }, &callbackCookie, &gerror);
-
-        if (gerror && (lpWideCharStr || (cchWideChar && allocSize > (size_t)cchWideChar)))
-        {
-            retval = 0;
-            ERROR ("The error is %d %s\n", gerror->code, gerror->message);
-            switch (gerror->code)
-            {
-                case G_CONVERT_ERROR_ILLEGAL_SEQUENCE: SetLastError(ERROR_NO_UNICODE_TRANSLATION); break;
-                case G_CONVERT_ERROR_NO_MEMORY: SetLastError(ERROR_INSUFFICIENT_BUFFER); break;
-                default: SetLastError(ERROR_INVALID_PARAMETER); break;
-            }
-            free(gerror);
-            goto EXIT;
-        }
-
-        retval = allocSize;
-        if (retval > 1 && subtractOne) retval -= 1;
-
+        retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags);
         goto EXIT;
     }
 
@@ -310,7 +274,7 @@ MultiByteToWideChar(
 
     LOGEXIT("MultiByteToWideChar returns %d.\n",retval);
     PERF_EXIT(MultiByteToWideChar);
-    return (int)retval;
+    return retval;
 }
 
 
@@ -333,7 +297,7 @@ WideCharToMultiByte(
         IN LPCSTR lpDefaultChar,
         OUT LPBOOL lpUsedDefaultChar)
 {
-    long retval = 0;
+    INT retval =0;
     char defaultChar = '?';
     BOOL usedDefaultChar = FALSE;
 
@@ -374,50 +338,15 @@ WideCharToMultiByte(
         defaultChar = *lpDefaultChar;
     }
 
-    // Use g_utf16_to_utf8_custom_alloc_with_nulls on all systems because we use
-    // g_utf8_to_utf16 in MultiByteToWideChar() on all systems.
+    // Use UnicodeToUTF8 on all systems because we use
+    // UTF8ToUnicode in MultiByteToWideChar() on all systems.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
-        int inputLength = (int)PAL_wcslen(lpWideCharStr);
-        bool allowNulls = (cchWideChar > 0 && lpWideCharStr[cchWideChar - 1] != '\0');
-        bool subtractOne = cchWideChar == cbMultiByte || allowNulls;
         if (cchWideChar == -1)
         {
-            cchWideChar = inputLength + 1;
+            cchWideChar = PAL_wcslen(lpWideCharStr) + 1;
         }
-
-        size_t allocSize = 0;
-        struct cookie { LPSTR str; size_t* allocSize; int* count; };
-        cookie callbackCookie = { .str = lpMultiByteStr,  .allocSize = &allocSize, .count = &cbMultiByte };
-
-        long itemsWritten;
-        GError *gerror = NULL;
-        lpMultiByteStr = g_utf16_to_utf8_custom_alloc_with_nulls((unsigned short*)lpWideCharStr, cchWideChar, &retval, &itemsWritten, allowNulls, cchWideChar > inputLength,
-            [](size_t req_size, void* custom_alloc_data)
-            {
-                cookie* callbackCookie = (cookie*)(custom_alloc_data);
-                *(callbackCookie->allocSize) = req_size;
-                int count = (size_t)*(callbackCookie->count);
-                return (void*)(callbackCookie->str && !(count && *(callbackCookie->allocSize) - 1 > (size_t)count) ? callbackCookie->str : NULL);
-            }, &callbackCookie, &gerror);
-
-        if (gerror && (lpMultiByteStr || (cbMultiByte && allocSize > (size_t)cbMultiByte)))
-        {
-            retval = 0;
-            ERROR ("The error is %d %s\n", gerror->code, gerror->message);
-            switch (gerror->code)
-            {
-                case G_CONVERT_ERROR_ILLEGAL_SEQUENCE: SetLastError(ERROR_NO_UNICODE_TRANSLATION); break;
-                case G_CONVERT_ERROR_NO_MEMORY: SetLastError(ERROR_INSUFFICIENT_BUFFER); break;
-                default: SetLastError(ERROR_INVALID_PARAMETER); break;
-            }
-            free(gerror);
-            goto EXIT;
-        }
-
-        retval = allocSize;
-        if (retval > 1  && subtractOne) retval -= 1;
-
+        retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte);
         goto EXIT;
     }
 
@@ -445,7 +374,7 @@ WideCharToMultiByte(
 
     LOGEXIT("WideCharToMultiByte returns INT %d\n", retval);
     PERF_EXIT(WideCharToMultiByte);
-    return (int)retval;
+    return retval;
 }
 
 extern char * g_szCoreCLRPath;
diff --git a/src/coreclr/pal/src/locale/utf8.cpp b/src/coreclr/pal/src/locale/utf8.cpp
new file mode 100644
index 00000000000000..f07c69ff7e15f3
--- /dev/null
+++ b/src/coreclr/pal/src/locale/utf8.cpp
@@ -0,0 +1,2937 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+/*++
+
+Module Name:
+
+    unicode/utf8.c
+
+Abstract:
+    Functions to encode and decode UTF-8 strings. This is a port of the C# version from Utf8Encoding.cs.
+
+Revision History:
+
+--*/
+
+#include "pal/utf8.h"
+#include "pal/malloc.hpp"
+
+using namespace CorUnix;
+
+#define FASTLOOP
+
+struct CharUnicodeInfo
+{
+    static const WCHAR HIGH_SURROGATE_START = 0xd800;
+    static const WCHAR HIGH_SURROGATE_END = 0xdbff;
+    static const WCHAR LOW_SURROGATE_START = 0xdc00;
+    static const WCHAR LOW_SURROGATE_END = 0xdfff;
+};
+
+struct Char
+{
+    // Test if the wide character is a high surrogate
+    static bool IsHighSurrogate(const WCHAR c)
+    {
+        return (c & 0xFC00) == CharUnicodeInfo::HIGH_SURROGATE_START;
+    }
+
+    // Test if the wide character is a low surrogate
+    static bool IsLowSurrogate(const WCHAR c)
+    {
+        return (c & 0xFC00) == CharUnicodeInfo::LOW_SURROGATE_START;
+    }
+
+    // Test if the wide character is a surrogate half
+    static bool IsSurrogate(const WCHAR c)
+    {
+        return (c & 0xF800) == CharUnicodeInfo::HIGH_SURROGATE_START;
+    }
+
+    // Test if the wide character is a high surrogate
+    static bool IsHighSurrogate(const WCHAR* s, int index)
+    {
+        return IsHighSurrogate(s[index]);
+    }
+
+    // Test if the wide character is a low surrogate
+    static bool IsLowSurrogate(const WCHAR* s, int index)
+    {
+        return IsLowSurrogate(s[index]);
+    }
+
+    // Test if the wide character is a surrogate half
+    static bool IsSurrogate(const WCHAR* s, int index)
+    {
+        return IsSurrogate(s[index]);
+    }
+};
+
+class ArgumentException
+{
+
+public:
+    ArgumentException(LPCSTR message)
+    {
+    }
+
+    ArgumentException(LPCSTR message, LPCSTR argName)
+    {
+    }
+};
+
+class ArgumentNullException : public ArgumentException
+{
+public:
+    ArgumentNullException(LPCSTR argName)
+        : ArgumentException("Argument is NULL", argName)
+    {
+
+    }
+};
+
+class ArgumentOutOfRangeException : public ArgumentException
+{
+public:
+    ArgumentOutOfRangeException(LPCSTR argName, LPCSTR message)
+        : ArgumentException(message, argName)
+    {
+
+    }
+};
+
+class InsufficientBufferException : public ArgumentException
+{
+public:
+    InsufficientBufferException(LPCSTR message, LPCSTR argName)
+        : ArgumentException(message, argName)
+    {
+
+    }
+};
+
+class Contract
+{
+public:
+    static void Assert(bool cond, LPCSTR str)
+    {
+        if (!cond)
+        {
+            throw ArgumentException(str);
+        }
+    }
+
+    static void EndContractBlock()
+    {
+    }
+};
+
+class DecoderFallbackException : public ArgumentException
+{
+    BYTE *bytesUnknown;
+    int index;
+
+public:
+    DecoderFallbackException(
+        LPCSTR message, BYTE bytesUnknown[], int index) : ArgumentException(message)
+    {
+        this->bytesUnknown = bytesUnknown;
+        this->index = index;
+    }
+
+    BYTE *BytesUnknown()
+    {
+        return (bytesUnknown);
+    }
+
+    int GetIndex()
+    {
+        return index;
+    }
+};
+
+class DecoderFallbackBuffer;
+
+class DecoderFallback
+{
+public:
+
+    // Fallback
+    //
+    // Return the appropriate unicode string alternative to the character that need to fall back.
+
+    virtual DecoderFallbackBuffer* CreateFallbackBuffer() = 0;
+
+    // Maximum number of characters that this instance of this fallback could return
+
+    virtual int GetMaxCharCount() = 0;
+};
+
+class DecoderReplacementFallback : public DecoderFallback
+{
+    // Our variables
+    WCHAR strDefault[2];
+    int strDefaultLength;
+
+public:
+    // Construction.  Default replacement fallback uses no best fit and ? replacement string
+    DecoderReplacementFallback() : DecoderReplacementFallback(W("?"))
+    {
+    }
+
+    DecoderReplacementFallback(const WCHAR* replacement)
+    {
+        // Must not be null
+        if (replacement == nullptr)
+            throw ArgumentNullException("replacement");
+        Contract::EndContractBlock();
+
+        // Make sure it doesn't have bad surrogate pairs
+        bool bFoundHigh = false;
+        int replacementLength = PAL_wcslen((const WCHAR *)replacement);
+        for (int i = 0; i < replacementLength; i++)
+        {
+            // Found a surrogate?
+            if (Char::IsSurrogate(replacement, i))
+            {
+                // High or Low?
+                if (Char::IsHighSurrogate(replacement, i))
+                {
+                    // if already had a high one, stop
+                    if (bFoundHigh)
+                        break;  // break & throw at the bFoundHIgh below
+                    bFoundHigh = true;
+                }
+                else
+                {
+                    // Low, did we have a high?
+                    if (!bFoundHigh)
+                    {
+                        // Didn't have one, make if fail when we stop
+                        bFoundHigh = true;
+                        break;
+                    }
+
+                    // Clear flag
+                    bFoundHigh = false;
+                }
+            }
+            // If last was high we're in trouble (not surrogate so not low surrogate, so break)
+            else if (bFoundHigh)
+                break;
+        }
+        if (bFoundHigh)
+            throw ArgumentException("String 'replacement' contains invalid Unicode code points.", "replacement");
+
+        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), replacement);
+        strDefaultLength = replacementLength;
+    }
+
+    WCHAR* GetDefaultString()
+    {
+        return strDefault;
+    }
+
+    virtual DecoderFallbackBuffer* CreateFallbackBuffer();
+
+    // Maximum number of characters that this instance of this fallback could return
+    virtual int GetMaxCharCount()
+    {
+        return strDefaultLength;
+    }
+};
+
+class DecoderFallbackBuffer
+{
+    friend class UTF8Encoding;
+    // Most implementations will probably need an implementation-specific constructor
+
+    // internal methods that cannot be overridden that let us do our fallback thing
+    // These wrap the internal methods so that we can check for people doing stuff that's incorrect
+
+public:
+    virtual ~DecoderFallbackBuffer() = default;
+
+    virtual bool Fallback(BYTE bytesUnknown[], int index, int size) = 0;
+
+    // Get next character
+    virtual WCHAR GetNextChar() = 0;
+
+    //Back up a character
+    virtual bool MovePrevious() = 0;
+
+    // How many chars left in this fallback?
+    virtual int GetRemaining() = 0;
+
+    // Clear the buffer
+    virtual void Reset()
+    {
+        while (GetNextChar() != (WCHAR)0);
+    }
+
+    // Internal items to help us figure out what we're doing as far as error messages, etc.
+    // These help us with our performance and messages internally
+protected:
+    BYTE*           byteStart;
+    WCHAR*          charEnd;
+
+    // Internal reset
+    void InternalReset()
+    {
+        byteStart = nullptr;
+        Reset();
+    }
+
+    // Set the above values
+    // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
+    void InternalInitialize(BYTE* byteStart, WCHAR* charEnd)
+    {
+        this->byteStart = byteStart;
+        this->charEnd = charEnd;
+    }
+
+    // Fallback the current byte by sticking it into the remaining char buffer.
+    // This can only be called by our encodings (other have to use the public fallback methods), so
+    // we can use our DecoderNLS here too (except we don't).
+    // Returns true if we are successful, false if we can't fallback the character (no buffer space)
+    // So caller needs to throw buffer space if return false.
+    // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
+    // array, and we might need the index, hence the byte*
+    // Don't touch ref chars unless we succeed
+    virtual bool InternalFallback(BYTE bytes[], BYTE* pBytes, WCHAR** chars, int size)
+    {
+
+        Contract::Assert(byteStart != nullptr, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");
+
+        // See if there's a fallback character and we have an output buffer then copy our string.
+        if (this->Fallback(bytes, (int)(pBytes - byteStart - size), size))
+        {
+            // Copy the chars to our output
+            WCHAR ch;
+            WCHAR* charTemp = *chars;
+            bool bHighSurrogate = false;
+            while ((ch = GetNextChar()) != 0)
+            {
+                // Make sure no mixed up surrogates
+                if (Char::IsSurrogate(ch))
+                {
+                    if (Char::IsHighSurrogate(ch))
+                    {
+                        // High Surrogate
+                        if (bHighSurrogate)
+                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+                        bHighSurrogate = true;
+                    }
+                    else
+                    {
+                        // Low surrogate
+                        if (!bHighSurrogate)
+                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+                        bHighSurrogate = false;
+                    }
+                }
+
+                if (charTemp >= charEnd)
+                {
+                    // No buffer space
+                    return false;
+                }
+
+                *(charTemp++) = ch;
+            }
+
+            // Need to make sure that bHighSurrogate isn't true
+            if (bHighSurrogate)
+                throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+
+            // Now we aren't going to be false, so its OK to update chars
+            *chars = charTemp;
+        }
+
+        return true;
+    }
+
+    // This version just counts the fallback and doesn't actually copy anything.
+    virtual int InternalFallback(BYTE bytes[], BYTE* pBytes, int size)
+        // Right now this has both bytes[] and BYTE* bytes, since we might have extra bytes, hence the
+        // array, and we might need the index, hence the byte*
+    {
+
+        Contract::Assert(byteStart != nullptr, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");
+
+        // See if there's a fallback character and we have an output buffer then copy our string.
+        if (this->Fallback(bytes, (int)(pBytes - byteStart - size), size))
+        {
+            int count = 0;
+
+            WCHAR ch;
+            bool bHighSurrogate = false;
+            while ((ch = GetNextChar()) != 0)
+            {
+                // Make sure no mixed up surrogates
+                if (Char::IsSurrogate(ch))
+                {
+                    if (Char::IsHighSurrogate(ch))
+                    {
+                        // High Surrogate
+                        if (bHighSurrogate)
+                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+                        bHighSurrogate = true;
+                    }
+                    else
+                    {
+                        // Low surrogate
+                        if (!bHighSurrogate)
+                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+                        bHighSurrogate = false;
+                    }
+                }
+
+                count++;
+            }
+
+            // Need to make sure that bHighSurrogate isn't true
+            if (bHighSurrogate)
+                throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+
+            return count;
+        }
+
+        // If no fallback return 0
+        return 0;
+    }
+
+    // private helper methods
+    void ThrowLastBytesRecursive(BYTE bytesUnknown[])
+    {
+        throw ArgumentException("Recursive fallback not allowed");
+    }
+};
+
+class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
+{
+    // Store our default string
+    WCHAR strDefault[2];
+    int strDefaultLength;
+    int fallbackCount = -1;
+    int fallbackIndex = -1;
+
+public:
+    // Construction
+    DecoderReplacementFallbackBuffer(DecoderReplacementFallback* fallback)
+    {
+        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
+        strDefaultLength = PAL_wcslen((const WCHAR *)fallback->GetDefaultString());
+    }
+
+    // Fallback Methods
+    virtual bool Fallback(BYTE bytesUnknown[], int index, int size)
+    {
+        // We expect no previous fallback in our buffer
+        // We can't call recursively but others might (note, we don't test on last char!!!)
+        if (fallbackCount >= 1)
+        {
+            ThrowLastBytesRecursive(bytesUnknown);
+        }
+
+        // Go ahead and get our fallback
+        if (strDefaultLength == 0)
+            return false;
+
+        fallbackCount = strDefaultLength;
+        fallbackIndex = -1;
+
+        return true;
+    }
+
+    virtual WCHAR GetNextChar()
+    {
+        // We want it to get < 0 because == 0 means that the current/last character is a fallback
+        // and we need to detect recursion.  We could have a flag but we already have this counter.
+        fallbackCount--;
+        fallbackIndex++;
+
+        // Do we have anything left? 0 is now last fallback char, negative is nothing left
+        if (fallbackCount < 0)
+            return '\0';
+
+        // Need to get it out of the buffer.
+        // Make sure it didn't wrap from the fast count-- path
+        if (fallbackCount == INT_MAX)
+        {
+            fallbackCount = -1;
+            return '\0';
+        }
+
+        // Now make sure its in the expected range
+        Contract::Assert(fallbackIndex < strDefaultLength && fallbackIndex >= 0,
+            "Index exceeds buffer range");
+
+        return strDefault[fallbackIndex];
+    }
+
+    virtual bool MovePrevious()
+    {
+        // Back up one, only if we just processed the last character (or earlier)
+        if (fallbackCount >= -1 && fallbackIndex >= 0)
+        {
+            fallbackIndex--;
+            fallbackCount++;
+            return true;
+        }
+
+        // Return false 'cause we couldn't do it.
+        return false;
+    }
+
+    // How many characters left to output?
+    virtual int GetRemaining()
+    {
+        // Our count is 0 for 1 character left.
+        return (fallbackCount < 0) ? 0 : fallbackCount;
+    }
+
+    // Clear the buffer
+    virtual void Reset()
+    {
+        fallbackCount = -1;
+        fallbackIndex = -1;
+        byteStart = nullptr;
+    }
+
+    // This version just counts the fallback and doesn't actually copy anything.
+    virtual int InternalFallback(BYTE bytes[], BYTE* pBytes, int size)
+        // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
+        // array, and we might need the index, hence the byte*
+    {
+        // return our replacement string Length
+        return strDefaultLength;
+    }
+};
+
+class DecoderExceptionFallbackBuffer : public DecoderFallbackBuffer
+{
+public:
+    DecoderExceptionFallbackBuffer()
+    {
+    }
+
+    virtual bool Fallback(BYTE bytesUnknown[], int index, int size)
+    {
+        throw DecoderFallbackException(
+            "Unable to translate UTF-8 character to Unicode", bytesUnknown, index);
+    }
+
+    virtual WCHAR GetNextChar()
+    {
+        return 0;
+    }
+
+    virtual bool MovePrevious()
+    {
+        // Exception fallback doesn't have anywhere to back up to.
+        return false;
+    }
+
+    // Exceptions are always empty
+    virtual int GetRemaining()
+    {
+        return 0;
+    }
+
+};
+
+class DecoderExceptionFallback : public DecoderFallback
+{
+    // Construction
+public:
+    DecoderExceptionFallback()
+    {
+    }
+
+    virtual DecoderFallbackBuffer* CreateFallbackBuffer()
+    {
+        return InternalNew<DecoderExceptionFallbackBuffer>();
+    }
+
+    // Maximum number of characters that this instance of this fallback could return
+    virtual int GetMaxCharCount()
+    {
+        return 0;
+    }
+};
+
+DecoderFallbackBuffer* DecoderReplacementFallback::CreateFallbackBuffer()
+{
+    return InternalNew<DecoderReplacementFallbackBuffer>(this);
+}
+
+class EncoderFallbackException : public ArgumentException
+{
+    WCHAR   charUnknown;
+    WCHAR   charUnknownHigh;
+    WCHAR   charUnknownLow;
+    int     index;
+
+public:
+    EncoderFallbackException(
+        LPCSTR message, WCHAR charUnknown, int index) : ArgumentException(message)
+    {
+        this->charUnknown = charUnknown;
+        this->index = index;
+    }
+
+    EncoderFallbackException(
+        LPCSTR message, WCHAR charUnknownHigh, WCHAR charUnknownLow, int index) : ArgumentException(message)
+    {
+        if (!Char::IsHighSurrogate(charUnknownHigh))
+        {
+            throw ArgumentOutOfRangeException("charUnknownHigh",
+                "Argument out of range 0xD800..0xDBFF");
+        }
+        if (!Char::IsLowSurrogate(charUnknownLow))
+        {
+            throw ArgumentOutOfRangeException("charUnknownLow",
+                "Argument out of range 0xDC00..0xDFFF");
+        }
+        Contract::EndContractBlock();
+
+        this->charUnknownHigh = charUnknownHigh;
+        this->charUnknownLow = charUnknownLow;
+        this->index = index;
+    }
+
+    WCHAR GetCharUnknown()
+    {
+        return (charUnknown);
+    }
+
+    WCHAR GetCharUnknownHigh()
+    {
+        return (charUnknownHigh);
+    }
+
+    WCHAR GetCharUnknownLow()
+    {
+        return (charUnknownLow);
+    }
+
+    int GetIndex()
+    {
+        return index;
+    }
+
+    // Return true if the unknown character is a surrogate pair.
+    bool IsUnknownSurrogate()
+    {
+        return (charUnknownHigh != '\0');
+    }
+};
+
+class EncoderFallbackBuffer;
+
+class EncoderFallback
+{
+public:
+
+    // Fallback
+    //
+    // Return the appropriate unicode string alternative to the character that need to fall back.
+
+    virtual EncoderFallbackBuffer* CreateFallbackBuffer() = 0;
+
+    // Maximum number of characters that this instance of this fallback could return
+    virtual int GetMaxCharCount() = 0;
+};
+
+class EncoderReplacementFallback : public EncoderFallback
+{
+    // Our variables
+    WCHAR strDefault[2];
+    int strDefaultLength;
+
+public:
+    // Construction.  Default replacement fallback uses no best fit and ? replacement string
+    EncoderReplacementFallback() : EncoderReplacementFallback(W("?"))
+    {
+    }
+
+    EncoderReplacementFallback(const WCHAR* replacement)
+    {
+        // Must not be null
+        if (replacement == nullptr)
+            throw ArgumentNullException("replacement");
+        Contract::EndContractBlock();
+
+        // Make sure it doesn't have bad surrogate pairs
+        bool bFoundHigh = false;
+        int replacementLength = PAL_wcslen((const WCHAR *)replacement);
+        for (int i = 0; i < replacementLength; i++)
+        {
+            // Found a surrogate?
+            if (Char::IsSurrogate(replacement, i))
+            {
+                // High or Low?
+                if (Char::IsHighSurrogate(replacement, i))
+                {
+                    // if already had a high one, stop
+                    if (bFoundHigh)
+                        break;  // break & throw at the bFoundHIgh below
+                    bFoundHigh = true;
+                }
+                else
+                {
+                    // Low, did we have a high?
+                    if (!bFoundHigh)
+                    {
+                        // Didn't have one, make if fail when we stop
+                        bFoundHigh = true;
+                        break;
+                    }
+
+                    // Clear flag
+                    bFoundHigh = false;
+                }
+            }
+            // If last was high we're in trouble (not surrogate so not low surrogate, so break)
+            else if (bFoundHigh)
+                break;
+        }
+        if (bFoundHigh)
+            throw ArgumentException("String 'replacement' contains invalid Unicode code points.", "replacement");
+
+        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), replacement);
+        strDefaultLength = replacementLength;
+    }
+
+    WCHAR* GetDefaultString()
+    {
+        return strDefault;
+    }
+
+    virtual EncoderFallbackBuffer* CreateFallbackBuffer();
+
+    // Maximum number of characters that this instance of this fallback could return
+    virtual int GetMaxCharCount()
+    {
+        return strDefaultLength;
+    }
+};
+
+class EncoderFallbackBuffer
+{
+    friend class UTF8Encoding;
+    // Most implementations will probably need an implementation-specific constructor
+
+    // Public methods that cannot be overridden that let us do our fallback thing
+    // These wrap the internal methods so that we can check for people doing stuff that is incorrect
+
+public:
+    virtual ~EncoderFallbackBuffer() = default;
+
+    virtual bool Fallback(WCHAR charUnknown, int index) = 0;
+
+    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index) = 0;
+
+    // Get next character
+    virtual WCHAR GetNextChar() = 0;
+
+    // Back up a character
+    virtual bool MovePrevious() = 0;
+
+    // How many chars left in this fallback?
+    virtual int GetRemaining() = 0;
+
+    // Not sure if this should be public or not.
+    // Clear the buffer
+    virtual void Reset()
+    {
+        while (GetNextChar() != (WCHAR)0);
+    }
+
+    // Internal items to help us figure out what we're doing as far as error messages, etc.
+    // These help us with our performance and messages internally
+protected:
+    WCHAR*          charStart;
+    WCHAR*          charEnd;
+    bool            setEncoder;
+    bool            bUsedEncoder;
+    bool            bFallingBack = false;
+    int             iRecursionCount = 0;
+    static const int iMaxRecursion = 250;
+
+    // Internal Reset
+    // For example, what if someone fails a conversion and wants to reset one of our fallback buffers?
+    void InternalReset()
+    {
+        charStart = nullptr;
+        bFallingBack = false;
+        iRecursionCount = 0;
+        Reset();
+    }
+
+    // Set the above values
+    // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
+    void InternalInitialize(WCHAR* charStart, WCHAR* charEnd, bool setEncoder)
+    {
+        this->charStart = charStart;
+        this->charEnd = charEnd;
+        this->setEncoder = setEncoder;
+        this->bUsedEncoder = false;
+        this->bFallingBack = false;
+        this->iRecursionCount = 0;
+    }
+
+    WCHAR InternalGetNextChar()
+    {
+        WCHAR ch = GetNextChar();
+        bFallingBack = (ch != 0);
+        if (ch == 0) iRecursionCount = 0;
+        return ch;
+    }
+
+    // Fallback the current character using the remaining buffer and encoder if necessary
+    // This can only be called by our encodings (other have to use the public fallback methods), so
+    // we can use our EncoderNLS here too.
+    // setEncoder is true if we're calling from a GetBytes method, false if we're calling from a GetByteCount
+    //
+    // Note that this could also change the contents of this->encoder, which is the same
+    // object that the caller is using, so the caller could mess up the encoder for us
+    // if they aren't careful.
+    virtual bool InternalFallback(WCHAR ch, WCHAR** chars)
+    {
+        // Shouldn't have null charStart
+        Contract::Assert(charStart != nullptr,
+            "[EncoderFallback.InternalFallbackBuffer]Fallback buffer is not initialized");
+
+        // Get our index, remember chars was preincremented to point at next char, so have to -1
+        int index = (int)(*chars - charStart) - 1;
+
+        // See if it was a high surrogate
+        if (Char::IsHighSurrogate(ch))
+        {
+            // See if there's a low surrogate to go with it
+            if (*chars >= this->charEnd)
+            {
+                // Nothing left in input buffer
+                // No input, return 0
+            }
+            else
+            {
+                // Might have a low surrogate
+                WCHAR cNext = **chars;
+                if (Char::IsLowSurrogate(cNext))
+                {
+                    // If already falling back then fail
+                    if (bFallingBack && iRecursionCount++ > iMaxRecursion)
+                        ThrowLastCharRecursive(ch, cNext);
+
+                    // Next is a surrogate, add it as surrogate pair, and increment chars
+                    (*chars)++;
+                    bFallingBack = Fallback(ch, cNext, index);
+                    return bFallingBack;
+                }
+
+                // Next isn't a low surrogate, just fallback the high surrogate
+            }
+        }
+
+        // If already falling back then fail
+        if (bFallingBack && iRecursionCount++ > iMaxRecursion)
+            ThrowLastCharRecursive((int)ch);
+
+        // Fall back our char
+        bFallingBack = Fallback(ch, index);
+
+        return bFallingBack;
+    }
+
+    // private helper methods
+    void ThrowLastCharRecursive(WCHAR highSurrogate, WCHAR lowSurrogate)
+    {
+        // Throw it, using our complete character
+        throw ArgumentException("Recursive fallback not allowed", "chars");
+    }
+
+    void ThrowLastCharRecursive(int utf32Char)
+    {
+        throw ArgumentException("Recursive fallback not allowed", "chars");
+    }
+
+};
+
+class EncoderReplacementFallbackBuffer : public EncoderFallbackBuffer
+{
+    // Store our default string
+    WCHAR strDefault[4];
+    int strDefaultLength;
+    int fallbackCount = -1;
+    int fallbackIndex = -1;
+public:
+    // Construction
+    EncoderReplacementFallbackBuffer(EncoderReplacementFallback* fallback)
+    {
+        // 2X in case we're a surrogate pair
+        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
+        wcscat_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
+        strDefaultLength = 2 * PAL_wcslen((const WCHAR *)fallback->GetDefaultString());
+
+    }
+
+    // Fallback Methods
+    virtual bool Fallback(WCHAR charUnknown, int index)
+    {
+        // If we had a buffer already we're being recursive, throw, it's probably at the suspect
+        // character in our array.
+        if (fallbackCount >= 1)
+        {
+            // If we're recursive we may still have something in our buffer that makes this a surrogate
+            if (Char::IsHighSurrogate(charUnknown) && fallbackCount >= 0 &&
+                Char::IsLowSurrogate(strDefault[fallbackIndex + 1]))
+                ThrowLastCharRecursive(charUnknown, strDefault[fallbackIndex + 1]);
+
+            // Nope, just one character
+            ThrowLastCharRecursive((int)charUnknown);
+        }
+
+        // Go ahead and get our fallback
+        // Divide by 2 because we aren't a surrogate pair
+        fallbackCount = strDefaultLength / 2;
+        fallbackIndex = -1;
+
+        return fallbackCount != 0;
+    }
+
+    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index)
+    {
+        // Double check input surrogate pair
+        if (!Char::IsHighSurrogate(charUnknownHigh))
+            throw ArgumentOutOfRangeException("charUnknownHigh",
+            "Argument out of range 0xD800..0xDBFF");
+
+        if (!Char::IsLowSurrogate(charUnknownLow))
+            throw ArgumentOutOfRangeException("charUnknownLow",
+            "Argument out of range 0xDC00..0xDFFF");
+        Contract::EndContractBlock();
+
+        // If we had a buffer already we're being recursive, throw, it's probably at the suspect
+        // character in our array.
+        if (fallbackCount >= 1)
+            ThrowLastCharRecursive(charUnknownHigh, charUnknownLow);
+
+        // Go ahead and get our fallback
+        fallbackCount = strDefaultLength;
+        fallbackIndex = -1;
+
+        return fallbackCount != 0;
+    }
+
+    virtual WCHAR GetNextChar()
+    {
+        // We want it to get < 0 because == 0 means that the current/last character is a fallback
+        // and we need to detect recursion.  We could have a flag but we already have this counter.
+        fallbackCount--;
+        fallbackIndex++;
+
+        // Do we have anything left? 0 is now last fallback char, negative is nothing left
+        if (fallbackCount < 0)
+            return '\0';
+
+        // Need to get it out of the buffer.
+        // Make sure it didn't wrap from the fast count-- path
+        if (fallbackCount == INT_MAX)
+        {
+            fallbackCount = -1;
+            return '\0';
+        }
+
+        // Now make sure its in the expected range
+        Contract::Assert(fallbackIndex < strDefaultLength && fallbackIndex >= 0,
+            "Index exceeds buffer range");
+
+        return strDefault[fallbackIndex];
+    }
+
+    virtual bool MovePrevious()
+    {
+        // Back up one, only if we just processed the last character (or earlier)
+        if (fallbackCount >= -1 && fallbackIndex >= 0)
+        {
+            fallbackIndex--;
+            fallbackCount++;
+            return true;
+        }
+
+        // Return false 'cause we couldn't do it.
+        return false;
+    }
+
+    // How many characters left to output?
+    virtual int GetRemaining()
+    {
+        // Our count is 0 for 1 character left.
+        return (fallbackCount < 0) ? 0 : fallbackCount;
+    }
+
+    // Clear the buffer
+    virtual void Reset()
+    {
+        fallbackCount = -1;
+        fallbackIndex = 0;
+        charStart = nullptr;
+        bFallingBack = false;
+    }
+};
+
+class EncoderExceptionFallbackBuffer : public EncoderFallbackBuffer
+{
+public:
+    EncoderExceptionFallbackBuffer()
+    {
+    }
+
+    virtual bool Fallback(WCHAR charUnknown, int index)
+    {
+        // Fall back our char
+        throw EncoderFallbackException("Unable to translate Unicode character to UTF-8", charUnknown, index);
+    }
+
+    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index)
+    {
+        if (!Char::IsHighSurrogate(charUnknownHigh))
+        {
+            throw ArgumentOutOfRangeException("charUnknownHigh",
+                "Argument out of range 0xD800..0xDBFF");
+        }
+        if (!Char::IsLowSurrogate(charUnknownLow))
+        {
+            throw ArgumentOutOfRangeException("charUnknownLow",
+                "Argument out of range 0xDC00..0xDFFF");
+        }
+        Contract::EndContractBlock();
+
+        //int iTemp = Char::ConvertToUtf32(charUnknownHigh, charUnknownLow);
+
+        // Fall back our char
+        throw EncoderFallbackException(
+            "Unable to translate Unicode character to UTF-8", charUnknownHigh, charUnknownLow, index);
+    }
+
+    virtual WCHAR GetNextChar()
+    {
+        return 0;
+    }
+
+    virtual bool MovePrevious()
+    {
+        // Exception fallback doesn't have anywhere to back up to.
+        return false;
+    }
+
+    // Exceptions are always empty
+    virtual int GetRemaining()
+    {
+        return 0;
+    }
+};
+
+class EncoderExceptionFallback : public EncoderFallback
+{
+    // Construction
+public:
+    EncoderExceptionFallback()
+    {
+    }
+
+    virtual EncoderFallbackBuffer* CreateFallbackBuffer()
+    {
+        return InternalNew<EncoderExceptionFallbackBuffer>();
+    }
+
+    // Maximum number of characters that this instance of this fallback could return
+    virtual int GetMaxCharCount()
+    {
+        return 0;
+    }
+};
+
+EncoderFallbackBuffer* EncoderReplacementFallback::CreateFallbackBuffer()
+{
+    return InternalNew<EncoderReplacementFallbackBuffer>(this);
+}
+
+class UTF8Encoding
+{
+    EncoderFallback* encoderFallback;
+    // Instances of the two possible fallbacks. The constructor parameter
+    // determines which one to use.
+    EncoderReplacementFallback encoderReplacementFallback;
+    EncoderExceptionFallback encoderExceptionFallback;
+
+    DecoderFallback* decoderFallback;
+    // Instances of the two possible fallbacks. The constructor parameter
+    // determines which one to use.
+    DecoderReplacementFallback decoderReplacementFallback;
+    DecoderExceptionFallback decoderExceptionFallback;
+
+    bool InRange(int c, int begin, int end)
+    {
+        return begin <= c && c <= end;
+    }
+
+    size_t PtrDiff(WCHAR* ptr1, WCHAR* ptr2)
+    {
+        return ptr1 - ptr2;
+    }
+
+    size_t PtrDiff(BYTE* ptr1, BYTE* ptr2)
+    {
+        return ptr1 - ptr2;
+    }
+
+    void ThrowBytesOverflow()
+    {
+        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
+        // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
+        throw InsufficientBufferException("The output byte buffer is too small to contain the encoded data", "bytes");
+    }
+
+    void ThrowBytesOverflow(bool nothingEncoded)
+    {
+        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
+        // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
+        if (nothingEncoded){
+            ThrowBytesOverflow();
+        }
+    }
+
+    void ThrowCharsOverflow()
+    {
+        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
+        // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
+        throw InsufficientBufferException("The output char buffer is too small to contain the encoded data", "chars");
+    }
+
+    void ThrowCharsOverflow(bool nothingEncoded)
+    {
+        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
+        // This happens if user has implemented an decoder fallback with a broken GetMaxCharCount
+        if (nothingEncoded){
+            ThrowCharsOverflow();
+        }
+    }
+
+    // During GetChars we had an invalid byte sequence
+    // pSrc is backed up to the start of the bad sequence if we didn't have room to
+    // fall it back.  Otherwise pSrc remains where it is.
+    bool FallbackInvalidByteSequence(BYTE** pSrc, int ch, DecoderFallbackBuffer* fallback, WCHAR** pTarget)
+    {
+        // Get our byte[]
+        BYTE* pStart = *pSrc;
+        BYTE bytesUnknown[3];
+        int size = GetBytesUnknown(pStart, ch, bytesUnknown);
+
+        // Do the actual fallback
+        if (!fallback->InternalFallback(bytesUnknown, *pSrc, pTarget, size))
+        {
+            // Oops, it failed, back up to pStart
+            *pSrc = pStart;
+            return false;
+        }
+
+        // It worked
+        return true;
+    }
+
+    int FallbackInvalidByteSequence(BYTE* pSrc, int ch, DecoderFallbackBuffer *fallback)
+    {
+        // Get our byte[]
+        BYTE bytesUnknown[3];
+        int size = GetBytesUnknown(pSrc, ch, bytesUnknown);
+
+        // Do the actual fallback
+        int count = fallback->InternalFallback(bytesUnknown, pSrc, size);
+
+        // # of fallback chars expected.
+        // Note that we only get here for "long" sequences, and have already unreserved
+        // the count that we prereserved for the input bytes
+        return count;
+    }
+
+    int GetBytesUnknown(BYTE* pSrc, int ch, BYTE* bytesUnknown)
+    {
+        int size;
+
+        // See if it was a plain char
+        // (have to check >= 0 because we have all sorts of weird bit flags)
+        if (ch < 0x100 && ch >= 0)
+        {
+            pSrc--;
+            bytesUnknown[0] = (BYTE)ch;
+            size =  1;
+        }
+        // See if its an unfinished 2 byte sequence
+        else if ((ch & (SupplimentarySeq | ThreeByteSeq)) == 0)
+        {
+            pSrc--;
+            bytesUnknown[0] = (BYTE)((ch & 0x1F) | 0xc0);
+            size = 1;
+        }
+        // So now we're either 2nd byte of 3 or 4 byte sequence or
+        // we hit a non-trail byte or we ran out of space for 3rd byte of 4 byte sequence
+        // 1st check if its a 4 byte sequence
+        else if ((ch & SupplimentarySeq) != 0)
+        {
+            //  3rd byte of 4 byte sequence?
+            if ((ch & (FinalByte >> 6)) != 0)
+            {
+                // 3rd byte of 4 byte sequence
+                pSrc -= 3;
+                bytesUnknown[0] = (BYTE)(((ch >> 12) & 0x07) | 0xF0);
+                bytesUnknown[1] = (BYTE)(((ch >> 6) & 0x3F) | 0x80);
+                bytesUnknown[2] = (BYTE)(((ch)& 0x3F) | 0x80);
+                size = 3;
+            }
+            else if ((ch & (FinalByte >> 12)) != 0)
+            {
+                // 2nd byte of a 4 byte sequence
+                pSrc -= 2;
+                bytesUnknown[0] = (BYTE)(((ch >> 6) & 0x07) | 0xF0);
+                bytesUnknown[1] = (BYTE)(((ch)& 0x3F) | 0x80);
+                size = 2;
+            }
+            else
+            {
+                // 4th byte of a 4 byte sequence
+                pSrc--;
+                bytesUnknown[0] = (BYTE)(((ch)& 0x07) | 0xF0);
+                size = 1;
+            }
+        }
+        else
+        {
+            // 2nd byte of 3 byte sequence?
+            if ((ch & (FinalByte >> 6)) != 0)
+            {
+                // So its 2nd byte of a 3 byte sequence
+                pSrc -= 2;
+                bytesUnknown[0] = (BYTE)(((ch >> 6) & 0x0F) | 0xE0);
+                bytesUnknown[1] = (BYTE)(((ch)& 0x3F) | 0x80);
+                size = 2;
+            }
+            else
+            {
+                // 1st byte of a 3 byte sequence
+                pSrc--;
+                bytesUnknown[0] = (BYTE)(((ch)& 0x0F) | 0xE0);
+                size = 1;
+            }
+        }
+
+        return size;
+    }
+
+public:
+
+    UTF8Encoding(bool isThrowException)
+        : encoderReplacementFallback(W("\xFFFD")), decoderReplacementFallback(W("\xFFFD"))
+    {
+        if (isThrowException)
+        {
+            encoderFallback = &encoderExceptionFallback;
+            decoderFallback = &decoderExceptionFallback;
+        }
+        else
+        {
+            encoderFallback = &encoderReplacementFallback;
+            decoderFallback = &decoderReplacementFallback;
+        }
+    }
+
+    // These are bitmasks used to maintain the state in the decoder. They occupy the higher bits
+    // while the actual character is being built in the lower bits. They are shifted together
+    // with the actual bits of the character.
+
+    // bits 30 & 31 are used for pending bits fixup
+    const int FinalByte = 1 << 29;
+    const int SupplimentarySeq = 1 << 28;
+    const int ThreeByteSeq = 1 << 27;
+
+    int GetCharCount(BYTE* bytes, int count)
+    {
+        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetCharCount]bytes!=nullptr");
+        Contract::Assert(count >= 0, "[UTF8Encoding.GetCharCount]count >=0");
+
+        // Initialize stuff
+        BYTE *pSrc = bytes;
+        BYTE *pEnd = pSrc + count;
+
+        // Start by assuming we have as many as count, charCount always includes the adjustment
+        // for the character being decoded
+        int charCount = count;
+        int ch = 0;
+        DecoderFallbackBuffer *fallback = nullptr;
+
+        while (true)
+        {
+            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+            if (pSrc >= pEnd) {
+                break;
+            }
+
+            // read next byte. The JIT optimization seems to be getting confused when
+            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+            int cha = *pSrc;
+
+            if (ch == 0) {
+                // no pending bits
+                goto ReadChar;
+            }
+
+            pSrc++;
+
+            // we are expecting to see trailing bytes like 10vvvvvv
+            if ((cha & 0xC0) != 0x80) {
+                // This can be a valid starting byte for another UTF8 byte sequence, so let's put
+                // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
+                pSrc--;
+                charCount += (ch >> 30);
+                goto InvalidByteSequence;
+            }
+
+            // fold in the new byte
+            ch = (ch << 6) | (cha & 0x3F);
+
+            if ((ch & FinalByte) == 0) {
+                Contract::Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
+                    "[UTF8Encoding.GetChars]Invariant volation");
+
+                if ((ch & SupplimentarySeq) != 0) {
+                    if ((ch & (FinalByte >> 6)) != 0) {
+                        // this is 3rd byte (of 4 byte supplimentary) - nothing to do
+                        continue;
+                    }
+
+                    // 2nd byte, check for non-shortest form of supplimentary char and the valid
+                    // supplimentary characters in range 0x010000 - 0x10FFFF at the same time
+                    if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
+                        goto InvalidByteSequence;
+                    }
+                }
+                else {
+                    // Must be 2nd byte of a 3-byte sequence
+                    // check for non-shortest form of 3 byte seq
+                    if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
+                        (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
+                    {
+                        goto InvalidByteSequence;
+                    }
+                }
+                continue;
+            }
+
+            // ready to punch
+
+            // adjust for surrogates in non-shortest form
+            if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq) {
+                charCount--;
+            }
+            goto EncodeChar;
+
+        InvalidByteSequence:
+            // this code fragment should be close to the gotos referencing it
+            // Have to do fallback for invalid bytes
+            if (fallback == nullptr)
+            {
+                fallback = decoderFallback->CreateFallbackBuffer();
+                fallback->InternalInitialize(bytes, nullptr);
+            }
+            charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
+
+            ch = 0;
+            continue;
+
+        ReadChar:
+            ch = *pSrc;
+            pSrc++;
+
+        ProcessChar:
+            if (ch > 0x7F) {
+                // If its > 0x7F, its start of a new multi-byte sequence
+
+                // Long sequence, so unreserve our char.
+                charCount--;
+
+                // bit 6 has to be non-zero for start of multibyte chars.
+                if ((ch & 0x40) == 0) {
+                    // Unexpected trail byte
+                    goto InvalidByteSequence;
+                }
+
+                // start a new long code
+                if ((ch & 0x20) != 0) {
+                    if ((ch & 0x10) != 0) {
+                        // 4 byte encoding - supplimentary character (2 surrogates)
+
+                        ch &= 0x0F;
+
+                        // check that bit 4 is zero and the valid supplimentary character
+                        // range 0x000000 - 0x10FFFF at the same time
+                        if (ch > 0x04) {
+                            ch |= 0xf0;
+                            goto InvalidByteSequence;
+                        }
+
+                        // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
+                        // Final byte flag, count fix if we don't make final byte & supplimentary sequence flag.
+                        ch |= (FinalByte >> 3 * 6) |  // Final byte is 3 more bytes from now
+                            (1 << 30) |           // If it dies on next byte we'll need an extra char
+                            (3 << (30 - 2 * 6)) |     // If it dies on last byte we'll need to subtract a char
+                            (SupplimentarySeq) | (SupplimentarySeq >> 6) |
+                            (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
+
+                        // Our character count will be 2 characters for these 4 bytes, so subtract another char
+                        charCount--;
+                    }
+                    else {
+                        // 3 byte encoding
+                        // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
+                        ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
+                            (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
+
+                        // We'll expect 1 character for these 3 bytes, so subtract another char.
+                        charCount--;
+                    }
+                }
+                else {
+                    // 2 byte encoding
+
+                    ch &= 0x1F;
+
+                    // check for non-shortest form
+                    if (ch <= 1) {
+                        ch |= 0xc0;
+                        goto InvalidByteSequence;
+                    }
+
+                    // Add bit flags so we'll be flagged correctly
+                    ch |= (FinalByte >> 6);
+                }
+                continue;
+            }
+
+        EncodeChar:
+
+#ifdef FASTLOOP
+            int availableBytes = PtrDiff(pEnd, pSrc);
+
+            // don't fall into the fast decoding loop if we don't have enough bytes
+            if (availableBytes <= 13) {
+                // try to get over the remainder of the ascii characters fast though
+            BYTE* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+                while (pSrc < pLocalEnd) {
+                    ch = *pSrc;
+                    pSrc++;
+
+                    if (ch > 0x7F)
+                        goto ProcessChar;
+                }
+                // we are done
+                ch = 0;
+                break;
+            }
+
+            // To compute the upper bound, assume that all characters are ASCII characters at this point,
+            //  the boundary will be decreased for every non-ASCII character we encounter
+            // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
+            BYTE *pStop = pSrc + availableBytes - 7;
+
+            while (pSrc < pStop) {
+                ch = *pSrc;
+                pSrc++;
+
+                if (ch > 0x7F) {
+                    goto LongCode;
+                }
+
+                // get pSrc 2-byte aligned
+                if (((size_t)pSrc & 0x1) != 0) {
+                    ch = *pSrc;
+                    pSrc++;
+                    if (ch > 0x7F) {
+                        goto LongCode;
+                    }
+                }
+
+                // get pSrc 4-byte aligned
+                if (((size_t)pSrc & 0x2) != 0) {
+                    ch = *(USHORT*)pSrc;
+                    if ((ch & 0x8080) != 0) {
+                        goto LongCodeWithMask16;
+                    }
+                    pSrc += 2;
+                }
+
+
+                // Run 8 + 8 characters at a time!
+                while (pSrc < pStop) {
+                    ch = *(int*)pSrc;
+                    int chb = *(int*)(pSrc + 4);
+                    if (((ch | chb) & (int)0x80808080) != 0) {
+                        goto LongCodeWithMask32;
+                    }
+                    pSrc += 8;
+
+                    // This is a really small loop - unroll it
+                    if (pSrc >= pStop)
+                        break;
+
+                    ch = *(int*)pSrc;
+                    chb = *(int*)(pSrc + 4);
+                    if (((ch | chb) & (int)0x80808080) != 0) {
+                        goto LongCodeWithMask32;
+                    }
+                    pSrc += 8;
+                }
+                break;
+
+#if BIGENDIAN
+            LongCodeWithMask32 :
+                // be careful about the sign extension
+                ch = (int)(((uint)ch) >> 16);
+            LongCodeWithMask16:
+                ch = (int)(((uint)ch) >> 8);
+#else // BIGENDIAN
+            LongCodeWithMask32:
+            LongCodeWithMask16:
+                ch &= 0xFF;
+#endif // BIGENDIAN
+                pSrc++;
+                if (ch <= 0x7F) {
+                    continue;
+                }
+
+            LongCode:
+                int chc = *pSrc;
+                pSrc++;
+
+                if (
+                    // bit 6 has to be zero
+                    (ch & 0x40) == 0 ||
+                    // we are expecting to see trailing bytes like 10vvvvvv
+                    (chc & 0xC0) != 0x80)
+                {
+                    goto BadLongCode;
+                }
+
+                chc &= 0x3F;
+
+                // start a new long code
+                if ((ch & 0x20) != 0) {
+
+                    // fold the first two bytes together
+                    chc |= (ch & 0x0F) << 6;
+
+                    if ((ch & 0x10) != 0) {
+                        // 4 byte encoding - surrogate
+                        ch = *pSrc;
+                        if (
+                            // check that bit 4 is zero, the non-shortest form of surrogate
+                            // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
+                            !InRange(chc >> 4, 0x01, 0x10) ||
+                            // we are expecting to see trailing bytes like 10vvvvvv
+                            (ch & 0xC0) != 0x80)
+                        {
+                            goto BadLongCode;
+                        }
+
+                        chc = (chc << 6) | (ch & 0x3F);
+
+                        ch = *(pSrc + 1);
+                        // we are expecting to see trailing bytes like 10vvvvvv
+                        if ((ch & 0xC0) != 0x80) {
+                            goto BadLongCode;
+                        }
+                        pSrc += 2;
+
+                        // extra byte
+                        charCount--;
+                    }
+                    else {
+                        // 3 byte encoding
+                        ch = *pSrc;
+                        if (
+                            // check for non-shortest form of 3 byte seq
+                            (chc & (0x1F << 5)) == 0 ||
+                            // Can't have surrogates here.
+                            (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
+                            // we are expecting to see trailing bytes like 10vvvvvv
+                            (ch & 0xC0) != 0x80)
+                        {
+                            goto BadLongCode;
+                        }
+                        pSrc++;
+
+                        // extra byte
+                        charCount--;
+                    }
+                }
+                else {
+                    // 2 byte encoding
+
+                    // check for non-shortest form
+                    if ((ch & 0x1E) == 0) {
+                        goto BadLongCode;
+                    }
+                }
+
+                // extra byte
+                charCount--;
+            }
+#endif // FASTLOOP
+
+            // no pending bits at this point
+            ch = 0;
+            continue;
+
+        BadLongCode:
+            pSrc -= 2;
+            ch = 0;
+            continue;
+        }
+
+        // May have a problem if we have to flush
+        if (ch != 0)
+        {
+            // We were already adjusting for these, so need to unadjust
+            charCount += (ch >> 30);
+            // Have to do fallback for invalid bytes
+            if (fallback == nullptr)
+            {
+                fallback = decoderFallback->CreateFallbackBuffer();
+                fallback->InternalInitialize(bytes, nullptr);
+            }
+            charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
+        }
+
+        // Shouldn't have anything in fallback buffer for GetCharCount
+        // (don't have to check m_throwOnOverflow for count)
+        Contract::Assert(fallback == nullptr || fallback->GetRemaining() == 0,
+            "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at end");
+
+        InternalDelete(fallback);
+
+        return charCount;
+
+    }
+
+    int GetChars(BYTE* bytes, int byteCount, WCHAR* chars, int charCount)
+    {
+        Contract::Assert(chars != nullptr, "[UTF8Encoding.GetChars]chars!=nullptr");
+        Contract::Assert(byteCount >= 0, "[UTF8Encoding.GetChars]byteCount >=0");
+        Contract::Assert(charCount >= 0, "[UTF8Encoding.GetChars]charCount >=0");
+        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetChars]bytes!=nullptr");
+
+        BYTE *pSrc = bytes;
+        WCHAR *pTarget = chars;
+
+        BYTE *pEnd = pSrc + byteCount;
+        WCHAR *pAllocatedBufferEnd = pTarget + charCount;
+
+        int ch = 0;
+
+        DecoderFallbackBuffer *fallback = nullptr;
+
+        while (true)
+        {
+            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+
+            if (pSrc >= pEnd) {
+                break;
+            }
+
+            // read next byte. The JIT optimization seems to be getting confused when
+            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+            int cha = *pSrc;
+
+            if (ch == 0) {
+                // no pending bits
+                goto ReadChar;
+            }
+
+            pSrc++;
+
+            // we are expecting to see trailing bytes like 10vvvvvv
+            if ((cha & 0xC0) != 0x80) {
+                // This can be a valid starting byte for another UTF8 byte sequence, so let's put
+                // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
+                pSrc--;
+                goto InvalidByteSequence;
+            }
+
+            // fold in the new byte
+            ch = (ch << 6) | (cha & 0x3F);
+
+            if ((ch & FinalByte) == 0) {
+                // Not at last byte yet
+                Contract::Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
+                    "[UTF8Encoding.GetChars]Invariant volation");
+
+                if ((ch & SupplimentarySeq) != 0) {
+                    // Its a 4-byte supplimentary sequence
+                    if ((ch & (FinalByte >> 6)) != 0) {
+                        // this is 3rd byte of 4 byte sequence - nothing to do
+                        continue;
+                    }
+
+                    // 2nd byte of 4 bytes
+                    // check for non-shortest form of surrogate and the valid surrogate
+                    // range 0x000000 - 0x10FFFF at the same time
+                    if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
+                        goto InvalidByteSequence;
+                    }
+                }
+                else {
+                    // Must be 2nd byte of a 3-byte sequence
+                    // check for non-shortest form of 3 byte seq
+                    if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
+                        (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
+                    {
+                        goto InvalidByteSequence;
+                    }
+                }
+                continue;
+            }
+
+            // ready to punch
+
+            // surrogate in shortest form?
+            // Might be possible to get rid of this?  Already did non-shortest check for 4-byte sequence when reading 2nd byte?
+            if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq) {
+                // let the range check for the second char throw the exception
+                if (pTarget < pAllocatedBufferEnd) {
+                    *pTarget = (WCHAR)(((ch >> 10) & 0x7FF) +
+                        (SHORT)((CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10))));
+                    pTarget++;
+
+                    ch = (ch & 0x3FF) +
+                        (int)(CharUnicodeInfo::LOW_SURROGATE_START);
+                }
+            }
+
+            goto EncodeChar;
+
+        InvalidByteSequence:
+            // this code fragment should be close to the gotos referencing it
+            // Have to do fallback for invalid bytes
+            if (fallback == nullptr)
+            {
+                fallback = decoderFallback->CreateFallbackBuffer();
+                fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
+            }
+
+            // That'll back us up the appropriate # of bytes if we didn't get anywhere
+            if (!FallbackInvalidByteSequence(&pSrc, ch, fallback, &pTarget))
+            {
+                // Ran out of buffer space
+                // Need to throw an exception?
+                Contract::Assert(pSrc >= bytes || pTarget == chars,
+                    "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer after fallback");
+                fallback->InternalReset();
+                ThrowCharsOverflow(pTarget == chars);
+                ch = 0;
+                break;
+            }
+            Contract::Assert(pSrc >= bytes,
+                "[UTF8Encoding.GetChars]Expected invalid byte sequence to have remained within the byte array");
+            ch = 0;
+            continue;
+
+        ReadChar:
+            ch = *pSrc;
+            pSrc++;
+
+        ProcessChar:
+            if (ch > 0x7F) {
+                // If its > 0x7F, its start of a new multi-byte sequence
+
+                // bit 6 has to be non-zero
+                if ((ch & 0x40) == 0) {
+                    goto InvalidByteSequence;
+                }
+
+                // start a new long code
+                if ((ch & 0x20) != 0) {
+                    if ((ch & 0x10) != 0) {
+                        // 4 byte encoding - supplimentary character (2 surrogates)
+
+                        ch &= 0x0F;
+
+                        // check that bit 4 is zero and the valid supplimentary character
+                        // range 0x000000 - 0x10FFFF at the same time
+                        if (ch > 0x04) {
+                            ch |= 0xf0;
+                            goto InvalidByteSequence;
+                        }
+
+                        ch |= (FinalByte >> 3 * 6) | (1 << 30) | (3 << (30 - 2 * 6)) |
+                            (SupplimentarySeq) | (SupplimentarySeq >> 6) |
+                            (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
+                    }
+                    else {
+                        // 3 byte encoding
+                        ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
+                            (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
+                    }
+                }
+                else {
+                    // 2 byte encoding
+
+                    ch &= 0x1F;
+
+                    // check for non-shortest form
+                    if (ch <= 1) {
+                        ch |= 0xc0;
+                        goto InvalidByteSequence;
+                    }
+
+                    ch |= (FinalByte >> 6);
+                }
+                continue;
+            }
+
+        EncodeChar:
+            // write the pending character
+            if (pTarget >= pAllocatedBufferEnd)
+            {
+                // Fix chars so we make sure to throw if we didn't output anything
+                ch &= 0x1fffff;
+                if (ch > 0x7f)
+                {
+                    if (ch > 0x7ff)
+                    {
+                        if (ch >= CharUnicodeInfo::LOW_SURROGATE_START &&
+                            ch <= CharUnicodeInfo::LOW_SURROGATE_END)
+                        {
+                            pSrc--;     // It was 4 bytes
+                            pTarget--;  // 1 was stored already, but we can't remember 1/2, so back up
+                        }
+                        else if (ch > 0xffff)
+                        {
+                            pSrc--;     // It was 4 bytes, nothing was stored
+                        }
+                        pSrc--;         // It was at least 3 bytes
+                    }
+                    pSrc--;             // It was at least 2 bytes
+                }
+                pSrc--;
+
+                // Throw that we don't have enough room (pSrc could be < chars if we had started to process
+                // a 4 byte sequence already)
+                Contract::Assert(pSrc >= bytes || pTarget == chars,
+                    "[UTF8Encoding.GetChars]Expected pSrc to be within input buffer or throw due to no output]");
+                ThrowCharsOverflow(pTarget == chars);
+
+                // Don't store ch in decoder, we already backed up to its start
+                ch = 0;
+
+                // Didn't throw, just use this buffer size.
+                break;
+            }
+            *pTarget = (WCHAR)ch;
+            pTarget++;
+
+#ifdef FASTLOOP
+            int availableChars = PtrDiff(pAllocatedBufferEnd, pTarget);
+            int availableBytes = PtrDiff(pEnd, pSrc);
+
+            // don't fall into the fast decoding loop if we don't have enough bytes
+            // Test for availableChars is done because pStop would be <= pTarget.
+            if (availableBytes <= 13) {
+                // we may need as many as 1 character per byte
+                if (availableChars < availableBytes) {
+                    // not enough output room.  no pending bits at this point
+                    ch = 0;
+                    continue;
+                }
+
+                // try to get over the remainder of the ascii characters fast though
+                BYTE* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+                while (pSrc < pLocalEnd) {
+                    ch = *pSrc;
+                    pSrc++;
+
+                    if (ch > 0x7F)
+                        goto ProcessChar;
+
+                    *pTarget = (WCHAR)ch;
+                    pTarget++;
+                }
+                // we are done
+                ch = 0;
+                break;
+            }
+
+            // we may need as many as 1 character per byte, so reduce the byte count if necessary.
+            // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
+            if (availableChars < availableBytes) {
+                availableBytes = availableChars;
+            }
+
+            // To compute the upper bound, assume that all characters are ASCII characters at this point,
+            //  the boundary will be decreased for every non-ASCII character we encounter
+            // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
+            WCHAR *pStop = pTarget + availableBytes - 7;
+
+            while (pTarget < pStop) {
+                ch = *pSrc;
+                pSrc++;
+
+                if (ch > 0x7F) {
+                    goto LongCode;
+                }
+                *pTarget = (WCHAR)ch;
+                pTarget++;
+
+                // get pSrc to be 2-byte aligned
+                if ((((size_t)pSrc) & 0x1) != 0) {
+                    ch = *pSrc;
+                    pSrc++;
+                    if (ch > 0x7F) {
+                        goto LongCode;
+                    }
+                    *pTarget = (WCHAR)ch;
+                    pTarget++;
+                }
+
+                // get pSrc to be 4-byte aligned
+                if ((((size_t)pSrc) & 0x2) != 0) {
+                    ch = *(USHORT*)pSrc;
+                    if ((ch & 0x8080) != 0) {
+                        goto LongCodeWithMask16;
+                    }
+
+                    // Unfortunately, this is endianness sensitive
+#if BIGENDIAN
+                    *pTarget = (WCHAR)((ch >> 8) & 0x7F);
+                    pSrc += 2;
+                    *(pTarget + 1) = (WCHAR)(ch & 0x7F);
+                    pTarget += 2;
+#else // BIGENDIAN
+                    *pTarget = (WCHAR)(ch & 0x7F);
+                    pSrc += 2;
+                    *(pTarget + 1) = (WCHAR)((ch >> 8) & 0x7F);
+                    pTarget += 2;
+#endif // BIGENDIAN
+                }
+
+                // Run 8 characters at a time!
+                while (pTarget < pStop) {
+                    ch = *(int*)pSrc;
+                    int chb = *(int*)(pSrc + 4);
+                    if (((ch | chb) & (int)0x80808080) != 0) {
+                        goto LongCodeWithMask32;
+                    }
+
+                    // Unfortunately, this is endianness sensitive
+#if BIGENDIAN
+                    *pTarget = (WCHAR)((ch >> 24) & 0x7F);
+                    *(pTarget + 1) = (WCHAR)((ch >> 16) & 0x7F);
+                    *(pTarget + 2) = (WCHAR)((ch >> 8) & 0x7F);
+                    *(pTarget + 3) = (WCHAR)(ch & 0x7F);
+                    pSrc += 8;
+                    *(pTarget + 4) = (WCHAR)((chb >> 24) & 0x7F);
+                    *(pTarget + 5) = (WCHAR)((chb >> 16) & 0x7F);
+                    *(pTarget + 6) = (WCHAR)((chb >> 8) & 0x7F);
+                    *(pTarget + 7) = (WCHAR)(chb & 0x7F);
+                    pTarget += 8;
+#else // BIGENDIAN
+                    *pTarget = (WCHAR)(ch & 0x7F);
+                    *(pTarget + 1) = (WCHAR)((ch >> 8) & 0x7F);
+                    *(pTarget + 2) = (WCHAR)((ch >> 16) & 0x7F);
+                    *(pTarget + 3) = (WCHAR)((ch >> 24) & 0x7F);
+                    pSrc += 8;
+                    *(pTarget + 4) = (WCHAR)(chb & 0x7F);
+                    *(pTarget + 5) = (WCHAR)((chb >> 8) & 0x7F);
+                    *(pTarget + 6) = (WCHAR)((chb >> 16) & 0x7F);
+                    *(pTarget + 7) = (WCHAR)((chb >> 24) & 0x7F);
+                    pTarget += 8;
+#endif // BIGENDIAN
+                }
+                break;
+
+#if BIGENDIAN
+                LongCodeWithMask32 :
+                    // be careful about the sign extension
+                    ch = (int)(((uint)ch) >> 16);
+                LongCodeWithMask16:
+                    ch = (int)(((uint)ch) >> 8);
+#else // BIGENDIAN
+            LongCodeWithMask32:
+            LongCodeWithMask16:
+                ch &= 0xFF;
+#endif // BIGENDIAN
+                pSrc++;
+                if (ch <= 0x7F) {
+                    *pTarget = (WCHAR)ch;
+                    pTarget++;
+                    continue;
+                }
+
+            LongCode:
+                int chc = *pSrc;
+                pSrc++;
+
+                if (
+                    // bit 6 has to be zero
+                    (ch & 0x40) == 0 ||
+                    // we are expecting to see trailing bytes like 10vvvvvv
+                    (chc & 0xC0) != 0x80)
+                {
+                    goto BadLongCode;
+                }
+
+                chc &= 0x3F;
+
+                // start a new long code
+                if ((ch & 0x20) != 0) {
+
+                    // fold the first two bytes together
+                    chc |= (ch & 0x0F) << 6;
+
+                    if ((ch & 0x10) != 0) {
+                        // 4 byte encoding - surrogate
+                        ch = *pSrc;
+                        if (
+                            // check that bit 4 is zero, the non-shortest form of surrogate
+                            // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
+                            !InRange(chc >> 4, 0x01, 0x10) ||
+                            // we are expecting to see trailing bytes like 10vvvvvv
+                            (ch & 0xC0) != 0x80)
+                        {
+                            goto BadLongCode;
+                        }
+
+                        chc = (chc << 6) | (ch & 0x3F);
+
+                        ch = *(pSrc + 1);
+                        // we are expecting to see trailing bytes like 10vvvvvv
+                        if ((ch & 0xC0) != 0x80) {
+                            goto BadLongCode;
+                        }
+                        pSrc += 2;
+
+                        ch = (chc << 6) | (ch & 0x3F);
+
+                        *pTarget = (WCHAR)(((ch >> 10) & 0x7FF) +
+                            (SHORT)(CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10)));
+                        pTarget++;
+
+                        ch = (ch & 0x3FF) +
+                            (SHORT)(CharUnicodeInfo::LOW_SURROGATE_START);
+
+                        // extra byte, we're already planning 2 chars for 2 of these bytes,
+                        // but the big loop is testing the target against pStop, so we need
+                        // to subtract 2 more or we risk overrunning the input.  Subtract
+                        // one here and one below.
+                        pStop--;
+                    }
+                    else {
+                        // 3 byte encoding
+                        ch = *pSrc;
+                        if (
+                            // check for non-shortest form of 3 byte seq
+                            (chc & (0x1F << 5)) == 0 ||
+                            // Can't have surrogates here.
+                            (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
+                            // we are expecting to see trailing bytes like 10vvvvvv
+                            (ch & 0xC0) != 0x80)
+                        {
+                            goto BadLongCode;
+                        }
+                        pSrc++;
+
+                        ch = (chc << 6) | (ch & 0x3F);
+
+                        // extra byte, we're only expecting 1 char for each of these 3 bytes,
+                        // but the loop is testing the target (not source) against pStop, so
+                        // we need to subtract 2 more or we risk overrunning the input.
+                        // Subtract 1 here and one more below
+                        pStop--;
+                    }
+                }
+                else {
+                    // 2 byte encoding
+
+                    ch &= 0x1F;
+
+                    // check for non-shortest form
+                    if (ch <= 1) {
+                        goto BadLongCode;
+                    }
+                    ch = (ch << 6) | chc;
+                }
+
+                *pTarget = (WCHAR)ch;
+                pTarget++;
+
+                // extra byte, we're only expecting 1 char for each of these 2 bytes,
+                // but the loop is testing the target (not source) against pStop.
+                // subtract an extra count from pStop so that we don't overrun the input.
+                pStop--;
+            }
+#endif // FASTLOOP
+
+            Contract::Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetChars]pTarget <= pAllocatedBufferEnd");
+
+            // no pending bits at this point
+            ch = 0;
+            continue;
+
+        BadLongCode:
+            pSrc -= 2;
+            ch = 0;
+            continue;
+        }
+
+        if (ch != 0)
+        {
+            // Have to do fallback for invalid bytes
+            if (fallback == nullptr)
+            {
+                fallback = decoderFallback->CreateFallbackBuffer();
+                fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
+            }
+
+            // This'll back us up the appropriate # of bytes if we didn't get anywhere
+            if (!FallbackInvalidByteSequence(pSrc, ch, fallback))
+            {
+                Contract::Assert(pSrc >= bytes || pTarget == chars,
+                    "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing");
+
+                // Ran out of buffer space
+                // Need to throw an exception?
+                fallback->InternalReset();
+                ThrowCharsOverflow(pTarget == chars);
+            }
+            Contract::Assert(pSrc >= bytes,
+                "[UTF8Encoding.GetChars]Expected flushing invalid byte sequence to have remained within the byte array");
+            ch = 0;
+        }
+
+        // Shouldn't have anything in fallback buffer for GetChars
+        // (don't have to check m_throwOnOverflow for chars)
+        Contract::Assert(fallback == nullptr || fallback->GetRemaining() == 0,
+            "[UTF8Encoding.GetChars]Expected empty fallback buffer at end");
+
+        InternalDelete(fallback);
+
+        return PtrDiff(pTarget, chars);
+    }
+
+    int GetBytes(WCHAR* chars, int charCount, BYTE* bytes, int byteCount)
+    {
+        Contract::Assert(chars != nullptr, "[UTF8Encoding.GetBytes]chars!=nullptr");
+        Contract::Assert(byteCount >= 0, "[UTF8Encoding.GetBytes]byteCount >=0");
+        Contract::Assert(charCount >= 0, "[UTF8Encoding.GetBytes]charCount >=0");
+        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetBytes]bytes!=nullptr");
+
+        // For fallback we may need a fallback buffer.
+        // We wait to initialize it though in case we don't have any broken input unicode
+        EncoderFallbackBuffer* fallbackBuffer = nullptr;
+        WCHAR *pSrc = chars;
+        BYTE *pTarget = bytes;
+
+        WCHAR *pEnd = pSrc + charCount;
+        BYTE *pAllocatedBufferEnd = pTarget + byteCount;
+
+        int ch = 0;
+
+        // assume that JIT will enregister pSrc, pTarget and ch
+
+        while (true) {
+            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+
+            if (pSrc >= pEnd) {
+
+                if (ch == 0) {
+                    // Check if there's anything left to get out of the fallback buffer
+                    ch = fallbackBuffer != nullptr ? fallbackBuffer->InternalGetNextChar() : 0;
+                    if (ch > 0) {
+                        goto ProcessChar;
+                    }
+                }
+                else {
+                    // Case of leftover surrogates in the fallback buffer
+                    if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack) {
+                        Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
+                            "[UTF8Encoding.GetBytes]expected high surrogate"); //, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+
+                        int cha = ch;
+
+                        ch = fallbackBuffer->InternalGetNextChar();
+
+                        if (InRange(ch, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
+                            ch = ch + (cha << 10) + (0x10000 - CharUnicodeInfo::LOW_SURROGATE_START - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
+                            goto EncodeChar;
+                        }
+                        else if (ch > 0){
+                            goto ProcessChar;
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                }
+
+                // attempt to encode the partial surrogate (will fail or ignore)
+                if (ch > 0)
+                    goto EncodeChar;
+
+                // We're done
+                break;
+            }
+
+            if (ch > 0) {
+                // We have a high surrogate left over from a previous loop.
+                Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
+                    "[UTF8Encoding.GetBytes]expected high surrogate");//, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+
+                // use separate helper variables for local contexts so that the jit optimizations
+                // won't get confused about the variable lifetimes
+                int cha = *pSrc;
+
+                // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
+                // if (IsLowSurrogate(cha)) {
+                if (InRange(cha, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
+                    ch = cha + (ch << 10) +
+                        (0x10000
+                        - CharUnicodeInfo::LOW_SURROGATE_START
+                        - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
+
+                    pSrc++;
+                }
+                // else ch is still high surrogate and encoding will fail
+
+                // attempt to encode the surrogate or partial surrogate
+                goto EncodeChar;
+            }
+
+            // If we've used a fallback, then we have to check for it
+            if (fallbackBuffer != nullptr)
+            {
+                ch = fallbackBuffer->InternalGetNextChar();
+                if (ch > 0) goto ProcessChar;
+            }
+
+            // read next char. The JIT optimization seems to be getting confused when
+            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+            ch = *pSrc;
+            pSrc++;
+
+        ProcessChar:
+            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::HIGH_SURROGATE_END)) {
+                continue;
+            }
+            // either good char or partial surrogate
+
+        EncodeChar:
+            // throw exception on partial surrogate if necessary
+            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
+            {
+                // Lone surrogates aren't allowed, we have to do fallback for them
+                // Have to make a fallback buffer if we don't have one
+                if (fallbackBuffer == nullptr)
+                {
+                    // wait on fallbacks if we can
+                    // For fallback we may need a fallback buffer
+                    fallbackBuffer = encoderFallback->CreateFallbackBuffer();
+
+                    // Set our internal fallback interesting things.
+                    fallbackBuffer->InternalInitialize(chars, pEnd, true);
+                }
+
+                // Do our fallback.  Actually we already know its a mixed up surrogate,
+                // so the ref pSrc isn't gonna do anything.
+                fallbackBuffer->InternalFallback((WCHAR)ch, &pSrc);
+
+                // Ignore it if we don't throw
+                ch = 0;
+                continue;
+            }
+
+            // Count bytes needed
+            int bytesNeeded = 1;
+            if (ch > 0x7F) {
+                if (ch > 0x7FF) {
+                    if (ch > 0xFFFF) {
+                        bytesNeeded++;  // 4 bytes (surrogate pair)
+                    }
+                    bytesNeeded++;      // 3 bytes (800-FFFF)
+                }
+                bytesNeeded++;          // 2 bytes (80-7FF)
+            }
+
+            if (pTarget > pAllocatedBufferEnd - bytesNeeded) {
+                // Left over surrogate from last time will cause pSrc == chars, so we'll throw
+                if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack)
+                {
+                    fallbackBuffer->MovePrevious();              // Didn't use this fallback char
+                    if (ch > 0xFFFF)
+                        fallbackBuffer->MovePrevious();          // Was surrogate, didn't use 2nd part either
+                }
+                else
+                {
+                    pSrc--;                                     // Didn't use this char
+                    if (ch > 0xFFFF)
+                        pSrc--;                                 // Was surrogate, didn't use 2nd part either
+                }
+                Contract::Assert(pSrc >= chars || pTarget == bytes,
+                    "[UTF8Encoding.GetBytes]Expected pSrc to be within buffer or to throw with insufficient room.");
+                ThrowBytesOverflow(pTarget == bytes);  // Throw if we must
+                ch = 0;                                         // Nothing left over (we backed up to start of pair if supplimentary)
+                break;
+            }
+
+            if (ch <= 0x7F) {
+                *pTarget = (BYTE)ch;
+            }
+            else {
+                // use separate helper variables for local contexts so that the jit optimizations
+                // won't get confused about the variable lifetimes
+                int chb;
+                if (ch <= 0x7FF) {
+                    // 2 BYTE encoding
+                    chb = (BYTE)(0xC0 | (ch >> 6));
+                }
+                else
+                {
+                    if (ch <= 0xFFFF) {
+                        chb = (BYTE)(0xE0 | (ch >> 12));
+                    }
+                    else
+                    {
+                        *pTarget = (BYTE)(0xF0 | (ch >> 18));
+                        pTarget++;
+
+                        chb = 0x80 | ((ch >> 12) & 0x3F);
+                    }
+                    *pTarget = (BYTE)chb;
+                    pTarget++;
+
+                    chb = 0x80 | ((ch >> 6) & 0x3F);
+                }
+                *pTarget = (BYTE)chb;
+                pTarget++;
+
+                *pTarget = (BYTE)0x80 | (ch & 0x3F);
+            }
+            pTarget++;
+
+
+#ifdef FASTLOOP
+            // If still have fallback don't do fast loop
+            if (fallbackBuffer != nullptr && (ch = fallbackBuffer->InternalGetNextChar()) != 0)
+                goto ProcessChar;
+
+            int availableChars = PtrDiff(pEnd, pSrc);
+            int availableBytes = PtrDiff(pAllocatedBufferEnd, pTarget);
+
+            // don't fall into the fast decoding loop if we don't have enough characters
+            // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
+            if (availableChars <= 13) {
+                // we are hoping for 1 BYTE per char
+                if (availableBytes < availableChars) {
+                    // not enough output room.  no pending bits at this point
+                    ch = 0;
+                    continue;
+                }
+
+                // try to get over the remainder of the ascii characters fast though
+                WCHAR* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+                while (pSrc < pLocalEnd) {
+                    ch = *pSrc;
+                    pSrc++;
+
+                    // Not ASCII, need more than 1 BYTE per char
+                    if (ch > 0x7F)
+                        goto ProcessChar;
+
+                    *pTarget = (BYTE)ch;
+                    pTarget++;
+                }
+                // we are done, let ch be 0 to clear encoder
+                ch = 0;
+                break;
+            }
+
+            // we need at least 1 BYTE per character, but Convert might allow us to convert
+            // only part of the input, so try as much as we can.  Reduce charCount if necessary
+            if (availableBytes < availableChars)
+            {
+                availableChars = availableBytes;
+            }
+
+            // FASTLOOP:
+            // - optimistic range checks
+            // - fallbacks to the slow loop for all special cases, exception throwing, etc.
+
+            // To compute the upper bound, assume that all characters are ASCII characters at this point,
+            //  the boundary will be decreased for every non-ASCII character we encounter
+            // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
+            // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
+            WCHAR *pStop = pSrc + availableChars - 5;
+
+            while (pSrc < pStop) {
+                ch = *pSrc;
+                pSrc++;
+
+                if (ch > 0x7F) {
+                    goto LongCode;
+                }
+                *pTarget = (BYTE)ch;
+                pTarget++;
+
+                // get pSrc aligned
+                if (((size_t)pSrc & 0x2) != 0) {
+                    ch = *pSrc;
+                    pSrc++;
+                    if (ch > 0x7F) {
+                        goto LongCode;
+                    }
+                    *pTarget = (BYTE)ch;
+                    pTarget++;
+                }
+
+                // Run 4 characters at a time!
+                while (pSrc < pStop) {
+                    ch = *(int*)pSrc;
+                    int chc = *(int*)(pSrc + 2);
+                    if (((ch | chc) & (int)0xFF80FF80) != 0) {
+                        goto LongCodeWithMask;
+                    }
+
+                    // Unfortunately, this is endianness sensitive
+#if BIGENDIAN
+                    *pTarget = (BYTE)(ch >> 16);
+                    *(pTarget + 1) = (BYTE)ch;
+                    pSrc += 4;
+                    *(pTarget + 2) = (BYTE)(chc >> 16);
+                    *(pTarget + 3) = (BYTE)chc;
+                    pTarget += 4;
+#else // BIGENDIAN
+                    *pTarget = (BYTE)ch;
+                    *(pTarget + 1) = (BYTE)(ch >> 16);
+                    pSrc += 4;
+                    *(pTarget + 2) = (BYTE)chc;
+                    *(pTarget + 3) = (BYTE)(chc >> 16);
+                    pTarget += 4;
+#endif // BIGENDIAN
+                }
+                continue;
+
+            LongCodeWithMask:
+#if BIGENDIAN
+                // be careful about the sign extension
+                ch = (int)(((uint)ch) >> 16);
+#else // BIGENDIAN
+                ch = (WCHAR)ch;
+#endif // BIGENDIAN
+                pSrc++;
+
+                if (ch > 0x7F) {
+                    goto LongCode;
+                }
+                *pTarget = (BYTE)ch;
+                pTarget++;
+                continue;
+
+            LongCode:
+                // use separate helper variables for slow and fast loop so that the jit optimizations
+                // won't get confused about the variable lifetimes
+                int chd;
+                if (ch <= 0x7FF) {
+                    // 2 BYTE encoding
+                    chd = 0xC0 | (ch >> 6);
+                }
+                else {
+                    if (!InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
+                        // 3 BYTE encoding
+                        chd = 0xE0 | (ch >> 12);
+                    }
+                    else
+                    {
+                        // 4 BYTE encoding - high surrogate + low surrogate
+                        if (ch > CharUnicodeInfo::HIGH_SURROGATE_END) {
+                            // low without high -> bad, try again in slow loop
+                            pSrc -= 1;
+                            break;
+                        }
+
+                        chd = *pSrc;
+                        pSrc++;
+
+                        // if (!IsLowSurrogate(chd)) {
+                        if (!InRange(chd, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
+                            // high not followed by low -> bad, try again in slow loop
+                            pSrc -= 2;
+                            break;
+                        }
+
+                        ch = chd + (ch << 10) +
+                            (0x10000
+                            - CharUnicodeInfo::LOW_SURROGATE_START
+                            - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
+
+                        *pTarget = (BYTE)(0xF0 | (ch >> 18));
+                        // pStop - this BYTE is compensated by the second surrogate character
+                        // 2 input chars require 4 output bytes.  2 have been anticipated already
+                        // and 2 more will be accounted for by the 2 pStop-- calls below.
+                        pTarget++;
+
+                        chd = 0x80 | ((ch >> 12) & 0x3F);
+                    }
+                    *pTarget = (BYTE)chd;
+                    pStop--;                    // 3 BYTE sequence for 1 char, so need pStop-- and the one below too.
+                    pTarget++;
+
+                    chd = 0x80 | ((ch >> 6) & 0x3F);
+                }
+                *pTarget = (BYTE)chd;
+                pStop--;                        // 2 BYTE sequence for 1 char so need pStop--.
+                pTarget++;
+
+                *pTarget = (BYTE)(0x80 | (ch & 0x3F));
+                // pStop - this BYTE is already included
+                pTarget++;
+            }
+
+            Contract::Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetBytes]pTarget <= pAllocatedBufferEnd");
+
+#endif // FASTLOOP
+
+            // no pending char at this point
+            ch = 0;
+        }
+
+        InternalDelete(fallbackBuffer);
+
+        return (int)(pTarget - bytes);
+    }
+
+    int GetByteCount(WCHAR *chars, int count)
+    {
+        // For fallback we may need a fallback buffer.
+        // We wait to initialize it though in case we don't have any broken input unicode
+        EncoderFallbackBuffer* fallbackBuffer = nullptr;
+        WCHAR *pSrc = chars;
+        WCHAR *pEnd = pSrc + count;
+
+        // Start by assuming we have as many as count
+        int byteCount = count;
+
+        int ch = 0;
+
+        while (true) {
+            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+            if (pSrc >= pEnd) {
+
+                if (ch == 0) {
+                    // Unroll any fallback that happens at the end
+                    ch = fallbackBuffer != nullptr ? fallbackBuffer->InternalGetNextChar() : 0;
+                    if (ch > 0) {
+                        byteCount++;
+                        goto ProcessChar;
+                    }
+                }
+                else {
+                    // Case of surrogates in the fallback.
+                    if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack) {
+                        Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
+                            "[UTF8Encoding.GetBytes]expected high surrogate");// , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+
+                        ch = fallbackBuffer->InternalGetNextChar();
+                        byteCount++;
+
+                        if (InRange(ch, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
+                            ch = 0xfffd;
+                            byteCount++;
+                            goto EncodeChar;
+                        }
+                        else if (ch > 0){
+                            goto ProcessChar;
+                        }
+                        else {
+                            byteCount--; // ignore last one.
+                            break;
+                        }
+                    }
+                }
+
+                if (ch <= 0) {
+                    break;
+                }
+
+                // attempt to encode the partial surrogate (will fallback or ignore it), it'll also subtract 1.
+                byteCount++;
+                goto EncodeChar;
+            }
+
+            if (ch > 0) {
+                Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
+                    "[UTF8Encoding.GetBytes]expected high surrogate"); // , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+
+                // use separate helper variables for local contexts so that the jit optimizations
+                // won't get confused about the variable lifetimes
+                int cha = *pSrc;
+
+                // count the pending surrogate
+                byteCount++;
+
+                // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
+                // if (IsLowSurrogate(cha)) {
+                if (InRange(cha, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
+                    // Don't need a real # because we're just counting, anything > 0x7ff ('cept surrogate) will do.
+                    ch = 0xfffd;
+                    //                        ch = cha + (ch << 10) +
+                    //                            (0x10000
+                    //                            - CharUnicodeInfo::LOW_SURROGATE_START
+                    //                            - (CharUnicodeInfo::HIGH_SURROGATE_START << 10) );
+
+                    // Use this next char
+                    pSrc++;
+                }
+                // else ch is still high surrogate and encoding will fail (so don't add count)
+
+                // attempt to encode the surrogate or partial surrogate
+                goto EncodeChar;
+            }
+
+            // If we've used a fallback, then we have to check for it
+            if (fallbackBuffer != nullptr)
+            {
+                ch = fallbackBuffer->InternalGetNextChar();
+                if (ch > 0)
+                {
+                    // We have an extra byte we weren't expecting.
+                    byteCount++;
+                    goto ProcessChar;
+                }
+            }
+
+            // read next char. The JIT optimization seems to be getting confused when
+            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+            ch = *pSrc;
+            pSrc++;
+
+        ProcessChar:
+            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::HIGH_SURROGATE_END)) {
+                // we will count this surrogate next time around
+                byteCount--;
+                continue;
+            }
+            // either good char or partial surrogate
+
+        EncodeChar:
+            // throw exception on partial surrogate if necessary
+            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
+            {
+                // Lone surrogates aren't allowed
+                // Have to make a fallback buffer if we don't have one
+                if (fallbackBuffer == nullptr)
+                {
+                    // wait on fallbacks if we can
+                    // For fallback we may need a fallback buffer
+                    fallbackBuffer = encoderFallback->CreateFallbackBuffer();
+
+                    // Set our internal fallback interesting things.
+                    fallbackBuffer->InternalInitialize(chars, chars + count, false);
+                }
+
+                // Do our fallback.  Actually we already know its a mixed up surrogate,
+                // so the ref pSrc isn't gonna do anything.
+                fallbackBuffer->InternalFallback((WCHAR)ch, &pSrc);
+
+                // Ignore it if we don't throw (we had preallocated this ch)
+                byteCount--;
+                ch = 0;
+                continue;
+            }
+
+            // Count them
+            if (ch > 0x7F) {
+                if (ch > 0x7FF) {
+                    // the extra surrogate byte was compensated by the second surrogate character
+                    // (2 surrogates make 4 bytes.  We've already counted 2 bytes, 1 per char)
+                    byteCount++;
+                }
+                byteCount++;
+            }
+
+#if WIN64
+            // check for overflow
+            if (byteCount < 0) {
+                break;
+            }
+#endif
+
+#ifdef FASTLOOP
+            // If still have fallback don't do fast loop
+            if (fallbackBuffer != nullptr && (ch = fallbackBuffer->InternalGetNextChar()) != 0)
+            {
+                // We're reserving 1 byte for each char by default
+                byteCount++;
+                goto ProcessChar;
+            }
+
+            int availableChars = PtrDiff(pEnd, pSrc);
+
+            // don't fall into the fast decoding loop if we don't have enough characters
+            if (availableChars <= 13) {
+                // try to get over the remainder of the ascii characters fast though
+                WCHAR* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+                while (pSrc < pLocalEnd) {
+                    ch = *pSrc;
+                    pSrc++;
+                    if (ch > 0x7F)
+                        goto ProcessChar;
+                }
+
+                // we are done
+                break;
+            }
+
+#if WIN64
+            // make sure that we won't get a silent overflow inside the fast loop
+            // (Fall out to slow loop if we have this many characters)
+            availableChars &= 0x0FFFFFFF;
+#endif
+
+            // To compute the upper bound, assume that all characters are ASCII characters at this point,
+            //  the boundary will be decreased for every non-ASCII character we encounter
+            // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
+            WCHAR *pStop = pSrc + availableChars - (3 + 4);
+
+            while (pSrc < pStop) {
+                ch = *pSrc;
+                pSrc++;
+
+                if (ch > 0x7F)                                                  // Not ASCII
+                {
+                    if (ch > 0x7FF)                                             // Not 2 Byte
+                    {
+                        if ((ch & 0xF800) == 0xD800)                            // See if its a Surrogate
+                            goto LongCode;
+                        byteCount++;
+                    }
+                    byteCount++;
+                }
+
+                // get pSrc aligned
+                if (((size_t)pSrc & 0x2) != 0) {
+                    ch = *pSrc;
+                    pSrc++;
+                    if (ch > 0x7F)                                              // Not ASCII
+                    {
+                        if (ch > 0x7FF)                                         // Not 2 Byte
+                        {
+                            if ((ch & 0xF800) == 0xD800)                        // See if its a Surrogate
+                                goto LongCode;
+                            byteCount++;
+                        }
+                        byteCount++;
+                    }
+                }
+
+                // Run 2 * 4 characters at a time!
+                while (pSrc < pStop) {
+                    ch = *(int*)pSrc;
+                    int chc = *(int*)(pSrc + 2);
+                    if (((ch | chc) & (int)0xFF80FF80) != 0)         // See if not ASCII
+                    {
+                        if (((ch | chc) & (int)0xF800F800) != 0)     // See if not 2 Byte
+                        {
+                            goto LongCodeWithMask;
+                        }
+
+
+                        if ((ch & (int)0xFF800000) != 0)             // Actually 0x07800780 is all we care about (4 bits)
+                            byteCount++;
+                        if ((ch & (int)0xFF80) != 0)
+                            byteCount++;
+                        if ((chc & (int)0xFF800000) != 0)
+                            byteCount++;
+                        if ((chc & (int)0xFF80) != 0)
+                            byteCount++;
+                    }
+                    pSrc += 4;
+
+                    ch = *(int*)pSrc;
+                    chc = *(int*)(pSrc + 2);
+                    if (((ch | chc) & (int)0xFF80FF80) != 0)         // See if not ASCII
+                    {
+                        if (((ch | chc) & (int)0xF800F800) != 0)     // See if not 2 Byte
+                        {
+                            goto LongCodeWithMask;
+                        }
+
+                        if ((ch & (int)0xFF800000) != 0)
+                            byteCount++;
+                        if ((ch & (int)0xFF80) != 0)
+                            byteCount++;
+                        if ((chc & (int)0xFF800000) != 0)
+                            byteCount++;
+                        if ((chc & (int)0xFF80) != 0)
+                            byteCount++;
+                    }
+                    pSrc += 4;
+                }
+                break;
+
+            LongCodeWithMask:
+#if BIGENDIAN
+                // be careful about the sign extension
+                ch = (int)(((uint)ch) >> 16);
+#else // BIGENDIAN
+                ch = (WCHAR)ch;
+#endif // BIGENDIAN
+                pSrc++;
+
+                if (ch <= 0x7F) {
+                    continue;
+                }
+
+            LongCode:
+                // use separate helper variables for slow and fast loop so that the jit optimizations
+                // won't get confused about the variable lifetimes
+                if (ch > 0x7FF) {
+                    if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
+                        // 4 byte encoding - high surrogate + low surrogate
+
+                        int chd = *pSrc;
+                        if (
+                            ch > CharUnicodeInfo::HIGH_SURROGATE_END ||
+                            !InRange(chd, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
+                        {
+                            // Back up and drop out to slow loop to figure out error
+                            pSrc--;
+                            break;
+                        }
+                        pSrc++;
+
+                        // byteCount - this byte is compensated by the second surrogate character
+                    }
+                    byteCount++;
+                }
+                byteCount++;
+
+                // byteCount - the last byte is already included
+            }
+#endif // FASTLOOP
+
+            // no pending char at this point
+            ch = 0;
+        }
+
+#if WIN64
+        // check for overflow
+        if (byteCount < 0) {
+            throw ArgumentException("Conversion buffer overflow.");
+        }
+#endif
+
+        Contract::Assert(fallbackBuffer == nullptr || fallbackBuffer->GetRemaining() == 0,
+            "[UTF8Encoding.GetByteCount]Expected Empty fallback buffer");
+
+        InternalDelete(fallbackBuffer);
+
+        return byteCount;
+    }
+
+};
+
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  UTF8ToUnicode
+//
+//  Maps a UTF-8 character string to its wide character string counterpart.
+//
+////////////////////////////////////////////////////////////////////////////
+
+int UTF8ToUnicode(
+    LPCSTR lpSrcStr,
+    int cchSrc,
+    LPWSTR lpDestStr,
+    int cchDest,
+    DWORD dwFlags
+    )
+{
+    int ret;
+    UTF8Encoding enc(dwFlags & MB_ERR_INVALID_CHARS);
+    try {
+        ret = enc.GetCharCount((BYTE*)lpSrcStr, cchSrc);
+        if (cchDest){
+            if (ret > cchDest){
+                SetLastError(ERROR_INSUFFICIENT_BUFFER);
+                ret = 0;
+            }
+            enc.GetChars((BYTE*)lpSrcStr, cchSrc, (WCHAR*)lpDestStr, ret);
+        }
+    }
+    catch (const InsufficientBufferException& e){
+        SetLastError(ERROR_INSUFFICIENT_BUFFER);
+        return 0;
+    }
+    catch (const DecoderFallbackException& e){
+        SetLastError(ERROR_NO_UNICODE_TRANSLATION);
+        return 0;
+    }
+    catch (const ArgumentException& e){
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return 0;
+    }
+    return ret;
+}
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  UnicodeToUTF8
+//
+//  Maps a Unicode character string to its UTF-8 string counterpart.
+//
+////////////////////////////////////////////////////////////////////////////
+
+int UnicodeToUTF8(
+    LPCWSTR lpSrcStr,
+    int cchSrc,
+    LPSTR lpDestStr,
+    int cchDest)
+{
+    int ret;
+    UTF8Encoding enc(false);
+    try{
+        ret = enc.GetByteCount((WCHAR*)lpSrcStr, cchSrc);
+        if (cchDest){
+            if (ret > cchDest){
+                SetLastError(ERROR_INSUFFICIENT_BUFFER);
+                ret = 0;
+            }
+            enc.GetBytes((WCHAR*)lpSrcStr, cchSrc, (BYTE*)lpDestStr, ret);
+        }
+    }
+    catch (const InsufficientBufferException& e){
+        SetLastError(ERROR_INSUFFICIENT_BUFFER);
+        return 0;
+    }
+    catch (const EncoderFallbackException& e){
+        SetLastError(ERROR_NO_UNICODE_TRANSLATION);
+        return 0;
+    }
+    catch (const ArgumentException& e){
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return 0;
+    }
+    return ret;
+}
diff --git a/src/coreclr/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp b/src/coreclr/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
index 2b9f67b17bfbde..cab71f15e7098e 100644
--- a/src/coreclr/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
+++ b/src/coreclr/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
@@ -217,7 +217,7 @@ PALTEST(locale_info_MultiByteToWideChar_test4_paltest_multibytetowidechar_test4,
         
         if (wcscmp(wideBuffer, unicodeStrings[i]) != 0)
         {
-            printf("MultiByteToWideChar string %d: the resulting string doesn't match the expected one!\n", i);
+            Fail("MultiByteToWideChar string %d: the resulting string doesn't match the expected one!\n", i);
         }
         
         free(wideBuffer);
diff --git a/src/coreclr/pal/tests/palsuite/locale_info/WideCharToMultiByte/test5/test5.cpp b/src/coreclr/pal/tests/palsuite/locale_info/WideCharToMultiByte/test5/test5.cpp
index 387015f0af71cc..bf2dabedefa880 100644
--- a/src/coreclr/pal/tests/palsuite/locale_info/WideCharToMultiByte/test5/test5.cpp
+++ b/src/coreclr/pal/tests/palsuite/locale_info/WideCharToMultiByte/test5/test5.cpp
@@ -141,7 +141,7 @@ PALTEST(locale_info_WideCharToMultiByte_test5_paltest_widechartomultibyte_test5,
         
         if (strcmp(utf8Buffer, utf8Strings[i]) != 0)
         {
-            printf("WideCharToMultiByte string %d: the resulting string doesn't match the expected one!\n", i);
+            Fail("WideCharToMultiByte string %d: the resulting string doesn't match the expected one!\n", i);
         }
         
         free(utf8Buffer);
diff --git a/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineEvent.cs b/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineEvent.cs
index a01945d7f64911..05cda03a777379 100644
--- a/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineEvent.cs
+++ b/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineEvent.cs
@@ -26,7 +26,7 @@ public static IEnumerable<object[]> TestData()
         }
 
         [Theory]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/2389", TestRuntimes.Mono | TestRuntimes.CoreCLR)]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/2389", TestRuntimes.Mono)]
         [MemberData(nameof(TestData))]
         public void DefineEvent(string name, EventAttributes attributes, Type eventType, string expectedName, EventAttributes expectedAttributes)
         {
diff --git a/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineProperty.cs b/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineProperty.cs
index 500ee8104766d7..84d27ee2f98e31 100644
--- a/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineProperty.cs
+++ b/src/libraries/System.Reflection.Emit/tests/TypeBuilder/TypeBuilderDefineProperty.cs
@@ -29,7 +29,7 @@ public static IEnumerable<object[]> TestData()
         }
 
         [Theory]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/2389", TestRuntimes.Mono | TestRuntimes.CoreCLR)]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/2389", TestRuntimes.Mono)]
         [MemberData(nameof(TestData))]
         public void DefineProperty(string name, PropertyAttributes attributes, Type returnType, Type[] parameterTypes, string expectedName, PropertyAttributes expectedPropertyAttributes)
         {
diff --git a/src/mono/mono/eglib/CMakeLists.txt b/src/mono/mono/eglib/CMakeLists.txt
index b2945231711711..3de4a9c83d2f5b 100644
--- a/src/mono/mono/eglib/CMakeLists.txt
+++ b/src/mono/mono/eglib/CMakeLists.txt
@@ -20,7 +20,7 @@ set(eglib_common_sources
     gbytearray.c
     gerror.c
     ghashtable.c
-    ${CLR_SRC_NATIVE_DIR}/minipal/utf8converter.c
+    giconv.c
     gmem.c
     goutput.c
     gstr.c
@@ -32,7 +32,8 @@ set(eglib_common_sources
     gpath.c
     gspawn.c
     gfile.c
-    gfile-posix.c)
+    gfile-posix.c
+    gutf8.c)
 
 set(eglib_headers
   glib.h
diff --git a/src/native/minipal/utf8converter.c b/src/mono/mono/eglib/giconv.c
similarity index 68%
rename from src/native/minipal/utf8converter.c
rename to src/mono/mono/eglib/giconv.c
index 0aeada3f4773a0..664ad31bba258a 100644
--- a/src/native/minipal/utf8converter.c
+++ b/src/mono/mono/eglib/giconv.c
@@ -1,7 +1,32 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include <minipal/utf8converter.h>
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ *  Copyright (C) 2011 Jeffrey Stedfast
+ *
+ *  Permission is hereby granted, free of charge, to any person
+ *  obtaining a copy of this software and associated documentation
+ *  files (the "Software"), to deal in the Software without
+ *  restriction, including without limitation the rights to use, copy,
+ *  modify, merge, publish, distribute, sublicense, and/or sell copies
+ *  of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be
+ *  included in all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *  DEALINGS IN THE SOFTWARE.
+ */
+#include <config.h>
+#include <glib.h>
+#include <string.h>
+#include <errno.h>
+#include "../utils/mono-errno.h"
 
 #ifdef _MSC_VER
 #define FORCE_INLINE(RET_TYPE) __forceinline RET_TYPE
@@ -9,333 +34,40 @@
 #define FORCE_INLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline))
 #endif
 
+
+#define UNROLL_DECODE_UTF8 0
+#define UNROLL_ENCODE_UTF8 0
+
+static int decode_utf32be (char *inbuf, size_t inleft, gunichar *outchar);
+static int encode_utf32be (gunichar c, char *outbuf, size_t outleft);
+
+static int decode_utf32le (char *inbuf, size_t inleft, gunichar *outchar);
+static int encode_utf32le (gunichar c, char *outbuf, size_t outleft);
+
+static int decode_utf16be (char *inbuf, size_t inleft, gunichar *outchar);
+static int encode_utf16be (gunichar c, char *outbuf, size_t outleft);
+
+static int decode_utf16le (char *inbuf, size_t inleft, gunichar *outchar);
+static int encode_utf16le (gunichar c, char *outbuf, size_t outleft);
+
+static FORCE_INLINE (int) decode_utf8 (char *inbuf, size_t inleft, gunichar *outchar);
+static int encode_utf8 (gunichar c, char *outbuf, size_t outleft);
+
+static int decode_latin1 (char *inbuf, size_t inleft, gunichar *outchar);
+static int encode_latin1 (gunichar c, char *outbuf, size_t outleft);
+
 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
 #define decode_utf32 decode_utf32le
 #define encode_utf32 encode_utf32le
 #define decode_utf16 decode_utf16le
 #define encode_utf16 encode_utf16le
-#define GUINT16_TO_LE(x) (x)
-#define GUINT16_TO_BE(x) GUINT16_SWAP_LE_BE(x)
 #else
 #define decode_utf32 decode_utf32be
 #define encode_utf32 encode_utf32be
 #define decode_utf16 decode_utf16be
 #define encode_utf16 encode_utf16be
-#define GUINT16_TO_LE(x) GUINT16_SWAP_LE_BE(x)
-#define GUINT16_TO_BE(x) (x)
 #endif
 
-/*
- * Index into the table below with the first byte of a UTF-8 sequence to get
- * the number of bytes that are supposed to follow it to complete the sequence.
- *
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is left
- * as-is for anyone who may want to do such conversion, which was allowed in
- * earlier algorithms.
-*/
-const guchar g_utf8_jump_table[256] = {
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
-};
-
-static gboolean
-utf8_validate (const unsigned char *inptr, size_t len)
-{
-	const unsigned char *ptr = inptr + len;
-	unsigned char c;
-
-	/* Everything falls through when TRUE... */
-	switch (len) {
-	default:
-		return FALSE;
-	case 4:
-		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
-			return FALSE;
-
-		if ((c == 0xBF || c == 0xBE) && ptr[-1] == 0xBF) {
-			if (ptr[-2] == 0x8F || ptr[-2] == 0x9F ||
-			    ptr[-2] == 0xAF || ptr[-2] == 0xBF)
-				return FALSE;
-		}
-	case 3:
-		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
-			return FALSE;
-	case 2:
-		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
-			return FALSE;
-
-		/* no fall-through in this inner switch */
-		switch (*inptr) {
-		case 0xE0: if (c < 0xA0) return FALSE; break;
-		case 0xED: if (c > 0x9F) return FALSE; break;
-		case 0xEF: if (c == 0xB7 && (ptr[1] > 0x8F && ptr[1] < 0xB0)) return FALSE;
-			if (c == 0xBF && (ptr[1] == 0xBE || ptr[1] == 0xBF)) return FALSE;
-			break;
-		case 0xF0: if (c < 0x90) return FALSE; break;
-		case 0xF4: if (c > 0x8F) return FALSE; break;
-		default:   if (c < 0x80) return FALSE; break;
-		}
-	case 1: if (*inptr >= 0x80 && *inptr < 0xC2) return FALSE;
-	}
-
-	if (*inptr > 0xF4)
-		return FALSE;
-
-	return TRUE;
-}
-
-/**
- * g_utf8_validate:
- * @str: a utf-8 encoded string
- * @max_len: max number of bytes to validate (or -1 to validate the entire null-terminated string)
- * @end: output parameter to mark the end of the valid input
- *
- * Checks @utf for being valid UTF-8. @str is assumed to be
- * null-terminated. This function is not super-strict, as it will
- * allow longer UTF-8 sequences than necessary. Note that Java is
- * capable of producing these sequences if provoked. Also note, this
- * routine checks for the 4-byte maximum size, but does not check for
- * 0x10ffff maximum value.
- *
- * Return value: %TRUE if @str is valid or %FALSE otherwise.
- **/
-gboolean
-g_utf8_validate (const gchar *str, gssize max_len, const gchar **end)
-{
-	guchar *inptr = (guchar *) str;
-	gboolean valid = TRUE;
-	guint length, min;
-	gssize n = 0;
-
-	if (max_len == 0)
-		return FALSE;
-
-	if (max_len < 0) {
-		while (*inptr != 0) {
-			length = g_utf8_jump_table[*inptr];
-			if (!utf8_validate (inptr, length)) {
-				valid = FALSE;
-				break;
-			}
-
-			inptr += length;
-		}
-	} else {
-		while (n < max_len) {
-			if (*inptr == 0) {
-				/* Note: return FALSE if we encounter nul-byte
-				 * before max_len is reached. */
-				valid = FALSE;
-				break;
-			}
-
-			length = g_utf8_jump_table[*inptr];
-			min = MIN (length, GSSIZE_TO_UINT (max_len - n));
-
-			if (!utf8_validate (inptr, min)) {
-				valid = FALSE;
-				break;
-			}
-
-			if (min < length) {
-				valid = FALSE;
-				break;
-			}
-
-			inptr += length;
-			n += length;
-		}
-	}
-
-	if (end != NULL)
-		*end = (gchar *) inptr;
-
-	return valid;
-}
-
-gunichar
-g_utf8_get_char_validated (const gchar *str, gssize max_len)
-{
-	unsigned char *inptr = (unsigned char *) str;
-	gunichar u = *inptr;
-	int n, i;
-
-	if (max_len == 0)
-		return -2;
-
-	if (u < 0x80) {
-		/* simple ascii case */
-		return u;
-	} else if (u < 0xc2) {
-		return -1;
-	} else if (u < 0xe0) {
-		u &= 0x1f;
-		n = 2;
-	} else if (u < 0xf0) {
-		u &= 0x0f;
-		n = 3;
-	} else if (u < 0xf8) {
-		u &= 0x07;
-		n = 4;
-	} else if (u < 0xfc) {
-		u &= 0x03;
-		n = 5;
-	} else if (u < 0xfe) {
-		u &= 0x01;
-		n = 6;
-	} else {
-		return -1;
-	}
-
-	if (max_len > 0) {
-		if (!utf8_validate (inptr, MIN (max_len, n)))
-			return -1;
-
-		if (max_len < n)
-			return -2;
-	} else {
-		if (!utf8_validate (inptr, n))
-			return -1;
-	}
-
-	for (i = 1; i < n; i++)
-		u = (u << 6) | (*++inptr ^ 0x80);
-
-	return u;
-}
-
-glong
-g_utf8_strlen (const gchar *str, gssize max_len)
-{
-	const guchar *inptr = (const guchar *) str;
-	glong clen = 0, len = 0, n;
-
-	if (max_len == 0)
-		return 0;
-
-	if (max_len < 0) {
-		while (*inptr) {
-			inptr += g_utf8_jump_table[*inptr];
-			len++;
-		}
-	} else {
-		while (len < max_len && *inptr) {
-			n = g_utf8_jump_table[*inptr];
-			if ((clen + n) > max_len)
-				break;
-
-			inptr += n;
-			clen += n;
-			len++;
-		}
-	}
-
-	return len;
-}
-
-gunichar
-g_utf8_get_char (const gchar *src)
-{
-	unsigned char *inptr = (unsigned char *) src;
-	gunichar u = *inptr;
-	int n, i;
-
-	if (u < 0x80) {
-		/* simple ascii case */
-		return u;
-	} else if (u < 0xe0) {
-		u &= 0x1f;
-		n = 2;
-	} else if (u < 0xf0) {
-		u &= 0x0f;
-		n = 3;
-	} else if (u < 0xf8) {
-		u &= 0x07;
-		n = 4;
-	} else if (u < 0xfc) {
-		u &= 0x03;
-		n = 5;
-	} else {
-		u &= 0x01;
-		n = 6;
-	}
-
-	for (i = 1; i < n; i++)
-		u = (u << 6) | (*++inptr ^ 0x80);
-
-	return u;
-}
-
-gchar *
-g_utf8_offset_to_pointer (const gchar *str, glong offset)
-{
-	const gchar *p = str;
-
-	if (offset > 0) {
-		do {
-			p = g_utf8_next_char (p);
-			offset --;
-		} while (offset > 0);
-	}
-	else if (offset < 0) {
-		const gchar *jump = str;
-		do {
-			// since the minimum size of a character is 1
-			// we know we can step back at least offset bytes
-			jump = jump + offset;
-
-			// if we land in the middle of a character
-			// walk to the beginning
-			while ((*jump & 0xc0) == 0x80)
-				jump --;
-
-			// count how many characters we've actually walked
-			// by going forward
-			p = jump;
-			do {
-				p = g_utf8_next_char (p);
-				offset ++;
-			} while (p < jump);
-
-		} while (offset < 0);
-	}
-
-	return (gchar *)p;
-}
-
-glong
-g_utf8_pointer_to_offset (const gchar *str, const gchar *pos)
-{
-	const gchar *inptr, *inend;
-	glong offset = 0;
-	glong sign = 1;
-
-	if (pos == str)
-		return 0;
-
-	if (str < pos) {
-		inptr = str;
-		inend = pos;
-	} else {
-		inptr = pos;
-		inend = str;
-		sign = -1;
-	}
-
-	do {
-		inptr = g_utf8_next_char (inptr);
-		offset++;
-	} while (inptr < inend);
-
-	return offset * sign;
-}
-
 /*
  * Unicode encoders and decoders
  */
@@ -687,12 +419,12 @@ encode_latin1 (gunichar c, char *outbuf, size_t outleft)
  * Simple conversion API
  */
 
-static gpointer g_error_quark = (gpointer)"ConvertError";
+static gpointer error_quark = (gpointer)"ConvertError";
 
 gpointer
 g_convert_error_quark (void)
 {
-	return g_error_quark;
+	return error_quark;
 }
 /*
  * Unicode conversion
@@ -814,7 +546,7 @@ g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written)
 	if (items_written)
 		*items_written = n;
 
-	outptr = outbuf = (gunichar *)g_malloc ((n + 1) * sizeof (gunichar));
+	outptr = outbuf = g_malloc ((n + 1) * sizeof (gunichar));
 	inptr = (char *) str;
 
 	for (i = 0; i < n; i++) {
@@ -828,7 +560,7 @@ g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written)
 }
 
 static gunichar2 *
-eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
+eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
 {
 	gunichar2 *outbuf, *outptr;
 	size_t outlen = 0;
@@ -879,7 +611,7 @@ eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong
 		*items_written = (glong)outlen;
 
 	if (G_LIKELY (!custom_alloc_func))
-		outptr = outbuf = (gunichar2 *)g_malloc ((outlen + 1) * sizeof (gunichar2));
+		outptr = outbuf = g_malloc ((outlen + 1) * sizeof (gunichar2));
 	else
 		outptr = outbuf = (gunichar2 *)custom_alloc_func ((outlen + 1) * sizeof (gunichar2), custom_alloc_data);
 
@@ -910,8 +642,7 @@ eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong
 		inptr += n;
 	}
 
-	if (null_terminate)
-		*outptr = '\0';
+	*outptr = '\0';
 
 	return outbuf;
 
@@ -941,55 +672,49 @@ eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong
 gunichar2 *
 g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_BYTE_ORDER);
 }
 
 gunichar2 *
 g_utf8_to_utf16be (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_BIG_ENDIAN);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_BIG_ENDIAN);
 }
 
 gunichar2 *
 g_utf8_to_utf16le (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_LITTLE_ENDIAN);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_LITTLE_ENDIAN);
 }
 
 gunichar2 *
 g_utf8_to_utf16_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
-}
-
-gunichar2 *
-g_utf8_to_utf16_custom_alloc_optional (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
-{
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, include_nuls, replace_invalid_codepoints, null_terminate, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
 }
 
 gunichar2 *
 g_utf8_to_utf16be_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, custom_alloc_func, custom_alloc_data, err, G_BIG_ENDIAN);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_BIG_ENDIAN);
 }
 
 gunichar2 *
 g_utf8_to_utf16le_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, custom_alloc_func, custom_alloc_data, err, G_LITTLE_ENDIAN);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_LITTLE_ENDIAN);
 }
 
 gunichar2 *
 eg_utf8_to_utf16_with_nuls (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, FALSE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, FALSE, NULL, NULL, err, G_BYTE_ORDER);
 }
 
 gunichar2 *
 eg_wtf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, TRUE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
+	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
 }
 
 gunichar *
@@ -1044,7 +769,7 @@ g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_wri
 	if (items_read)
 		*items_read = GPTRDIFF_TO_LONG (inptr - str);
 
-	outptr = outbuf = (gunichar *)g_malloc (outlen + 4);
+	outptr = outbuf = g_malloc (outlen + 4);
 	inptr = (char *) str;
 	inleft = len;
 
@@ -1066,23 +791,17 @@ g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_wri
 
 static
 gchar *
-eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
+eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
 {
 	char *inptr, *outbuf, *outptr;
 	size_t outlen = 0;
 	size_t inleft;
 	gunichar c;
-	gboolean replaced = FALSE;
 	int n;
 
 	g_return_val_if_fail (str != NULL, NULL);
 
 	if (len < 0) {
-		if (include_nuls) {
-			g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, "Conversions with embedded nulls must pass the string length");
-			return NULL;
-		}
-
 		len = 0;
 		while (str[len])
 			len++;
@@ -1099,37 +818,30 @@ eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, gl
 				inptr += 2;
 			}
 
-			if (errno == EILSEQ && !replace_invalid_codepoints) {
+			if (errno == EILSEQ) {
 				g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
 					     "Illegal byte sequence encountered in the input.");
-			} else if (items_read && !replace_invalid_codepoints) {
+			} else if (items_read) {
 				/* partial input is ok if we can let our caller know... */
 				break;
-			} else if (!replace_invalid_codepoints) {
+			} else {
 				g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
 					     "Partial byte sequence encountered in the input.");
 			}
 
-			if (replace_invalid_codepoints) {
-				n = sizeof(gunichar);
-				c = '?';
-				replaced = TRUE;
-			} else {
-				if (items_read)
-					*items_read = GPTRDIFF_TO_LONG ((inptr - (char *) str) / 2);
+			if (items_read)
+				*items_read = GPTRDIFF_TO_LONG ((inptr - (char *) str) / 2);
 
-				if (items_written)
-					*items_written = 0;
+			if (items_written)
+				*items_written = 0;
 
-				return NULL;
-			}
-		} else if (c == 0 && !include_nuls)
+			return NULL;
+		} else if (c == 0)
 			break;
 
-		outlen += (replaced && replace_invalid_codepoints) ? n - 1 : g_unichar_to_utf8 (c, NULL);
+		outlen += g_unichar_to_utf8 (c, NULL);
 		inleft -= n;
 		inptr += n;
-		replaced = FALSE;
 	}
 
 	if (items_read)
@@ -1139,7 +851,7 @@ eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, gl
 		*items_written = (glong)outlen;
 
 	if (G_LIKELY (!custom_alloc_func))
-		outptr = outbuf = (char *)g_malloc (outlen + 1);
+		outptr = outbuf = g_malloc (outlen + 1);
 	else
 		outptr = outbuf = (char *)custom_alloc_func (outlen + 1, custom_alloc_data);
 
@@ -1154,24 +866,17 @@ eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, gl
 	inleft = len * 2;
 
 	while (inleft > 0) {
-		if ((n = decode_utf16_endian (inptr, inleft, &c, endian)) < 0) {
-			if (replace_invalid_codepoints) {
-				outptr += '?';
-				n = sizeof(gunichar);
-			} else
-				break;
-		} else if (c == 0 && !include_nuls) {
+		if ((n = decode_utf16_endian (inptr, inleft, &c, endian)) < 0)
+			break;
+		else if (c == 0)
 			break;
-		} else {
-			outptr += g_unichar_to_utf8 (c, outptr);
-		}
 
+		outptr += g_unichar_to_utf8 (c, outptr);
 		inleft -= n;
 		inptr += n;
 	}
 
-	if (null_terminate)
-		*outptr = '\0';
+	*outptr = '\0';
 
 	return outbuf;
 }
@@ -1179,31 +884,25 @@ eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, gl
 gchar *
 g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_BYTE_ORDER);
 }
 
 gchar *
 g_utf16le_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_LITTLE_ENDIAN);
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_LITTLE_ENDIAN);
 }
 
 gchar *
 g_utf16be_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, NULL, NULL, err, G_BIG_ENDIAN);
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_BIG_ENDIAN);
 }
 
 gchar *
 g_utf16_to_utf8_custom_alloc (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, FALSE, FALSE, TRUE, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
-}
-
-gchar *
-g_utf16_to_utf8_custom_alloc_with_nulls (const gunichar2 *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
-{
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, include_nuls, TRUE, null_terminate, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
+	return eg_utf16_to_utf8_general (str, len, items_read, items_written, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
 }
 
 gunichar *
@@ -1267,7 +966,7 @@ g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *item
 	if (items_written)
 		*items_written = (glong)(outlen / 4);
 
-	outptr = outbuf = (gunichar *)g_malloc (outlen + 4);
+	outptr = outbuf = g_malloc (outlen + 4);
 	inptr = (char *) str;
 	inleft = len * 2;
 
@@ -1335,7 +1034,7 @@ g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_
 
 	len = i;
 
-	outptr = outbuf = (char *)g_malloc (outlen + 1);
+	outptr = outbuf = g_malloc (outlen + 1);
 	for (i = 0; i < len; i++)
 		outptr += g_unichar_to_utf8 (str[i], outptr);
 	*outptr = 0;
@@ -1397,7 +1096,7 @@ g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items
 
 	len = i;
 
-	outptr = outbuf = (gunichar2 *)g_malloc ((outlen + 1) * sizeof (gunichar2));
+	outptr = outbuf = g_malloc ((outlen + 1) * sizeof (gunichar2));
 	for (i = 0; i < len; i++)
 		outptr += g_unichar_to_utf16 (str[i], outptr);
 	*outptr = 0;
diff --git a/src/mono/mono/eglib/glib.h b/src/mono/mono/eglib/glib.h
index ef2101315eee98..e438c00298ec72 100644
--- a/src/mono/mono/eglib/glib.h
+++ b/src/mono/mono/eglib/glib.h
@@ -29,7 +29,6 @@
 #include <eglib-config.h>
 #include <minipal/utils.h>
 #include <time.h>
-#include "../utils/mono-errno.h"
 
 #ifndef EGLIB_NO_REMAP
 #include <eglib-remap.h>
diff --git a/src/mono/mono/eglib/gutf8.c b/src/mono/mono/eglib/gutf8.c
new file mode 100644
index 00000000000000..965a69f42e655d
--- /dev/null
+++ b/src/mono/mono/eglib/gutf8.c
@@ -0,0 +1,323 @@
+/*
+ * gutf8.c: UTF-8 conversion
+ *
+ * Author:
+ *   Atsushi Enomoto  <atsushi@ximian.com>
+ *
+ * (C) 2006 Novell, Inc.
+ * Copyright 2012 Xamarin Inc
+ */
+#include "config.h"
+#include <stdio.h>
+#include <glib.h>
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to get
+ * the number of bytes that are supposed to follow it to complete the sequence.
+ *
+ * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is left
+ * as-is for anyone who may want to do such conversion, which was allowed in
+ * earlier algorithms.
+*/
+const guchar g_utf8_jump_table[256] = {
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
+};
+
+static gboolean
+utf8_validate (const unsigned char *inptr, size_t len)
+{
+	const unsigned char *ptr = inptr + len;
+	unsigned char c;
+
+	/* Everything falls through when TRUE... */
+	switch (len) {
+	default:
+		return FALSE;
+	case 4:
+		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
+			return FALSE;
+
+		if ((c == 0xBF || c == 0xBE) && ptr[-1] == 0xBF) {
+			if (ptr[-2] == 0x8F || ptr[-2] == 0x9F ||
+			    ptr[-2] == 0xAF || ptr[-2] == 0xBF)
+				return FALSE;
+		}
+	case 3:
+		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
+			return FALSE;
+	case 2:
+		if ((c = (*--ptr)) < 0x80 || c > 0xBF)
+			return FALSE;
+
+		/* no fall-through in this inner switch */
+		switch (*inptr) {
+		case 0xE0: if (c < 0xA0) return FALSE; break;
+		case 0xED: if (c > 0x9F) return FALSE; break;
+		case 0xEF: if (c == 0xB7 && (ptr[1] > 0x8F && ptr[1] < 0xB0)) return FALSE;
+			if (c == 0xBF && (ptr[1] == 0xBE || ptr[1] == 0xBF)) return FALSE;
+			break;
+		case 0xF0: if (c < 0x90) return FALSE; break;
+		case 0xF4: if (c > 0x8F) return FALSE; break;
+		default:   if (c < 0x80) return FALSE; break;
+		}
+	case 1: if (*inptr >= 0x80 && *inptr < 0xC2) return FALSE;
+	}
+
+	if (*inptr > 0xF4)
+		return FALSE;
+
+	return TRUE;
+}
+
+/**
+ * g_utf8_validate:
+ * @str: a utf-8 encoded string
+ * @max_len: max number of bytes to validate (or -1 to validate the entire null-terminated string)
+ * @end: output parameter to mark the end of the valid input
+ *
+ * Checks @utf for being valid UTF-8. @str is assumed to be
+ * null-terminated. This function is not super-strict, as it will
+ * allow longer UTF-8 sequences than necessary. Note that Java is
+ * capable of producing these sequences if provoked. Also note, this
+ * routine checks for the 4-byte maximum size, but does not check for
+ * 0x10ffff maximum value.
+ *
+ * Return value: %TRUE if @str is valid or %FALSE otherwise.
+ **/
+gboolean
+g_utf8_validate (const gchar *str, gssize max_len, const gchar **end)
+{
+	guchar *inptr = (guchar *) str;
+	gboolean valid = TRUE;
+	guint length, min;
+	gssize n = 0;
+
+	if (max_len == 0)
+		return FALSE;
+
+	if (max_len < 0) {
+		while (*inptr != 0) {
+			length = g_utf8_jump_table[*inptr];
+			if (!utf8_validate (inptr, length)) {
+				valid = FALSE;
+				break;
+			}
+
+			inptr += length;
+		}
+	} else {
+		while (n < max_len) {
+			if (*inptr == 0) {
+				/* Note: return FALSE if we encounter nul-byte
+				 * before max_len is reached. */
+				valid = FALSE;
+				break;
+			}
+
+			length = g_utf8_jump_table[*inptr];
+			min = MIN (length, GSSIZE_TO_UINT (max_len - n));
+
+			if (!utf8_validate (inptr, min)) {
+				valid = FALSE;
+				break;
+			}
+
+			if (min < length) {
+				valid = FALSE;
+				break;
+			}
+
+			inptr += length;
+			n += length;
+		}
+	}
+
+	if (end != NULL)
+		*end = (gchar *) inptr;
+
+	return valid;
+}
+
+gunichar
+g_utf8_get_char_validated (const gchar *str, gssize max_len)
+{
+	unsigned char *inptr = (unsigned char *) str;
+	gunichar u = *inptr;
+	int n, i;
+
+	if (max_len == 0)
+		return -2;
+
+	if (u < 0x80) {
+		/* simple ascii case */
+		return u;
+	} else if (u < 0xc2) {
+		return -1;
+	} else if (u < 0xe0) {
+		u &= 0x1f;
+		n = 2;
+	} else if (u < 0xf0) {
+		u &= 0x0f;
+		n = 3;
+	} else if (u < 0xf8) {
+		u &= 0x07;
+		n = 4;
+	} else if (u < 0xfc) {
+		u &= 0x03;
+		n = 5;
+	} else if (u < 0xfe) {
+		u &= 0x01;
+		n = 6;
+	} else {
+		return -1;
+	}
+
+	if (max_len > 0) {
+		if (!utf8_validate (inptr, MIN (max_len, n)))
+			return -1;
+
+		if (max_len < n)
+			return -2;
+	} else {
+		if (!utf8_validate (inptr, n))
+			return -1;
+	}
+
+	for (i = 1; i < n; i++)
+		u = (u << 6) | (*++inptr ^ 0x80);
+
+	return u;
+}
+
+glong
+g_utf8_strlen (const gchar *str, gssize max_len)
+{
+	const guchar *inptr = (const guchar *) str;
+	glong clen = 0, len = 0, n;
+
+	if (max_len == 0)
+		return 0;
+
+	if (max_len < 0) {
+		while (*inptr) {
+			inptr += g_utf8_jump_table[*inptr];
+			len++;
+		}
+	} else {
+		while (len < max_len && *inptr) {
+			n = g_utf8_jump_table[*inptr];
+			if ((clen + n) > max_len)
+				break;
+
+			inptr += n;
+			clen += n;
+			len++;
+		}
+	}
+
+	return len;
+}
+
+gunichar
+g_utf8_get_char (const gchar *src)
+{
+	unsigned char *inptr = (unsigned char *) src;
+	gunichar u = *inptr;
+	int n, i;
+
+	if (u < 0x80) {
+		/* simple ascii case */
+		return u;
+	} else if (u < 0xe0) {
+		u &= 0x1f;
+		n = 2;
+	} else if (u < 0xf0) {
+		u &= 0x0f;
+		n = 3;
+	} else if (u < 0xf8) {
+		u &= 0x07;
+		n = 4;
+	} else if (u < 0xfc) {
+		u &= 0x03;
+		n = 5;
+	} else {
+		u &= 0x01;
+		n = 6;
+	}
+
+	for (i = 1; i < n; i++)
+		u = (u << 6) | (*++inptr ^ 0x80);
+
+	return u;
+}
+
+gchar *
+g_utf8_offset_to_pointer (const gchar *str, glong offset)
+{
+	const gchar *p = str;
+
+	if (offset > 0) {
+		do {
+			p = g_utf8_next_char (p);
+			offset --;
+		} while (offset > 0);
+	}
+	else if (offset < 0) {
+		const gchar *jump = str;
+		do {
+			// since the minimum size of a character is 1
+			// we know we can step back at least offset bytes
+			jump = jump + offset;
+
+			// if we land in the middle of a character
+			// walk to the beginning
+			while ((*jump & 0xc0) == 0x80)
+				jump --;
+
+			// count how many characters we've actually walked
+			// by going forward
+			p = jump;
+			do {
+				p = g_utf8_next_char (p);
+				offset ++;
+			} while (p < jump);
+
+		} while (offset < 0);
+	}
+
+	return (gchar *)p;
+}
+
+glong
+g_utf8_pointer_to_offset (const gchar *str, const gchar *pos)
+{
+	const gchar *inptr, *inend;
+	glong offset = 0;
+	glong sign = 1;
+
+	if (pos == str)
+		return 0;
+
+	if (str < pos) {
+		inptr = str;
+		inend = pos;
+	} else {
+		inptr = pos;
+		inend = str;
+		sign = -1;
+	}
+
+	do {
+		inptr = g_utf8_next_char (inptr);
+		offset++;
+	} while (inptr < inend);
+
+	return offset * sign;
+}
diff --git a/src/native/minipal/utf8converter.h b/src/native/minipal/utf8converter.h
deleted file mode 100644
index 06cd677dfe1955..00000000000000
--- a/src/native/minipal/utf8converter.h
+++ /dev/null
@@ -1,200 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#ifndef HAVE_MINIPAL_UTF8CONVERTER_H
-#define HAVE_MINIPAL_UTF8CONVERTER_H
-
-#include <config.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <errno.h>
-#include <stdint.h>
-#include <stdbool.h>
-
-#ifndef CORECLR
-#include "glib.h"
-#endif
-
-#ifdef _MSC_VER
-#define FORCE_INLINE(RET_TYPE) __forceinline RET_TYPE
-#else
-#define FORCE_INLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline))
-#endif
-
-#if G_BYTE_ORDER == G_LITTLE_ENDIAN
-#define decode_utf32 decode_utf32le
-#define encode_utf32 encode_utf32le
-#define decode_utf16 decode_utf16le
-#define encode_utf16 encode_utf16le
-#define GUINT16_TO_LE(x) (x)
-#define GUINT16_TO_BE(x) GUINT16_SWAP_LE_BE(x)
-#else
-#define decode_utf32 decode_utf32be
-#define encode_utf32 encode_utf32be
-#define decode_utf16 decode_utf16be
-#define encode_utf16 encode_utf16be
-#define GUINT16_TO_LE(x) GUINT16_SWAP_LE_BE(x)
-#define GUINT16_TO_BE(x) (x)
-#endif
-
-#ifdef CORECLR
-
-#ifdef TARGET_64BIT
-#define ptrdiff_t int64_t
-#else
-#define ptrdiff_t int32_t
-#endif
-
-#define gunichar uint32_t
-#define gunichar2 uint16_t
-#define guint uint32_t
-#define gchar char
-#define guchar unsigned char
-#define gboolean bool
-#define gsize size_t
-#define gssize ptrdiff_t
-#define gint int32_t
-#define glong long
-#define gptrdiff ptrdiff_t
-#define guint8 uint8_t
-#define guint16 uint16_t
-#define gpointer void*
-#define g_malloc malloc
-#define TRUE 1
-#define FALSE 0
-#ifndef MIN
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#endif
-
-typedef void* (*GCustomAllocator) (size_t req_size, void* custom_alloc_data);
-
-typedef struct {
-	/* In the real glib, this is a GQuark, but we dont use/need that */
-	void* domain;
-	int32_t code;
-	char *message;
-} GError;
-
-typedef struct {
-	void* buffer;
-	size_t buffer_size;
-	size_t req_buffer_size;
-} GFixedBufferCustomAllocatorData;
-
-typedef enum {
-	G_CONVERT_ERROR_NO_CONVERSION,
-	G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
-	G_CONVERT_ERROR_FAILED,
-	G_CONVERT_ERROR_PARTIAL_INPUT,
-	G_CONVERT_ERROR_BAD_URI,
-	G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
-	G_CONVERT_ERROR_NO_MEMORY
-} GConvertError;
-
-#define UNROLL_DECODE_UTF8 0
-#define UNROLL_ENCODE_UTF8 0
-
-static int decode_utf32be (char *inbuf, size_t inleft, uint32_t *outchar);
-static int encode_utf32be (uint32_t c, char *outbuf, size_t outleft);
-
-static int decode_utf32le (char *inbuf, size_t inleft, uint32_t *outchar);
-static int encode_utf32le (uint32_t c, char *outbuf, size_t outleft);
-
-static int decode_utf16be (char *inbuf, size_t inleft, uint32_t *outchar);
-static int encode_utf16be (uint32_t c, char *outbuf, size_t outleft);
-
-static int decode_utf16le (char *inbuf, size_t inleft, uint32_t *outchar);
-static int encode_utf16le (uint32_t c, char *outbuf, size_t outleft);
-
-static FORCE_INLINE (int) decode_utf8 (char *inbuf, size_t inleft, uint32_t *outchar);
-static int encode_utf8 (uint32_t c, char *outbuf, size_t outleft);
-
-static int decode_latin1 (char *inbuf, size_t inleft, uint32_t *outchar);
-static int encode_latin1 (uint32_t c, char *outbuf, size_t outleft);
-
-#define G_LITTLE_ENDIAN 1234
-#define G_BIG_ENDIAN 4321
-#define GUINT16_SWAP_LE_BE(x) ((uint16_t) (((uint16_t) x) >> 8) | ((((uint16_t)(x)) & 0xff) << 8))
-
-#ifdef BIGENDIAN
-#define G_BYTE_ORDER G_BIG_ENDIAN
-#else
-#define G_BYTE_ORDER G_LITTLE_ENDIAN
-#endif
-
-#define G_CAST_TYPE_TO_TYPE(src,dest,v) ((dest)(v))
-#define G_CAST_PTRTYPE_TO_STYPE(src,dest,v) ((dest)(gssize)(v))
-#define GUINT32_TO_UINT16(v) G_CAST_TYPE_TO_TYPE(guint32, guint16, v)
-#define GSIZE_TO_INT(v) G_CAST_TYPE_TO_TYPE(gsize, gint, v)
-#define GSSIZE_TO_UINT(v) G_CAST_TYPE_TO_TYPE(gssize, guint, v)
-#define GUNICHAR_TO_UINT8(v) G_CAST_TYPE_TO_TYPE(gunichar, guint8, v)
-#define GUNICHAR_TO_UINT16(v) G_CAST_TYPE_TO_TYPE(gunichar, guint16, v)
-#define GUNICHAR_TO_CHAR(v) G_CAST_TYPE_TO_TYPE(gunichar, gchar, v)
-#define GPTRDIFF_TO_LONG(v) G_CAST_PTRTYPE_TO_STYPE(gptrdiff, glong, v)
-#define g_return_val_if_fail(x,e)  do { if (!(x)) { printf ("%s:%d: assertion '%s' failed\n", __FILE__, __LINE__, #x); return (e); } } while(0)
-#define g_utf8_next_char(p) ((p) + g_utf8_jump_table[(unsigned char)(*p)])
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#define G_LIKELY(expr) (__builtin_expect ((expr) != 0, 1))
-#define G_UNLIKELY(expr) (__builtin_expect ((expr) != 0, 0))
-#else
-#define G_LIKELY(x) (x)
-#define G_UNLIKELY(x) (x)
-#endif
-
-void
-g_set_error (GError **err, void* domain, int32_t code, const char *format, ...)
-{
-	va_list args;
-
-	if (err) {
-		*err = (GError *) malloc (sizeof (GError));
-		(*err)->domain = domain;
-		(*err)->code = code;
-
-		va_start (args, format);
-		int s = vsnprintf(NULL, 0, format, args);
-		va_end(args);
-
-		if (s > -1)
-		{
-			(*err)->message = (char*)malloc(s);
-
-			va_start(args, format);
-			vsnprintf((*err)->message, s, format, args);
-			va_end (args);
-		}
-	}
-}
-
-#define G_CONVERT_ERROR g_convert_error_quark()
-
-inline static void
-mono_set_errno (int errno_val)
-{
-	errno = errno_val;
-}
-
-#endif // CORECLR
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Unicode encoders and decoders
- */
-
-gunichar2 *
-g_utf8_to_utf16_custom_alloc_optional (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err);
-
-gchar *
-g_utf16_to_utf8_custom_alloc_with_nulls (const gunichar2 *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean null_terminate, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err);
-
-#ifdef __cplusplus
-}
-#endif // extern "C"
-
-#endif //HAVE_MINIPAL_UTF8CONVERTER_H

From eb53c7fd05fafe5d0992c1b730e8aca31a1b05fa Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Thu, 25 May 2023 00:16:41 +0300
Subject: [PATCH 3/9] Share UTF8 converters between coreclr and mono - v2

---
 src/coreclr/pal/src/CMakeLists.txt            |    2 +-
 src/coreclr/pal/src/include/pal/utf8.h        |   52 -
 src/coreclr/pal/src/locale/unicode.cpp        |   19 +-
 src/mono/mono/eglib/CMakeLists.txt            |    9 +-
 src/mono/mono/eglib/giconv.c                  |  524 +------
 src/mono/mono/eglib/glib.h                    |    4 -
 src/mono/mono/eglib/test/utf8.c               |    6 +-
 src/mono/mono/metadata/object.c               |    2 +-
 .../src/locale => native/minipal}/utf8.cpp    | 1206 ++++++++---------
 src/native/minipal/utf8.h                     |   33 +
 10 files changed, 704 insertions(+), 1153 deletions(-)
 delete mode 100644 src/coreclr/pal/src/include/pal/utf8.h
 rename src/{coreclr/pal/src/locale => native/minipal}/utf8.cpp (72%)
 create mode 100644 src/native/minipal/utf8.h

diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt
index bd5a6bdf4d5b22..d61ffc4cca5f11 100644
--- a/src/coreclr/pal/src/CMakeLists.txt
+++ b/src/coreclr/pal/src/CMakeLists.txt
@@ -152,7 +152,7 @@ set(SOURCES
   loader/module.cpp
   locale/unicode.cpp
   locale/unicodedata.cpp
-  locale/utf8.cpp
+  ${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp
   map/common.cpp
   map/map.cpp
   map/virtual.cpp
diff --git a/src/coreclr/pal/src/include/pal/utf8.h b/src/coreclr/pal/src/include/pal/utf8.h
deleted file mode 100644
index fa417c0a021f79..00000000000000
--- a/src/coreclr/pal/src/include/pal/utf8.h
+++ /dev/null
@@ -1,52 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    include/pal/utf8.h
-
-Abstract:
-    Header file for UTF-8 conversion functions.
-
-Revision History:
-
-
-
---*/
-
-#ifndef _PAL_UTF8_H_
-#define _PAL_UTF8_H_
-
-#include <pal/palinternal.h> /* for WCHAR */
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif // __cplusplus
-
-/*++
-Function :
-    UTF8ToUnicode
-
-    Convert a string from UTF-8 to UTF-16 (UCS-2)
---*/
-int UTF8ToUnicode(LPCSTR lpSrcStr, int cchSrc, LPWSTR lpDestStr, int cchDest, DWORD dwFlags);
-
-
-/*++
-Function :
-    UnicodeToUTF8
-
-    Convert a string from UTF-16 (UCS-2) to UTF-8
---*/
-int UnicodeToUTF8(LPCWSTR lpSrcStr, int cchSrc, LPSTR lpDestStr, int cchDest);
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-
-#endif /* _PAL_UTF8_H_ */
diff --git a/src/coreclr/pal/src/locale/unicode.cpp b/src/coreclr/pal/src/locale/unicode.cpp
index f29eabc07d9be3..d3f4da7a60b53f 100644
--- a/src/coreclr/pal/src/locale/unicode.cpp
+++ b/src/coreclr/pal/src/locale/unicode.cpp
@@ -24,7 +24,7 @@ Revision History:
 #include "pal/palinternal.h"
 #include "pal/dbgmsg.h"
 #include "pal/file.h"
-#include "pal/utf8.h"
+#include <minipal/utf8.h>
 #include "pal/cruntime.h"
 #include "pal/stackstring.hpp"
 #include "pal/unicodedata.h"
@@ -253,16 +253,11 @@ MultiByteToWideChar(
         goto EXIT;
     }
 
-    // Use UTF8ToUnicode on all systems, since it replaces
+    // Use minipal_utf8_to_utf16_preallocated on all systems, since it replaces
     // invalid characters and Core Foundation doesn't do that.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
-        if (cbMultiByte <= -1)
-        {
-        cbMultiByte = strlen(lpMultiByteStr) + 1;
-        }
-
-        retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags);
+        retval = minipal_utf8_to_utf16_preallocated(lpMultiByteStr, cbMultiByte, &lpWideCharStr, cchWideChar, dwFlags, /* treatAsLE */ false);
         goto EXIT;
     }
 
@@ -338,15 +333,11 @@ WideCharToMultiByte(
         defaultChar = *lpDefaultChar;
     }
 
-    // Use UnicodeToUTF8 on all systems because we use
+    // Use minipal_utf16_to_utf8_preallocated on all systems because we use
     // UTF8ToUnicode in MultiByteToWideChar() on all systems.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
-        if (cchWideChar == -1)
-        {
-            cchWideChar = PAL_wcslen(lpWideCharStr) + 1;
-        }
-        retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte);
+        retval = minipal_utf16_to_utf8_preallocated(lpWideCharStr, cchWideChar, &lpMultiByteStr, cbMultiByte);
         goto EXIT;
     }
 
diff --git a/src/mono/mono/eglib/CMakeLists.txt b/src/mono/mono/eglib/CMakeLists.txt
index 3de4a9c83d2f5b..a65a4e64085e92 100644
--- a/src/mono/mono/eglib/CMakeLists.txt
+++ b/src/mono/mono/eglib/CMakeLists.txt
@@ -33,7 +33,14 @@ set(eglib_common_sources
     gspawn.c
     gfile.c
     gfile-posix.c
-    gutf8.c)
+    gutf8.c
+    ${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp)
+
+if(HOST_WIN32)
+set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp" PROPERTIES COMPILE_FLAGS "/wd4100 /wd4267 /wd4458 /wd4310")
+else()
+set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp" PROPERTIES COMPILE_FLAGS "-std=c++11 -nostdlib -fno-rtti -fno-exceptions")
+endif()
 
 set(eglib_headers
   glib.h
diff --git a/src/mono/mono/eglib/giconv.c b/src/mono/mono/eglib/giconv.c
index 664ad31bba258a..79c45c8182adea 100644
--- a/src/mono/mono/eglib/giconv.c
+++ b/src/mono/mono/eglib/giconv.c
@@ -28,132 +28,28 @@
 #include <errno.h>
 #include "../utils/mono-errno.h"
 
+typedef gunichar2 char16_t;
+#include <minipal/utf8.h>
+
 #ifdef _MSC_VER
 #define FORCE_INLINE(RET_TYPE) __forceinline RET_TYPE
 #else
 #define FORCE_INLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline))
 #endif
 
-
 #define UNROLL_DECODE_UTF8 0
-#define UNROLL_ENCODE_UTF8 0
-
-static int decode_utf32be (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf32be (gunichar c, char *outbuf, size_t outleft);
-
-static int decode_utf32le (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf32le (gunichar c, char *outbuf, size_t outleft);
-
-static int decode_utf16be (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf16be (gunichar c, char *outbuf, size_t outleft);
-
-static int decode_utf16le (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf16le (gunichar c, char *outbuf, size_t outleft);
 
 static FORCE_INLINE (int) decode_utf8 (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_utf8 (gunichar c, char *outbuf, size_t outleft);
-
-static int decode_latin1 (char *inbuf, size_t inleft, gunichar *outchar);
-static int encode_latin1 (gunichar c, char *outbuf, size_t outleft);
 
 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
-#define decode_utf32 decode_utf32le
-#define encode_utf32 encode_utf32le
 #define decode_utf16 decode_utf16le
-#define encode_utf16 encode_utf16le
 #else
-#define decode_utf32 decode_utf32be
-#define encode_utf32 encode_utf32be
+#ifndef BIGENDIAN
+#define BIGENDIAN
+#endif
 #define decode_utf16 decode_utf16be
-#define encode_utf16 encode_utf16be
 #endif
 
-/*
- * Unicode encoders and decoders
- */
-
-static FORCE_INLINE (uint32_t)
-read_uint32_endian (unsigned char *inptr, unsigned endian)
-{
-	if (endian == G_LITTLE_ENDIAN)
-		return (inptr[3] << 24) | (inptr[2] << 16) | (inptr[1] << 8) | inptr[0];
-	return (inptr[0] << 24) | (inptr[1] << 16) | (inptr[2] << 8) | inptr[3];
-}
-
-static int
-decode_utf32_endian (char *inbuf, size_t inleft, gunichar *outchar, unsigned endian)
-{
-	unsigned char *inptr = (unsigned char *) inbuf;
-	gunichar c;
-
-	if (inleft < 4) {
-		mono_set_errno (EINVAL);
-		return -1;
-	}
-
-	c = read_uint32_endian (inptr, endian);
-
-	if (c >= 0xd800 && c < 0xe000) {
-		mono_set_errno (EILSEQ);
-		return -1;
-	} else if (c >= 0x110000) {
-		mono_set_errno (EILSEQ);
-		return -1;
-	}
-
-	*outchar = c;
-
-	return 4;
-}
-
-static int
-decode_utf32be (char *inbuf, size_t inleft, gunichar *outchar)
-{
-	return decode_utf32_endian (inbuf, inleft, outchar, G_BIG_ENDIAN);
-}
-
-static int
-decode_utf32le (char *inbuf, size_t inleft, gunichar *outchar)
-{
-	return decode_utf32_endian (inbuf, inleft, outchar, G_LITTLE_ENDIAN);
-}
-
-static int
-encode_utf32be (gunichar c, char *outbuf, size_t outleft)
-{
-	unsigned char *outptr = (unsigned char *) outbuf;
-
-	if (outleft < 4) {
-		mono_set_errno (E2BIG);
-		return -1;
-	}
-
-	outptr[0] = (c >> 24) & 0xff;
-	outptr[1] = (c >> 16) & 0xff;
-	outptr[2] = (c >> 8) & 0xff;
-	outptr[3] = c & 0xff;
-
-	return 4;
-}
-
-static int
-encode_utf32le (gunichar c, char *outbuf, size_t outleft)
-{
-	unsigned char *outptr = (unsigned char *) outbuf;
-
-	if (outleft < 4) {
-		mono_set_errno (E2BIG);
-		return -1;
-	}
-
-	outptr[0] = c & 0xff;
-	outptr[1] = (c >> 8) & 0xff;
-	outptr[2] = (c >> 16) & 0xff;
-	outptr[3] = (c >> 24) & 0xff;
-
-	return 4;
-}
-
 static FORCE_INLINE (uint16_t)
 read_uint16_endian (unsigned char *inptr, unsigned endian)
 {
@@ -233,50 +129,6 @@ write_uint16_endian (unsigned char *outptr, uint16_t c, unsigned endian)
 	outptr[1] = c & 0xff;
 }
 
-static FORCE_INLINE (int)
-encode_utf16_endian (gunichar c, char *outbuf, size_t outleft, unsigned endian)
-{
-	unsigned char *outptr = (unsigned char *) outbuf;
-	gunichar2 ch;
-	gunichar c2;
-
-	if (c < 0x10000) {
-		if (outleft < 2) {
-			mono_set_errno (E2BIG);
-			return -1;
-		}
-
-		write_uint16_endian (outptr, GUNICHAR_TO_UINT16 (c), endian);
-		return 2;
-	} else {
-		if (outleft < 4) {
-			mono_set_errno (E2BIG);
-			return -1;
-		}
-
-		c2 = c - 0x10000;
-
-		ch = (gunichar2) ((c2 >> 10) + 0xd800);
-		write_uint16_endian (outptr, ch, endian);
-
-		ch = (gunichar2) ((c2 & 0x3ff) + 0xdc00);
-		write_uint16_endian (outptr + 2, ch, endian);
-		return 4;
-	}
-}
-
-static int
-encode_utf16be (gunichar c, char *outbuf, size_t outleft)
-{
-	return encode_utf16_endian (c, outbuf, outleft, G_BIG_ENDIAN);
-}
-
-static int
-encode_utf16le (gunichar c, char *outbuf, size_t outleft)
-{
-	return encode_utf16_endian (c, outbuf, outleft, G_LITTLE_ENDIAN);
-}
-
 static FORCE_INLINE (int)
 decode_utf8 (char *inbuf, size_t inleft, gunichar *outchar)
 {
@@ -336,89 +188,6 @@ decode_utf8 (char *inbuf, size_t inleft, gunichar *outchar)
 	return GSIZE_TO_INT(n);
 }
 
-static int
-encode_utf8 (gunichar c, char *outbuf, size_t outleft)
-{
-	unsigned char *outptr = (unsigned char *) outbuf;
-	int base;
-	size_t n;
-
-	if (c < 0x80) {
-		outptr[0] = GUNICHAR_TO_UINT8 (c);
-		return 1;
-	} else if (c < 0x800) {
-		base = 192;
-		n = 2;
-	} else if (c < 0x10000) {
-		base = 224;
-		n = 3;
-	} else if (c < 0x200000) {
-		base = 240;
-		n = 4;
-	} else if (c < 0x4000000) {
-		base = 248;
-		n = 5;
-	} else {
-		base = 252;
-		n = 6;
-	}
-
-	if (outleft < n) {
-		mono_set_errno (E2BIG);
-		return -1;
-	}
-
-#if UNROLL_ENCODE_UTF8
-	switch (n) {
-	case 6: outptr[5] = (c & 0x3f) | 0x80; c >>= 6;
-	case 5: outptr[4] = (c & 0x3f) | 0x80; c >>= 6;
-	case 4: outptr[3] = (c & 0x3f) | 0x80; c >>= 6;
-	case 3: outptr[2] = (c & 0x3f) | 0x80; c >>= 6;
-	case 2: outptr[1] = (c & 0x3f) | 0x80; c >>= 6;
-	case 1: outptr[0] = c | base;
-	}
-#else
-	for (size_t i = n - 1; i > 0; i--) {
-		outptr[i] = (c & 0x3f) | 0x80;
-		c >>= 6;
-	}
-
-	outptr[0] = GUNICHAR_TO_UINT8 (c | base);
-#endif
-
-	return GSIZE_TO_INT(n);
-}
-
-static int
-decode_latin1 (char *inbuf, size_t inleft, gunichar *outchar)
-{
-	*outchar = (unsigned char) *inbuf;
-	return 1;
-}
-
-static int
-encode_latin1 (gunichar c, char *outbuf, size_t outleft)
-{
-	if (outleft < 1) {
-		mono_set_errno (E2BIG);
-		return -1;
-	}
-
-	if (c > 0xff) {
-		mono_set_errno (EILSEQ);
-		return -1;
-	}
-
-	*outbuf = (char) c;
-
-	return 1;
-}
-
-
-/*
- * Simple conversion API
- */
-
 static gpointer error_quark = (gpointer)"ConvertError";
 
 gpointer
@@ -426,9 +195,6 @@ g_convert_error_quark (void)
 {
 	return error_quark;
 }
-/*
- * Unicode conversion
- */
 
 /**
  * An explanation of the conversion can be found at:
@@ -559,162 +325,81 @@ g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written)
 	return outbuf;
 }
 
-static gunichar2 *
-eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, gboolean replace_invalid_codepoints, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
+static FORCE_INLINE (void)
+map_error(GError **err)
 {
-	gunichar2 *outbuf, *outptr;
-	size_t outlen = 0;
-	size_t inleft;
-	char *inptr;
-	gunichar c;
-	int u, n;
-
-	g_return_val_if_fail (str != NULL, NULL);
-
-	if (len < 0) {
-		if (include_nuls) {
-			g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, "Conversions with embedded nulls must pass the string length");
-			return NULL;
-		}
-
-		len = (glong)strlen (str);
-	}
-
-	inptr = (char *) str;
-	inleft = len;
-
-	while (inleft > 0) {
-		if ((n = decode_utf8 (inptr, inleft, &c)) < 0)
-			goto error;
-
-		if (c == 0 && !include_nuls)
-			break;
-
-		if ((u = g_unichar_to_utf16_endian (c, NULL, endian)) < 0) {
-			if (replace_invalid_codepoints) {
-				u = 2;
-			} else {
-				mono_set_errno (EILSEQ);
-				goto error;
-			}
-		}
-
-		outlen += u;
-		inleft -= n;
-		inptr += n;
-	}
-
-	if (items_read)
-		*items_read = GPTRDIFF_TO_LONG (inptr - str);
-
-	if (items_written)
-		*items_written = (glong)outlen;
-
-	if (G_LIKELY (!custom_alloc_func))
-		outptr = outbuf = g_malloc ((outlen + 1) * sizeof (gunichar2));
-	else
-		outptr = outbuf = (gunichar2 *)custom_alloc_func ((outlen + 1) * sizeof (gunichar2), custom_alloc_data);
-
-	if (G_UNLIKELY (custom_alloc_func && !outbuf)) {
-		mono_set_errno (ENOMEM);
-		goto error;
-	}
-
-	inptr = (char *) str;
-	inleft = len;
-
-	while (inleft > 0) {
-		if ((n = decode_utf8 (inptr, inleft, &c)) < 0)
-			break;
-
-		if (c == 0 && !include_nuls)
-			break;
-
-		u = g_unichar_to_utf16_endian (c, outptr, endian);
-		if ((u < 0) && replace_invalid_codepoints) {
-			outptr[0] = 0xFFFD;
-			outptr[1] = 0xFFFD;
-			u = 2;
-		}
-
-		outptr += u;
-		inleft -= n;
-		inptr += n;
-	}
-
-	*outptr = '\0';
-
-	return outbuf;
-
-error:
-	if (errno == ENOMEM) {
+	if (errno == 0) return;
+	if (errno == ERROR_INSUFFICIENT_BUFFER) {
 		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_MEMORY,
 			     "Allocation failed.");
-	} else if (errno == EILSEQ) {
+	} else if (errno == ERROR_NO_UNICODE_TRANSLATION) {
 		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
 			     "Illegal byte sequence encountered in the input.");
-	} else if (items_read) {
-		/* partial input is ok if we can let our caller know... */
 	} else {
 		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
 			     "Partial byte sequence encountered in the input.");
 	}
-
-	if (items_read)
-		*items_read = GPTRDIFF_TO_LONG (inptr - str);
-
-	if (items_written)
-		*items_written = 0;
-
-	return NULL;
 }
 
-gunichar2 *
-g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
+static gunichar2 *
+g_utf8_to_utf16_impl (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err, int dwFlags, bool treatAsLE)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_BYTE_ORDER);
+	errno = 0;
+	gunichar2* lpDestStr = NULL;
+	int ret = minipal_utf8_to_utf16_allocate (str, len, &lpDestStr, dwFlags, treatAsLE);
+	if (items_written)
+		*items_written = errno == 0 ? ret : 0;
+	map_error(err);
+	return lpDestStr;
 }
 
-gunichar2 *
-g_utf8_to_utf16be (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
+static gunichar2 *
+g_utf8_to_utf16le_custom_alloc_impl (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, bool treatAsLE)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_BIG_ENDIAN);
-}
+	errno = 0;
+	int ret = minipal_utf8_to_utf16_preallocated (str, len, 0, 0, 0, /* treatAsLE */ treatAsLE);
+	map_error(err);
 
-gunichar2 *
-g_utf8_to_utf16le (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
-{
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, NULL, NULL, err, G_LITTLE_ENDIAN);
+	if (items_written)
+		*items_written = errno == 0 ? ret : 0;
+
+	if (ret <= 0)
+		return NULL;
+
+	gunichar2* lpDestStr = custom_alloc_func((ret + 1) * sizeof (gunichar2), custom_alloc_data);
+	ret = minipal_utf8_to_utf16_preallocated (str, len, &lpDestStr, ret, MB_ERR_INVALID_CHARS, /* treatAsLE */ treatAsLE);
+	map_error(err);
+	return lpDestStr;
 }
 
 gunichar2 *
-g_utf8_to_utf16_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
+g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
+	return g_utf8_to_utf16_impl (str, len, items_read, items_written, err, MB_ERR_INVALID_CHARS, false);
 }
 
 gunichar2 *
-g_utf8_to_utf16be_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
+g_utf8_to_utf16le (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_BIG_ENDIAN);
+	return g_utf8_to_utf16_impl (str, len, items_read, items_written, err, MB_ERR_INVALID_CHARS, true);
 }
 
 gunichar2 *
-g_utf8_to_utf16le_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
+eg_wtf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, FALSE, custom_alloc_func, custom_alloc_data, err, G_LITTLE_ENDIAN);
+	return g_utf8_to_utf16_impl (str, len, items_read, items_written, err, 0, false);
 }
 
 gunichar2 *
-eg_utf8_to_utf16_with_nuls (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
+g_utf8_to_utf16_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, FALSE, NULL, NULL, err, G_BYTE_ORDER);
+	return g_utf8_to_utf16le_custom_alloc_impl (str, len, items_read, items_written, custom_alloc_func, custom_alloc_data, err, false);
 }
 
 gunichar2 *
-eg_wtf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
+g_utf8_to_utf16le_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, TRUE, NULL, NULL, err, G_BYTE_ORDER);
+	return g_utf8_to_utf16le_custom_alloc_impl (str, len, items_read, items_written, custom_alloc_func, custom_alloc_data, err, true);
 }
 
 gunichar *
@@ -789,120 +474,49 @@ g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_wri
 	return outbuf;
 }
 
-static
-gchar *
-eg_utf16_to_utf8_general (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, unsigned endian)
+static gchar *
+g_utf16_to_utf8_impl (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err, bool treatAsLE)
 {
-	char *inptr, *outbuf, *outptr;
-	size_t outlen = 0;
-	size_t inleft;
-	gunichar c;
-	int n;
-
-	g_return_val_if_fail (str != NULL, NULL);
-
-	if (len < 0) {
-		len = 0;
-		while (str[len])
-			len++;
-	}
-
-	inptr = (char *) str;
-	inleft = len * 2;
-
-	while (inleft > 0) {
-		if ((n = decode_utf16_endian (inptr, inleft, &c, endian)) < 0) {
-			if (n == -2 && inleft > 2) {
-				/* This means that the first UTF-16 char was read, but second failed */
-				inleft -= 2;
-				inptr += 2;
-			}
-
-			if (errno == EILSEQ) {
-				g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
-					     "Illegal byte sequence encountered in the input.");
-			} else if (items_read) {
-				/* partial input is ok if we can let our caller know... */
-				break;
-			} else {
-				g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
-					     "Partial byte sequence encountered in the input.");
-			}
-
-			if (items_read)
-				*items_read = GPTRDIFF_TO_LONG ((inptr - (char *) str) / 2);
-
-			if (items_written)
-				*items_written = 0;
-
-			return NULL;
-		} else if (c == 0)
-			break;
-
-		outlen += g_unichar_to_utf8 (c, NULL);
-		inleft -= n;
-		inptr += n;
-	}
-
-	if (items_read)
-		*items_read = GPTRDIFF_TO_LONG ((inptr - (char *) str) / 2);
+	errno = 0;
+	gchar* lpDestStr = NULL;
+	int ret = minipal_utf16_to_utf8_allocate (str, len, &lpDestStr, treatAsLE);
 
 	if (items_written)
-		*items_written = (glong)outlen;
-
-	if (G_LIKELY (!custom_alloc_func))
-		outptr = outbuf = g_malloc (outlen + 1);
-	else
-		outptr = outbuf = (char *)custom_alloc_func (outlen + 1, custom_alloc_data);
-
-	if (G_UNLIKELY (custom_alloc_func && !outbuf)) {
-		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_MEMORY, "Allocation failed.");
-		if (items_written)
-			*items_written = 0;
-		return NULL;
-	}
+		*items_written = errno == 0 ? ret : 0;
 
-	inptr = (char *) str;
-	inleft = len * 2;
-
-	while (inleft > 0) {
-		if ((n = decode_utf16_endian (inptr, inleft, &c, endian)) < 0)
-			break;
-		else if (c == 0)
-			break;
-
-		outptr += g_unichar_to_utf8 (c, outptr);
-		inleft -= n;
-		inptr += n;
-	}
-
-	*outptr = '\0';
-
-	return outbuf;
+	map_error(err);
+	return lpDestStr;
 }
 
 gchar *
 g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_BYTE_ORDER);
+	return g_utf16_to_utf8_impl (str, len, items_read, items_written, err, /* treatAsLE */ false);
 }
 
 gchar *
 g_utf16le_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_LITTLE_ENDIAN);
-}
-
-gchar *
-g_utf16be_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
-{
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, NULL, NULL, err, G_BIG_ENDIAN);
+	return g_utf16_to_utf8_impl (str, len, items_read, items_written, err, /* treatAsLE */ true);
 }
 
 gchar *
 g_utf16_to_utf8_custom_alloc (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
-	return eg_utf16_to_utf8_general (str, len, items_read, items_written, custom_alloc_func, custom_alloc_data, err, G_BYTE_ORDER);
+	errno = 0;
+	int ret = minipal_utf16_to_utf8_preallocated (str, len, 0, 0);
+	map_error(err);
+
+	if (items_written)
+		*items_written = errno == 0 ? ret : 0;
+
+	if (ret <= 0)
+		return NULL;
+
+	gchar* lpDestStr = custom_alloc_func((ret + 1) * sizeof (gunichar2), custom_alloc_data);
+	ret = minipal_utf16_to_utf8_preallocated (str, len, &lpDestStr, ret);
+	map_error(err);
+	return lpDestStr;
 }
 
 gunichar *
diff --git a/src/mono/mono/eglib/glib.h b/src/mono/mono/eglib/glib.h
index e438c00298ec72..fcd8d2e37bdae1 100644
--- a/src/mono/mono/eglib/glib.h
+++ b/src/mono/mono/eglib/glib.h
@@ -882,14 +882,11 @@ gunichar  *g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_writte
 gunichar  *g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err);
 G_EXTERN_C // Used by libtest, at least.
 gunichar2 *g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err);
-gunichar2 *g_utf8_to_utf16be (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err);
 gunichar2 *g_utf8_to_utf16le (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err);
-gunichar2 *eg_utf8_to_utf16_with_nuls (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err);
 gunichar2 *eg_wtf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err);
 G_EXTERN_C // Used by libtest, at least.
 gchar     *g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err);
 gchar     *g_utf16le_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err);
-gchar     *g_utf16be_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err);
 gunichar  *g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err);
 gchar     *g_ucs4_to_utf8  (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **err);
 gunichar2 *g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **err);
@@ -915,7 +912,6 @@ gpointer
 g_fixed_buffer_custom_allocator (gsize req_size, gpointer custom_alloc_data);
 
 gunichar2 *g_utf8_to_utf16_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err);
-gunichar2 *g_utf8_to_utf16be_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err);
 gunichar2 *g_utf8_to_utf16le_custom_alloc (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err);
 gchar *g_utf16_to_utf8_custom_alloc (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err);
 
diff --git a/src/mono/mono/eglib/test/utf8.c b/src/mono/mono/eglib/test/utf8.c
index d36dbfaa54ed04..5602bbcbcb7208 100644
--- a/src/mono/mono/eglib/test/utf8.c
+++ b/src/mono/mono/eglib/test/utf8.c
@@ -155,7 +155,7 @@ compare_utf8_to_utf16_explicit (const gunichar2 *expected, const gchar *utf8, gl
 
 	gerror = NULL;
 	if (include_nuls)
-		ret = eg_utf8_to_utf16_with_nuls (utf8, size_spec, &in_read, &out_read, &gerror);
+		ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &gerror);
 	else
 		ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &gerror);
 
@@ -271,7 +271,7 @@ test_utf8_to_utf16_with_nuls (void)
 #endif
 
 	/* implicit length is forbidden */
-		if (eg_utf8_to_utf16_with_nuls (src1, -1, NULL, NULL, NULL) != NULL)
+		if (g_utf8_to_utf16 (src1, -1, NULL, NULL, NULL) != NULL)
 		return FAILED ("explicit nulls must fail with -1 length\n");
 
 	/* empty string */
@@ -699,7 +699,7 @@ utf8_byteslen (const gchar *src)
 static Test utf8_tests [] = {
 	{"g_utf16_to_utf8", test_utf16_to_utf8},
 	{"g_utf8_to_utf16", test_utf8_to_utf16},
-	{"g_utf8_to_utf16_with_nuls", test_utf8_to_utf16_with_nuls},
+	{"g_utf8_to_utf16_nuls", test_utf8_to_utf16_with_nuls},
 	{"g_utf8_seq", test_utf8_seq},
 	{"g_ucs4_to_utf16", test_ucs4_to_utf16 },
 	{"g_utf16_to_ucs4", test_utf16_to_ucs4 },
diff --git a/src/mono/mono/metadata/object.c b/src/mono/mono/metadata/object.c
index 8604114fe520f5..b267334250c6c2 100644
--- a/src/mono/mono/metadata/object.c
+++ b/src/mono/mono/metadata/object.c
@@ -6361,7 +6361,7 @@ mono_string_new_utf8_len (const char *text, guint length, MonoError *error)
 	gunichar2 *ut = NULL;
 	glong items_written;
 
-	ut = eg_utf8_to_utf16_with_nuls (text, length, NULL, &items_written, &eg_error);
+	ut = g_utf8_to_utf16 (text, length, NULL, &items_written, &eg_error);
 
 	if (eg_error) {
 		o = NULL_HANDLE_STRING;
diff --git a/src/coreclr/pal/src/locale/utf8.cpp b/src/native/minipal/utf8.cpp
similarity index 72%
rename from src/coreclr/pal/src/locale/utf8.cpp
rename to src/native/minipal/utf8.cpp
index f07c69ff7e15f3..ce967669c46d3d 100644
--- a/src/coreclr/pal/src/locale/utf8.cpp
+++ b/src/native/minipal/utf8.cpp
@@ -14,142 +14,169 @@ Revision History:
 
 --*/
 
-#include "pal/utf8.h"
-#include "pal/malloc.hpp"
+#include <minipal/utf8.h>
 
-using namespace CorUnix;
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <new>
 
 #define FASTLOOP
 
+#ifdef TARGET_WINDOWS
+#define W(str) L ## str
+#else
+#define W(str) u##str
+#endif
+
 struct CharUnicodeInfo
 {
-    static const WCHAR HIGH_SURROGATE_START = 0xd800;
-    static const WCHAR HIGH_SURROGATE_END = 0xdbff;
-    static const WCHAR LOW_SURROGATE_START = 0xdc00;
-    static const WCHAR LOW_SURROGATE_END = 0xdfff;
+    static const char16_t HIGH_SURROGATE_START = 0xd800;
+    static const char16_t HIGH_SURROGATE_END = 0xdbff;
+    static const char16_t LOW_SURROGATE_START = 0xdc00;
+    static const char16_t LOW_SURROGATE_END = 0xdfff;
 };
 
 struct Char
 {
     // Test if the wide character is a high surrogate
-    static bool IsHighSurrogate(const WCHAR c)
+    static bool IsHighSurrogate(const char16_t c)
     {
         return (c & 0xFC00) == CharUnicodeInfo::HIGH_SURROGATE_START;
     }
 
     // Test if the wide character is a low surrogate
-    static bool IsLowSurrogate(const WCHAR c)
+    static bool IsLowSurrogate(const char16_t c)
     {
         return (c & 0xFC00) == CharUnicodeInfo::LOW_SURROGATE_START;
     }
 
     // Test if the wide character is a surrogate half
-    static bool IsSurrogate(const WCHAR c)
+    static bool IsSurrogate(const char16_t c)
     {
         return (c & 0xF800) == CharUnicodeInfo::HIGH_SURROGATE_START;
     }
 
     // Test if the wide character is a high surrogate
-    static bool IsHighSurrogate(const WCHAR* s, int index)
+    static bool IsHighSurrogate(const char16_t* s, int index)
     {
         return IsHighSurrogate(s[index]);
     }
 
     // Test if the wide character is a low surrogate
-    static bool IsLowSurrogate(const WCHAR* s, int index)
+    static bool IsLowSurrogate(const char16_t* s, int index)
     {
         return IsLowSurrogate(s[index]);
     }
 
     // Test if the wide character is a surrogate half
-    static bool IsSurrogate(const WCHAR* s, int index)
+    static bool IsSurrogate(const char16_t* s, int index)
     {
         return IsSurrogate(s[index]);
     }
 };
 
-class ArgumentException
+size_t wcslen(const char16_t* str)
 {
+    size_t nChar = 0;
+    while (*str++) nChar++;
+    return nChar;
+}
 
-public:
-    ArgumentException(LPCSTR message)
+int wcscpy_s(char16_t *_Dst, size_t _SizeInWords, const char16_t *_Src)
+{
+
+    char16_t* p = _Dst;
+    size_t available = _SizeInWords;
+
+    if (!_Src || !_Dst || _SizeInWords == 0) return EINVAL;
+
+    while ((*p++ = *_Src++) != 0 && --available > 0);
+
+    if (available == 0)
     {
+        _Dst = 0;
+        return ERANGE;
     }
 
-    ArgumentException(LPCSTR message, LPCSTR argName)
+#ifdef DEBUG
+    size_t offset = _SizeInWords - available + 1;
+    if (offset < _SizeInWords)
     {
+        memset((_Dst) + (offset), 0xFD, ((_SizeInWords) - (offset)) * sizeof(*(_Dst)));
     }
-};
+#endif
+
+    return 0;
+}
 
-class ArgumentNullException : public ArgumentException
+int wcscat_s(char16_t *_Dst, size_t _SizeInWords, const char16_t *_Src)
 {
-public:
-    ArgumentNullException(LPCSTR argName)
-        : ArgumentException("Argument is NULL", argName)
-    {
+    char16_t* p = _Dst;
+    size_t available = _SizeInWords;
 
-    }
-};
+    if (!_Src || !_Dst || _SizeInWords == 0) return EINVAL;
 
-class ArgumentOutOfRangeException : public ArgumentException
-{
-public:
-    ArgumentOutOfRangeException(LPCSTR argName, LPCSTR message)
-        : ArgumentException(message, argName)
+    while (available > 0 && *p != 0)
     {
-
+        p++;
+        available--;
     }
-};
 
-class InsufficientBufferException : public ArgumentException
-{
-public:
-    InsufficientBufferException(LPCSTR message, LPCSTR argName)
-        : ArgumentException(message, argName)
+    if (available == 0)
     {
+        _Dst = 0;
+        return EINVAL;
+    }
 
+    while ((*p++ = *_Src++) != 0 && --available > 0)
+    {
     }
-};
 
-class Contract
-{
-public:
-    static void Assert(bool cond, LPCSTR str)
+    if (available == 0)
     {
-        if (!cond)
-        {
-            throw ArgumentException(str);
-        }
+        _Dst = 0;
+        return ERANGE;
     }
 
-    static void EndContractBlock()
+#ifdef DEBUG
+    size_t offset = _SizeInWords - available + 1;
+    if (offset < _SizeInWords)
     {
+        memset((_Dst) + (offset), 0xFD, ((_SizeInWords) - (offset)) * sizeof(*(_Dst)));
     }
-};
+#endif
+    return 0;
+}
 
-class DecoderFallbackException : public ArgumentException
-{
-    BYTE *bytesUnknown;
-    int index;
+#define ContractAssert(cond)             \
+    if (!(cond))                         \
+    {                                    \
+        errno = ERROR_INVALID_PARAMETER; \
+        return 0;                        \
+    }
 
-public:
-    DecoderFallbackException(
-        LPCSTR message, BYTE bytesUnknown[], int index) : ArgumentException(message)
-    {
-        this->bytesUnknown = bytesUnknown;
-        this->index = index;
+#define ContractAssertVoid(cond)         \
+    if (!(cond))                         \
+    {                                    \
+        errno = ERROR_INVALID_PARAMETER; \
+        return;                          \
     }
 
-    BYTE *BytesUnknown()
-    {
-        return (bytesUnknown);
+#define ContractAssertFreeFallback(cond) \
+    if (!(cond))                         \
+    {                                    \
+        errno = ERROR_INVALID_PARAMETER; \
+        if (fallback) free(fallback);    \
+        return 0;                        \
     }
 
-    int GetIndex()
-    {
-        return index;
+#define RETURN_ON_ERROR               \
+    if (errno)                        \
+    {                                 \
+        if (fallback) free(fallback); \
+        return 0;                     \
     }
-};
 
 class DecoderFallbackBuffer;
 
@@ -171,7 +198,7 @@ class DecoderFallback
 class DecoderReplacementFallback : public DecoderFallback
 {
     // Our variables
-    WCHAR strDefault[2];
+    char16_t strDefault[2];
     int strDefaultLength;
 
 public:
@@ -180,16 +207,14 @@ class DecoderReplacementFallback : public DecoderFallback
     {
     }
 
-    DecoderReplacementFallback(const WCHAR* replacement)
+    DecoderReplacementFallback(const char16_t* replacement)
     {
         // Must not be null
-        if (replacement == nullptr)
-            throw ArgumentNullException("replacement");
-        Contract::EndContractBlock();
+        ContractAssertVoid(replacement != nullptr)
 
         // Make sure it doesn't have bad surrogate pairs
         bool bFoundHigh = false;
-        int replacementLength = PAL_wcslen((const WCHAR *)replacement);
+        int replacementLength = wcslen((const char16_t *)replacement);
         for (int i = 0; i < replacementLength; i++)
         {
             // Found a surrogate?
@@ -221,14 +246,13 @@ class DecoderReplacementFallback : public DecoderFallback
             else if (bFoundHigh)
                 break;
         }
-        if (bFoundHigh)
-            throw ArgumentException("String 'replacement' contains invalid Unicode code points.", "replacement");
+        ContractAssertVoid(!bFoundHigh)
 
         wcscpy_s(strDefault, ARRAY_SIZE(strDefault), replacement);
         strDefaultLength = replacementLength;
     }
 
-    WCHAR* GetDefaultString()
+    char16_t* GetDefaultString()
     {
         return strDefault;
     }
@@ -251,12 +275,10 @@ class DecoderFallbackBuffer
     // These wrap the internal methods so that we can check for people doing stuff that's incorrect
 
 public:
-    virtual ~DecoderFallbackBuffer() = default;
-
-    virtual bool Fallback(BYTE bytesUnknown[], int index, int size) = 0;
+    virtual bool Fallback(unsigned char bytesUnknown[], int index, int size) = 0;
 
     // Get next character
-    virtual WCHAR GetNextChar() = 0;
+    virtual char16_t GetNextChar() = 0;
 
     //Back up a character
     virtual bool MovePrevious() = 0;
@@ -267,14 +289,14 @@ class DecoderFallbackBuffer
     // Clear the buffer
     virtual void Reset()
     {
-        while (GetNextChar() != (WCHAR)0);
+        while (GetNextChar() != (char16_t)0);
     }
 
     // Internal items to help us figure out what we're doing as far as error messages, etc.
     // These help us with our performance and messages internally
 protected:
-    BYTE*           byteStart;
-    WCHAR*          charEnd;
+    unsigned char*           byteStart;
+    char16_t*          charEnd;
 
     // Internal reset
     void InternalReset()
@@ -285,7 +307,7 @@ class DecoderFallbackBuffer
 
     // Set the above values
     // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
-    void InternalInitialize(BYTE* byteStart, WCHAR* charEnd)
+    void InternalInitialize(unsigned char* byteStart, char16_t* charEnd)
     {
         this->byteStart = byteStart;
         this->charEnd = charEnd;
@@ -299,17 +321,20 @@ class DecoderFallbackBuffer
     // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
     // array, and we might need the index, hence the byte*
     // Don't touch ref chars unless we succeed
-    virtual bool InternalFallback(BYTE bytes[], BYTE* pBytes, WCHAR** chars, int size)
+    virtual bool InternalFallback(unsigned char bytes[], unsigned char* pBytes, char16_t** chars, int size)
     {
 
-        Contract::Assert(byteStart != nullptr, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");
+        ContractAssert(byteStart != nullptr)
+
+        bool fallbackResult = this->Fallback(bytes, (int)(pBytes - byteStart - size), size);
+        if (errno) return false;
 
         // See if there's a fallback character and we have an output buffer then copy our string.
-        if (this->Fallback(bytes, (int)(pBytes - byteStart - size), size))
+        if (fallbackResult)
         {
             // Copy the chars to our output
-            WCHAR ch;
-            WCHAR* charTemp = *chars;
+            char16_t ch;
+            char16_t* charTemp = *chars;
             bool bHighSurrogate = false;
             while ((ch = GetNextChar()) != 0)
             {
@@ -319,15 +344,13 @@ class DecoderFallbackBuffer
                     if (Char::IsHighSurrogate(ch))
                     {
                         // High Surrogate
-                        if (bHighSurrogate)
-                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+                        ContractAssert(!bHighSurrogate)
                         bHighSurrogate = true;
                     }
                     else
                     {
                         // Low surrogate
-                        if (!bHighSurrogate)
-                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+                        ContractAssert(bHighSurrogate)
                         bHighSurrogate = false;
                     }
                 }
@@ -342,8 +365,7 @@ class DecoderFallbackBuffer
             }
 
             // Need to make sure that bHighSurrogate isn't true
-            if (bHighSurrogate)
-                throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+            ContractAssert(!bHighSurrogate)
 
             // Now we aren't going to be false, so its OK to update chars
             *chars = charTemp;
@@ -353,19 +375,22 @@ class DecoderFallbackBuffer
     }
 
     // This version just counts the fallback and doesn't actually copy anything.
-    virtual int InternalFallback(BYTE bytes[], BYTE* pBytes, int size)
-        // Right now this has both bytes[] and BYTE* bytes, since we might have extra bytes, hence the
+    virtual int InternalFallback(unsigned char bytes[], unsigned char* pBytes, int size)
+        // Right now this has both bytes[] and unsigned char* bytes, since we might have extra bytes, hence the
         // array, and we might need the index, hence the byte*
     {
 
-        Contract::Assert(byteStart != nullptr, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");
+        ContractAssert(byteStart != nullptr)
+
+        bool fallbackResult = this->Fallback(bytes, (int)(pBytes - byteStart - size), size);
+        if (errno) return 0;
 
         // See if there's a fallback character and we have an output buffer then copy our string.
-        if (this->Fallback(bytes, (int)(pBytes - byteStart - size), size))
+        if (fallbackResult)
         {
             int count = 0;
 
-            WCHAR ch;
+            char16_t ch;
             bool bHighSurrogate = false;
             while ((ch = GetNextChar()) != 0)
             {
@@ -375,15 +400,13 @@ class DecoderFallbackBuffer
                     if (Char::IsHighSurrogate(ch))
                     {
                         // High Surrogate
-                        if (bHighSurrogate)
-                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+                        ContractAssert(!bHighSurrogate)
                         bHighSurrogate = true;
                     }
                     else
                     {
                         // Low surrogate
-                        if (!bHighSurrogate)
-                            throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+                        ContractAssert(bHighSurrogate)
                         bHighSurrogate = false;
                     }
                 }
@@ -392,8 +415,7 @@ class DecoderFallbackBuffer
             }
 
             // Need to make sure that bHighSurrogate isn't true
-            if (bHighSurrogate)
-                throw ArgumentException("String 'chars' contains invalid Unicode code points.");
+            ContractAssert(!bHighSurrogate)
 
             return count;
         }
@@ -401,18 +423,12 @@ class DecoderFallbackBuffer
         // If no fallback return 0
         return 0;
     }
-
-    // private helper methods
-    void ThrowLastBytesRecursive(BYTE bytesUnknown[])
-    {
-        throw ArgumentException("Recursive fallback not allowed");
-    }
 };
 
 class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
 {
     // Store our default string
-    WCHAR strDefault[2];
+    char16_t strDefault[2];
     int strDefaultLength;
     int fallbackCount = -1;
     int fallbackIndex = -1;
@@ -422,18 +438,15 @@ class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
     DecoderReplacementFallbackBuffer(DecoderReplacementFallback* fallback)
     {
         wcscpy_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
-        strDefaultLength = PAL_wcslen((const WCHAR *)fallback->GetDefaultString());
+        strDefaultLength = wcslen((const char16_t *)fallback->GetDefaultString());
     }
 
     // Fallback Methods
-    virtual bool Fallback(BYTE bytesUnknown[], int index, int size)
+    virtual bool Fallback(unsigned char bytesUnknown[], int index, int size)
     {
         // We expect no previous fallback in our buffer
         // We can't call recursively but others might (note, we don't test on last char!!!)
-        if (fallbackCount >= 1)
-        {
-            ThrowLastBytesRecursive(bytesUnknown);
-        }
+        ContractAssert(fallbackCount < 1)
 
         // Go ahead and get our fallback
         if (strDefaultLength == 0)
@@ -445,7 +458,7 @@ class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
         return true;
     }
 
-    virtual WCHAR GetNextChar()
+    virtual char16_t GetNextChar()
     {
         // We want it to get < 0 because == 0 means that the current/last character is a fallback
         // and we need to detect recursion.  We could have a flag but we already have this counter.
@@ -465,8 +478,7 @@ class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
         }
 
         // Now make sure its in the expected range
-        Contract::Assert(fallbackIndex < strDefaultLength && fallbackIndex >= 0,
-            "Index exceeds buffer range");
+        ContractAssert(fallbackIndex < strDefaultLength && fallbackIndex >= 0)
 
         return strDefault[fallbackIndex];
     }
@@ -501,7 +513,7 @@ class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
     }
 
     // This version just counts the fallback and doesn't actually copy anything.
-    virtual int InternalFallback(BYTE bytes[], BYTE* pBytes, int size)
+    virtual int InternalFallback(unsigned char bytes[], unsigned char* pBytes, int size)
         // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
         // array, and we might need the index, hence the byte*
     {
@@ -517,13 +529,12 @@ class DecoderExceptionFallbackBuffer : public DecoderFallbackBuffer
     {
     }
 
-    virtual bool Fallback(BYTE bytesUnknown[], int index, int size)
+    virtual bool Fallback(unsigned char bytesUnknown[], int index, int size)
     {
-        throw DecoderFallbackException(
-            "Unable to translate UTF-8 character to Unicode", bytesUnknown, index);
+        ContractAssert(false)
     }
 
-    virtual WCHAR GetNextChar()
+    virtual char16_t GetNextChar()
     {
         return 0;
     }
@@ -552,7 +563,13 @@ class DecoderExceptionFallback : public DecoderFallback
 
     virtual DecoderFallbackBuffer* CreateFallbackBuffer()
     {
-        return InternalNew<DecoderExceptionFallbackBuffer>();
+        DecoderExceptionFallbackBuffer* pMem = (DecoderExceptionFallbackBuffer*)malloc(sizeof(DecoderExceptionFallbackBuffer));
+        if (pMem == nullptr)
+        {
+            errno = ERROR_INSUFFICIENT_BUFFER;
+            return nullptr;
+        }
+        return new (pMem) DecoderExceptionFallbackBuffer();
     }
 
     // Maximum number of characters that this instance of this fallback could return
@@ -564,70 +581,20 @@ class DecoderExceptionFallback : public DecoderFallback
 
 DecoderFallbackBuffer* DecoderReplacementFallback::CreateFallbackBuffer()
 {
-    return InternalNew<DecoderReplacementFallbackBuffer>(this);
-}
-
-class EncoderFallbackException : public ArgumentException
-{
-    WCHAR   charUnknown;
-    WCHAR   charUnknownHigh;
-    WCHAR   charUnknownLow;
-    int     index;
-
-public:
-    EncoderFallbackException(
-        LPCSTR message, WCHAR charUnknown, int index) : ArgumentException(message)
-    {
-        this->charUnknown = charUnknown;
-        this->index = index;
-    }
-
-    EncoderFallbackException(
-        LPCSTR message, WCHAR charUnknownHigh, WCHAR charUnknownLow, int index) : ArgumentException(message)
+    DecoderReplacementFallbackBuffer* pMem = (DecoderReplacementFallbackBuffer*)malloc(sizeof(DecoderReplacementFallbackBuffer));
+    if (pMem == nullptr)
     {
-        if (!Char::IsHighSurrogate(charUnknownHigh))
-        {
-            throw ArgumentOutOfRangeException("charUnknownHigh",
-                "Argument out of range 0xD800..0xDBFF");
-        }
-        if (!Char::IsLowSurrogate(charUnknownLow))
-        {
-            throw ArgumentOutOfRangeException("charUnknownLow",
-                "Argument out of range 0xDC00..0xDFFF");
-        }
-        Contract::EndContractBlock();
-
-        this->charUnknownHigh = charUnknownHigh;
-        this->charUnknownLow = charUnknownLow;
-        this->index = index;
+        errno = ERROR_INSUFFICIENT_BUFFER;
+        return nullptr;
     }
-
-    WCHAR GetCharUnknown()
+    pMem = new (pMem) DecoderReplacementFallbackBuffer(this);
+    if (errno)
     {
-        return (charUnknown);
+        free(pMem);
+        return nullptr;
     }
-
-    WCHAR GetCharUnknownHigh()
-    {
-        return (charUnknownHigh);
-    }
-
-    WCHAR GetCharUnknownLow()
-    {
-        return (charUnknownLow);
-    }
-
-    int GetIndex()
-    {
-        return index;
-    }
-
-    // Return true if the unknown character is a surrogate pair.
-    bool IsUnknownSurrogate()
-    {
-        return (charUnknownHigh != '\0');
-    }
-};
+    return pMem;
+}
 
 class EncoderFallbackBuffer;
 
@@ -648,7 +615,7 @@ class EncoderFallback
 class EncoderReplacementFallback : public EncoderFallback
 {
     // Our variables
-    WCHAR strDefault[2];
+    char16_t strDefault[2];
     int strDefaultLength;
 
 public:
@@ -657,16 +624,14 @@ class EncoderReplacementFallback : public EncoderFallback
     {
     }
 
-    EncoderReplacementFallback(const WCHAR* replacement)
+    EncoderReplacementFallback(const char16_t* replacement)
     {
         // Must not be null
-        if (replacement == nullptr)
-            throw ArgumentNullException("replacement");
-        Contract::EndContractBlock();
+        ContractAssertVoid(replacement != nullptr)
 
         // Make sure it doesn't have bad surrogate pairs
         bool bFoundHigh = false;
-        int replacementLength = PAL_wcslen((const WCHAR *)replacement);
+        int replacementLength = wcslen((const char16_t *)replacement);
         for (int i = 0; i < replacementLength; i++)
         {
             // Found a surrogate?
@@ -698,14 +663,13 @@ class EncoderReplacementFallback : public EncoderFallback
             else if (bFoundHigh)
                 break;
         }
-        if (bFoundHigh)
-            throw ArgumentException("String 'replacement' contains invalid Unicode code points.", "replacement");
+        ContractAssertVoid(!bFoundHigh)
 
         wcscpy_s(strDefault, ARRAY_SIZE(strDefault), replacement);
         strDefaultLength = replacementLength;
     }
 
-    WCHAR* GetDefaultString()
+    char16_t* GetDefaultString()
     {
         return strDefault;
     }
@@ -728,14 +692,12 @@ class EncoderFallbackBuffer
     // These wrap the internal methods so that we can check for people doing stuff that is incorrect
 
 public:
-    virtual ~EncoderFallbackBuffer() = default;
+    virtual bool Fallback(char16_t charUnknown, int index) = 0;
 
-    virtual bool Fallback(WCHAR charUnknown, int index) = 0;
-
-    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index) = 0;
+    virtual bool Fallback(char16_t charUnknownHigh, char16_t charUnknownLow, int index) = 0;
 
     // Get next character
-    virtual WCHAR GetNextChar() = 0;
+    virtual char16_t GetNextChar() = 0;
 
     // Back up a character
     virtual bool MovePrevious() = 0;
@@ -747,14 +709,14 @@ class EncoderFallbackBuffer
     // Clear the buffer
     virtual void Reset()
     {
-        while (GetNextChar() != (WCHAR)0);
+        while (GetNextChar() != (char16_t)0);
     }
 
     // Internal items to help us figure out what we're doing as far as error messages, etc.
     // These help us with our performance and messages internally
 protected:
-    WCHAR*          charStart;
-    WCHAR*          charEnd;
+    char16_t*          charStart;
+    char16_t*          charEnd;
     bool            setEncoder;
     bool            bUsedEncoder;
     bool            bFallingBack = false;
@@ -773,7 +735,7 @@ class EncoderFallbackBuffer
 
     // Set the above values
     // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
-    void InternalInitialize(WCHAR* charStart, WCHAR* charEnd, bool setEncoder)
+    void InternalInitialize(char16_t* charStart, char16_t* charEnd, bool setEncoder)
     {
         this->charStart = charStart;
         this->charEnd = charEnd;
@@ -783,9 +745,9 @@ class EncoderFallbackBuffer
         this->iRecursionCount = 0;
     }
 
-    WCHAR InternalGetNextChar()
+    char16_t InternalGetNextChar()
     {
-        WCHAR ch = GetNextChar();
+        char16_t ch = GetNextChar();
         bFallingBack = (ch != 0);
         if (ch == 0) iRecursionCount = 0;
         return ch;
@@ -799,11 +761,10 @@ class EncoderFallbackBuffer
     // Note that this could also change the contents of this->encoder, which is the same
     // object that the caller is using, so the caller could mess up the encoder for us
     // if they aren't careful.
-    virtual bool InternalFallback(WCHAR ch, WCHAR** chars)
+    virtual bool InternalFallback(char16_t ch, char16_t** chars)
     {
         // Shouldn't have null charStart
-        Contract::Assert(charStart != nullptr,
-            "[EncoderFallback.InternalFallbackBuffer]Fallback buffer is not initialized");
+        ContractAssert(charStart != nullptr)
 
         // Get our index, remember chars was preincremented to point at next char, so have to -1
         int index = (int)(*chars - charStart) - 1;
@@ -820,12 +781,11 @@ class EncoderFallbackBuffer
             else
             {
                 // Might have a low surrogate
-                WCHAR cNext = **chars;
+                char16_t cNext = **chars;
                 if (Char::IsLowSurrogate(cNext))
                 {
                     // If already falling back then fail
-                    if (bFallingBack && iRecursionCount++ > iMaxRecursion)
-                        ThrowLastCharRecursive(ch, cNext);
+                    ContractAssert(!bFallingBack || iRecursionCount++ <= iMaxRecursion)
 
                     // Next is a surrogate, add it as surrogate pair, and increment chars
                     (*chars)++;
@@ -838,33 +798,19 @@ class EncoderFallbackBuffer
         }
 
         // If already falling back then fail
-        if (bFallingBack && iRecursionCount++ > iMaxRecursion)
-            ThrowLastCharRecursive((int)ch);
+        ContractAssert(!bFallingBack || iRecursionCount++ <= iMaxRecursion)
 
         // Fall back our char
         bFallingBack = Fallback(ch, index);
 
         return bFallingBack;
     }
-
-    // private helper methods
-    void ThrowLastCharRecursive(WCHAR highSurrogate, WCHAR lowSurrogate)
-    {
-        // Throw it, using our complete character
-        throw ArgumentException("Recursive fallback not allowed", "chars");
-    }
-
-    void ThrowLastCharRecursive(int utf32Char)
-    {
-        throw ArgumentException("Recursive fallback not allowed", "chars");
-    }
-
 };
 
 class EncoderReplacementFallbackBuffer : public EncoderFallbackBuffer
 {
     // Store our default string
-    WCHAR strDefault[4];
+    char16_t strDefault[4];
     int strDefaultLength;
     int fallbackCount = -1;
     int fallbackIndex = -1;
@@ -875,25 +821,16 @@ class EncoderReplacementFallbackBuffer : public EncoderFallbackBuffer
         // 2X in case we're a surrogate pair
         wcscpy_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
         wcscat_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
-        strDefaultLength = 2 * PAL_wcslen((const WCHAR *)fallback->GetDefaultString());
+        strDefaultLength = 2 * wcslen((const char16_t *)fallback->GetDefaultString());
 
     }
 
     // Fallback Methods
-    virtual bool Fallback(WCHAR charUnknown, int index)
+    virtual bool Fallback(char16_t charUnknown, int index)
     {
         // If we had a buffer already we're being recursive, throw, it's probably at the suspect
         // character in our array.
-        if (fallbackCount >= 1)
-        {
-            // If we're recursive we may still have something in our buffer that makes this a surrogate
-            if (Char::IsHighSurrogate(charUnknown) && fallbackCount >= 0 &&
-                Char::IsLowSurrogate(strDefault[fallbackIndex + 1]))
-                ThrowLastCharRecursive(charUnknown, strDefault[fallbackIndex + 1]);
-
-            // Nope, just one character
-            ThrowLastCharRecursive((int)charUnknown);
-        }
+        ContractAssert(fallbackCount < 1)
 
         // Go ahead and get our fallback
         // Divide by 2 because we aren't a surrogate pair
@@ -903,22 +840,15 @@ class EncoderReplacementFallbackBuffer : public EncoderFallbackBuffer
         return fallbackCount != 0;
     }
 
-    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index)
+    virtual bool Fallback(char16_t charUnknownHigh, char16_t charUnknownLow, int index)
     {
         // Double check input surrogate pair
-        if (!Char::IsHighSurrogate(charUnknownHigh))
-            throw ArgumentOutOfRangeException("charUnknownHigh",
-            "Argument out of range 0xD800..0xDBFF");
-
-        if (!Char::IsLowSurrogate(charUnknownLow))
-            throw ArgumentOutOfRangeException("charUnknownLow",
-            "Argument out of range 0xDC00..0xDFFF");
-        Contract::EndContractBlock();
+        ContractAssert(Char::IsHighSurrogate(charUnknownHigh))
+        ContractAssert(Char::IsLowSurrogate(charUnknownLow))
 
         // If we had a buffer already we're being recursive, throw, it's probably at the suspect
         // character in our array.
-        if (fallbackCount >= 1)
-            ThrowLastCharRecursive(charUnknownHigh, charUnknownLow);
+        ContractAssert(fallbackCount < 1)
 
         // Go ahead and get our fallback
         fallbackCount = strDefaultLength;
@@ -927,7 +857,7 @@ class EncoderReplacementFallbackBuffer : public EncoderFallbackBuffer
         return fallbackCount != 0;
     }
 
-    virtual WCHAR GetNextChar()
+    virtual char16_t GetNextChar()
     {
         // We want it to get < 0 because == 0 means that the current/last character is a fallback
         // and we need to detect recursion.  We could have a flag but we already have this counter.
@@ -947,8 +877,7 @@ class EncoderReplacementFallbackBuffer : public EncoderFallbackBuffer
         }
 
         // Now make sure its in the expected range
-        Contract::Assert(fallbackIndex < strDefaultLength && fallbackIndex >= 0,
-            "Index exceeds buffer range");
+        ContractAssert(fallbackIndex < strDefaultLength && fallbackIndex >= 0)
 
         return strDefault[fallbackIndex];
     }
@@ -991,34 +920,24 @@ class EncoderExceptionFallbackBuffer : public EncoderFallbackBuffer
     {
     }
 
-    virtual bool Fallback(WCHAR charUnknown, int index)
+    virtual bool Fallback(char16_t charUnknown, int index)
     {
         // Fall back our char
-        throw EncoderFallbackException("Unable to translate Unicode character to UTF-8", charUnknown, index);
+        ContractAssert(false)
     }
 
-    virtual bool Fallback(WCHAR charUnknownHigh, WCHAR charUnknownLow, int index)
+    virtual bool Fallback(char16_t charUnknownHigh, char16_t charUnknownLow, int index)
     {
-        if (!Char::IsHighSurrogate(charUnknownHigh))
-        {
-            throw ArgumentOutOfRangeException("charUnknownHigh",
-                "Argument out of range 0xD800..0xDBFF");
-        }
-        if (!Char::IsLowSurrogate(charUnknownLow))
-        {
-            throw ArgumentOutOfRangeException("charUnknownLow",
-                "Argument out of range 0xDC00..0xDFFF");
-        }
-        Contract::EndContractBlock();
+        ContractAssert(Char::IsHighSurrogate(charUnknownHigh))
+        ContractAssert(Char::IsLowSurrogate(charUnknownLow))
 
         //int iTemp = Char::ConvertToUtf32(charUnknownHigh, charUnknownLow);
 
         // Fall back our char
-        throw EncoderFallbackException(
-            "Unable to translate Unicode character to UTF-8", charUnknownHigh, charUnknownLow, index);
+        ContractAssert(false)
     }
 
-    virtual WCHAR GetNextChar()
+    virtual char16_t GetNextChar()
     {
         return 0;
     }
@@ -1046,7 +965,10 @@ class EncoderExceptionFallback : public EncoderFallback
 
     virtual EncoderFallbackBuffer* CreateFallbackBuffer()
     {
-        return InternalNew<EncoderExceptionFallbackBuffer>();
+        EncoderExceptionFallbackBuffer* pMem = (EncoderExceptionFallbackBuffer*)malloc(sizeof(EncoderExceptionFallbackBuffer));
+        if (pMem == nullptr)
+            return nullptr;
+        return new (pMem) EncoderExceptionFallbackBuffer();
     }
 
     // Maximum number of characters that this instance of this fallback could return
@@ -1058,7 +980,13 @@ class EncoderExceptionFallback : public EncoderFallback
 
 EncoderFallbackBuffer* EncoderReplacementFallback::CreateFallbackBuffer()
 {
-    return InternalNew<EncoderReplacementFallbackBuffer>(this);
+    EncoderReplacementFallbackBuffer* pMem = (EncoderReplacementFallbackBuffer*)malloc(sizeof(EncoderReplacementFallbackBuffer));
+    if (pMem == nullptr)
+    {
+        errno = ERROR_INSUFFICIENT_BUFFER;
+        return nullptr;
+    }
+    return new (pMem) EncoderReplacementFallbackBuffer(this);
 }
 
 class UTF8Encoding
@@ -1075,65 +1003,39 @@ class UTF8Encoding
     DecoderReplacementFallback decoderReplacementFallback;
     DecoderExceptionFallback decoderExceptionFallback;
 
+#if BIGENDIAN
+    bool treatAsLE;
+#endif
+
     bool InRange(int c, int begin, int end)
     {
         return begin <= c && c <= end;
     }
 
-    size_t PtrDiff(WCHAR* ptr1, WCHAR* ptr2)
+    size_t PtrDiff(char16_t* ptr1, char16_t* ptr2)
     {
         return ptr1 - ptr2;
     }
 
-    size_t PtrDiff(BYTE* ptr1, BYTE* ptr2)
+    size_t PtrDiff(unsigned char* ptr1, unsigned char* ptr2)
     {
         return ptr1 - ptr2;
     }
 
-    void ThrowBytesOverflow()
-    {
-        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
-        // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
-        throw InsufficientBufferException("The output byte buffer is too small to contain the encoded data", "bytes");
-    }
-
-    void ThrowBytesOverflow(bool nothingEncoded)
-    {
-        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
-        // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
-        if (nothingEncoded){
-            ThrowBytesOverflow();
-        }
-    }
-
-    void ThrowCharsOverflow()
-    {
-        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
-        // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
-        throw InsufficientBufferException("The output char buffer is too small to contain the encoded data", "chars");
-    }
-
-    void ThrowCharsOverflow(bool nothingEncoded)
-    {
-        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
-        // This happens if user has implemented an decoder fallback with a broken GetMaxCharCount
-        if (nothingEncoded){
-            ThrowCharsOverflow();
-        }
-    }
-
     // During GetChars we had an invalid byte sequence
     // pSrc is backed up to the start of the bad sequence if we didn't have room to
     // fall it back.  Otherwise pSrc remains where it is.
-    bool FallbackInvalidByteSequence(BYTE** pSrc, int ch, DecoderFallbackBuffer* fallback, WCHAR** pTarget)
+    bool FallbackInvalidByteSequence(unsigned char** pSrc, int ch, DecoderFallbackBuffer* fallback, char16_t** pTarget)
     {
         // Get our byte[]
-        BYTE* pStart = *pSrc;
-        BYTE bytesUnknown[3];
+        unsigned char* pStart = *pSrc;
+        unsigned char bytesUnknown[3];
         int size = GetBytesUnknown(pStart, ch, bytesUnknown);
+        bool fallbackResult = fallback->InternalFallback(bytesUnknown, *pSrc, pTarget, size);
+        RETURN_ON_ERROR
 
         // Do the actual fallback
-        if (!fallback->InternalFallback(bytesUnknown, *pSrc, pTarget, size))
+        if (!fallbackResult)
         {
             // Oops, it failed, back up to pStart
             *pSrc = pStart;
@@ -1144,10 +1046,10 @@ class UTF8Encoding
         return true;
     }
 
-    int FallbackInvalidByteSequence(BYTE* pSrc, int ch, DecoderFallbackBuffer *fallback)
+    int FallbackInvalidByteSequence(unsigned char* pSrc, int ch, DecoderFallbackBuffer *fallback)
     {
         // Get our byte[]
-        BYTE bytesUnknown[3];
+        unsigned char bytesUnknown[3];
         int size = GetBytesUnknown(pSrc, ch, bytesUnknown);
 
         // Do the actual fallback
@@ -1159,7 +1061,7 @@ class UTF8Encoding
         return count;
     }
 
-    int GetBytesUnknown(BYTE* pSrc, int ch, BYTE* bytesUnknown)
+    int GetBytesUnknown(unsigned char* pSrc, int ch, unsigned char* bytesUnknown)
     {
         int size;
 
@@ -1168,14 +1070,14 @@ class UTF8Encoding
         if (ch < 0x100 && ch >= 0)
         {
             pSrc--;
-            bytesUnknown[0] = (BYTE)ch;
+            bytesUnknown[0] = (unsigned char)ch;
             size =  1;
         }
         // See if its an unfinished 2 byte sequence
         else if ((ch & (SupplimentarySeq | ThreeByteSeq)) == 0)
         {
             pSrc--;
-            bytesUnknown[0] = (BYTE)((ch & 0x1F) | 0xc0);
+            bytesUnknown[0] = (unsigned char)((ch & 0x1F) | 0xc0);
             size = 1;
         }
         // So now we're either 2nd byte of 3 or 4 byte sequence or
@@ -1188,24 +1090,24 @@ class UTF8Encoding
             {
                 // 3rd byte of 4 byte sequence
                 pSrc -= 3;
-                bytesUnknown[0] = (BYTE)(((ch >> 12) & 0x07) | 0xF0);
-                bytesUnknown[1] = (BYTE)(((ch >> 6) & 0x3F) | 0x80);
-                bytesUnknown[2] = (BYTE)(((ch)& 0x3F) | 0x80);
+                bytesUnknown[0] = (unsigned char)(((ch >> 12) & 0x07) | 0xF0);
+                bytesUnknown[1] = (unsigned char)(((ch >> 6) & 0x3F) | 0x80);
+                bytesUnknown[2] = (unsigned char)(((ch)& 0x3F) | 0x80);
                 size = 3;
             }
             else if ((ch & (FinalByte >> 12)) != 0)
             {
                 // 2nd byte of a 4 byte sequence
                 pSrc -= 2;
-                bytesUnknown[0] = (BYTE)(((ch >> 6) & 0x07) | 0xF0);
-                bytesUnknown[1] = (BYTE)(((ch)& 0x3F) | 0x80);
+                bytesUnknown[0] = (unsigned char)(((ch >> 6) & 0x07) | 0xF0);
+                bytesUnknown[1] = (unsigned char)(((ch)& 0x3F) | 0x80);
                 size = 2;
             }
             else
             {
                 // 4th byte of a 4 byte sequence
                 pSrc--;
-                bytesUnknown[0] = (BYTE)(((ch)& 0x07) | 0xF0);
+                bytesUnknown[0] = (unsigned char)(((ch)& 0x07) | 0xF0);
                 size = 1;
             }
         }
@@ -1216,15 +1118,15 @@ class UTF8Encoding
             {
                 // So its 2nd byte of a 3 byte sequence
                 pSrc -= 2;
-                bytesUnknown[0] = (BYTE)(((ch >> 6) & 0x0F) | 0xE0);
-                bytesUnknown[1] = (BYTE)(((ch)& 0x3F) | 0x80);
+                bytesUnknown[0] = (unsigned char)(((ch >> 6) & 0x0F) | 0xE0);
+                bytesUnknown[1] = (unsigned char)(((ch)& 0x3F) | 0x80);
                 size = 2;
             }
             else
             {
                 // 1st byte of a 3 byte sequence
                 pSrc--;
-                bytesUnknown[0] = (BYTE)(((ch)& 0x0F) | 0xE0);
+                bytesUnknown[0] = (unsigned char)(((ch)& 0x0F) | 0xE0);
                 size = 1;
             }
         }
@@ -1234,8 +1136,11 @@ class UTF8Encoding
 
 public:
 
-    UTF8Encoding(bool isThrowException)
+    UTF8Encoding(bool isThrowException, bool treatAsLE)
         : encoderReplacementFallback(W("\xFFFD")), decoderReplacementFallback(W("\xFFFD"))
+#if BIGENDIAN
+        , treatAsLE(treatAsLE)
+#endif
     {
         if (isThrowException)
         {
@@ -1258,14 +1163,14 @@ class UTF8Encoding
     const int SupplimentarySeq = 1 << 28;
     const int ThreeByteSeq = 1 << 27;
 
-    int GetCharCount(BYTE* bytes, int count)
+    int GetCharCount(unsigned char* bytes, int count)
     {
-        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetCharCount]bytes!=nullptr");
-        Contract::Assert(count >= 0, "[UTF8Encoding.GetCharCount]count >=0");
+        ContractAssert(bytes != nullptr)
+        ContractAssert(count >= 0)
 
         // Initialize stuff
-        BYTE *pSrc = bytes;
-        BYTE *pEnd = pSrc + count;
+        unsigned char *pSrc = bytes;
+        unsigned char *pEnd = pSrc + count;
 
         // Start by assuming we have as many as count, charCount always includes the adjustment
         // for the character being decoded
@@ -1304,8 +1209,7 @@ class UTF8Encoding
             ch = (ch << 6) | (cha & 0x3F);
 
             if ((ch & FinalByte) == 0) {
-                Contract::Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
-                    "[UTF8Encoding.GetChars]Invariant volation");
+                ContractAssertFreeFallback((ch & (SupplimentarySeq | ThreeByteSeq)) != 0)
 
                 if ((ch & SupplimentarySeq) != 0) {
                     if ((ch & (FinalByte >> 6)) != 0) {
@@ -1345,6 +1249,7 @@ class UTF8Encoding
             if (fallback == nullptr)
             {
                 fallback = decoderFallback->CreateFallbackBuffer();
+                RETURN_ON_ERROR
                 fallback->InternalInitialize(bytes, nullptr);
             }
             charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
@@ -1429,7 +1334,7 @@ class UTF8Encoding
             // don't fall into the fast decoding loop if we don't have enough bytes
             if (availableBytes <= 13) {
                 // try to get over the remainder of the ascii characters fast though
-            BYTE* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+            unsigned char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
                 while (pSrc < pLocalEnd) {
                     ch = *pSrc;
                     pSrc++;
@@ -1445,7 +1350,7 @@ class UTF8Encoding
             // To compute the upper bound, assume that all characters are ASCII characters at this point,
             //  the boundary will be decreased for every non-ASCII character we encounter
             // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
-            BYTE *pStop = pSrc + availableBytes - 7;
+            unsigned char *pStop = pSrc + availableBytes - 7;
 
             while (pSrc < pStop) {
                 ch = *pSrc;
@@ -1466,7 +1371,7 @@ class UTF8Encoding
 
                 // get pSrc 4-byte aligned
                 if (((size_t)pSrc & 0x2) != 0) {
-                    ch = *(USHORT*)pSrc;
+                    ch = *(unsigned short*)pSrc;
                     if ((ch & 0x8080) != 0) {
                         goto LongCodeWithMask16;
                     }
@@ -1496,21 +1401,27 @@ class UTF8Encoding
                 }
                 break;
 
-#if BIGENDIAN
             LongCodeWithMask32 :
-                // be careful about the sign extension
-                ch = (int)(((uint)ch) >> 16);
-            LongCodeWithMask16:
-                ch = (int)(((uint)ch) >> 8);
-#else // BIGENDIAN
-            LongCodeWithMask32:
+#if BIGENDIAN
+            // be careful about the sign extension
+            if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
+            else
+#else
+                ch &= 0xFF;
+#endif
+
             LongCodeWithMask16:
+#if BIGENDIAN
+            if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 8);
+            else
+#else
                 ch &= 0xFF;
-#endif // BIGENDIAN
-                pSrc++;
-                if (ch <= 0x7F) {
-                    continue;
-                }
+#endif
+
+            pSrc++;
+            if (ch <= 0x7F) {
+                continue;
+            }
 
             LongCode:
                 int chc = *pSrc;
@@ -1610,6 +1521,7 @@ class UTF8Encoding
             if (fallback == nullptr)
             {
                 fallback = decoderFallback->CreateFallbackBuffer();
+                RETURN_ON_ERROR
                 fallback->InternalInitialize(bytes, nullptr);
             }
             charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
@@ -1617,27 +1529,26 @@ class UTF8Encoding
 
         // Shouldn't have anything in fallback buffer for GetCharCount
         // (don't have to check m_throwOnOverflow for count)
-        Contract::Assert(fallback == nullptr || fallback->GetRemaining() == 0,
-            "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at end");
+        ContractAssertFreeFallback(fallback == nullptr || fallback->GetRemaining() == 0)
 
-        InternalDelete(fallback);
+        free(fallback);
 
         return charCount;
 
     }
 
-    int GetChars(BYTE* bytes, int byteCount, WCHAR* chars, int charCount)
+    int GetChars(unsigned char* bytes, int byteCount, char16_t* chars, int charCount)
     {
-        Contract::Assert(chars != nullptr, "[UTF8Encoding.GetChars]chars!=nullptr");
-        Contract::Assert(byteCount >= 0, "[UTF8Encoding.GetChars]byteCount >=0");
-        Contract::Assert(charCount >= 0, "[UTF8Encoding.GetChars]charCount >=0");
-        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetChars]bytes!=nullptr");
+        ContractAssert(chars != nullptr)
+        ContractAssert(byteCount >= 0)
+        ContractAssert(charCount >= 0)
+        ContractAssert(bytes != nullptr)
 
-        BYTE *pSrc = bytes;
-        WCHAR *pTarget = chars;
+        unsigned char *pSrc = bytes;
+        char16_t *pTarget = chars;
 
-        BYTE *pEnd = pSrc + byteCount;
-        WCHAR *pAllocatedBufferEnd = pTarget + charCount;
+        unsigned char *pEnd = pSrc + byteCount;
+        char16_t *pAllocatedBufferEnd = pTarget + charCount;
 
         int ch = 0;
 
@@ -1675,8 +1586,7 @@ class UTF8Encoding
 
             if ((ch & FinalByte) == 0) {
                 // Not at last byte yet
-                Contract::Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
-                    "[UTF8Encoding.GetChars]Invariant volation");
+                ContractAssertFreeFallback((ch & (SupplimentarySeq | ThreeByteSeq)) != 0)
 
                 if ((ch & SupplimentarySeq) != 0) {
                     // Its a 4-byte supplimentary sequence
@@ -1711,8 +1621,8 @@ class UTF8Encoding
             if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq) {
                 // let the range check for the second char throw the exception
                 if (pTarget < pAllocatedBufferEnd) {
-                    *pTarget = (WCHAR)(((ch >> 10) & 0x7FF) +
-                        (SHORT)((CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10))));
+                    *pTarget = (char16_t)(((ch >> 10) & 0x7FF) +
+                        (short)((CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10))));
                     pTarget++;
 
                     ch = (ch & 0x3FF) +
@@ -1728,6 +1638,7 @@ class UTF8Encoding
             if (fallback == nullptr)
             {
                 fallback = decoderFallback->CreateFallbackBuffer();
+                RETURN_ON_ERROR
                 fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
             }
 
@@ -1736,15 +1647,18 @@ class UTF8Encoding
             {
                 // Ran out of buffer space
                 // Need to throw an exception?
-                Contract::Assert(pSrc >= bytes || pTarget == chars,
-                    "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer after fallback");
+                ContractAssertFreeFallback(pSrc >= bytes || pTarget == chars)
                 fallback->InternalReset();
-                ThrowCharsOverflow(pTarget == chars);
+                if (pTarget == chars)
+                {
+                    errno = ERROR_INSUFFICIENT_BUFFER;
+                    if (fallback) free(fallback);
+                    return 0;
+                }
                 ch = 0;
                 break;
             }
-            Contract::Assert(pSrc >= bytes,
-                "[UTF8Encoding.GetChars]Expected invalid byte sequence to have remained within the byte array");
+            ContractAssert(pSrc >= bytes)
             ch = 0;
             continue;
 
@@ -1829,9 +1743,13 @@ class UTF8Encoding
 
                 // Throw that we don't have enough room (pSrc could be < chars if we had started to process
                 // a 4 byte sequence already)
-                Contract::Assert(pSrc >= bytes || pTarget == chars,
-                    "[UTF8Encoding.GetChars]Expected pSrc to be within input buffer or throw due to no output]");
-                ThrowCharsOverflow(pTarget == chars);
+                ContractAssert(pSrc >= bytes || pTarget == chars)
+                if (pTarget == chars)
+                {
+                    errno = ERROR_INSUFFICIENT_BUFFER;
+                    if (fallback) free(fallback);
+                    return 0;
+                }
 
                 // Don't store ch in decoder, we already backed up to its start
                 ch = 0;
@@ -1839,7 +1757,7 @@ class UTF8Encoding
                 // Didn't throw, just use this buffer size.
                 break;
             }
-            *pTarget = (WCHAR)ch;
+            *pTarget = (char16_t)ch;
             pTarget++;
 
 #ifdef FASTLOOP
@@ -1857,7 +1775,7 @@ class UTF8Encoding
                 }
 
                 // try to get over the remainder of the ascii characters fast though
-                BYTE* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+                unsigned char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
                 while (pSrc < pLocalEnd) {
                     ch = *pSrc;
                     pSrc++;
@@ -1865,7 +1783,7 @@ class UTF8Encoding
                     if (ch > 0x7F)
                         goto ProcessChar;
 
-                    *pTarget = (WCHAR)ch;
+                    *pTarget = (char16_t)ch;
                     pTarget++;
                 }
                 // we are done
@@ -1882,7 +1800,7 @@ class UTF8Encoding
             // To compute the upper bound, assume that all characters are ASCII characters at this point,
             //  the boundary will be decreased for every non-ASCII character we encounter
             // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
-            WCHAR *pStop = pTarget + availableBytes - 7;
+            char16_t *pStop = pTarget + availableBytes - 7;
 
             while (pTarget < pStop) {
                 ch = *pSrc;
@@ -1891,7 +1809,7 @@ class UTF8Encoding
                 if (ch > 0x7F) {
                     goto LongCode;
                 }
-                *pTarget = (WCHAR)ch;
+                *pTarget = (char16_t)ch;
                 pTarget++;
 
                 // get pSrc to be 2-byte aligned
@@ -1901,29 +1819,35 @@ class UTF8Encoding
                     if (ch > 0x7F) {
                         goto LongCode;
                     }
-                    *pTarget = (WCHAR)ch;
+                    *pTarget = (char16_t)ch;
                     pTarget++;
                 }
 
                 // get pSrc to be 4-byte aligned
                 if ((((size_t)pSrc) & 0x2) != 0) {
-                    ch = *(USHORT*)pSrc;
+                    ch = *(unsigned short*)pSrc;
                     if ((ch & 0x8080) != 0) {
                         goto LongCodeWithMask16;
                     }
 
                     // Unfortunately, this is endianness sensitive
 #if BIGENDIAN
-                    *pTarget = (WCHAR)((ch >> 8) & 0x7F);
-                    pSrc += 2;
-                    *(pTarget + 1) = (WCHAR)(ch & 0x7F);
-                    pTarget += 2;
-#else // BIGENDIAN
-                    *pTarget = (WCHAR)(ch & 0x7F);
-                    pSrc += 2;
-                    *(pTarget + 1) = (WCHAR)((ch >> 8) & 0x7F);
-                    pTarget += 2;
-#endif // BIGENDIAN
+                    if (!treatAsLE)
+                    {
+                        *pTarget = (char16_t)((ch >> 8) & 0x7F);
+                        pSrc += 2;
+                        *(pTarget + 1) = (char16_t)(ch & 0x7F);
+                        pTarget += 2;
+                    }
+                    else
+#else
+                    {
+                        *pTarget = (char16_t)(ch & 0x7F);
+                        pSrc += 2;
+                        *(pTarget + 1) = (char16_t)((ch >> 8) & 0x7F);
+                        pTarget += 2;
+                    }
+#endif
                 }
 
                 // Run 8 characters at a time!
@@ -1936,45 +1860,57 @@ class UTF8Encoding
 
                     // Unfortunately, this is endianness sensitive
 #if BIGENDIAN
-                    *pTarget = (WCHAR)((ch >> 24) & 0x7F);
-                    *(pTarget + 1) = (WCHAR)((ch >> 16) & 0x7F);
-                    *(pTarget + 2) = (WCHAR)((ch >> 8) & 0x7F);
-                    *(pTarget + 3) = (WCHAR)(ch & 0x7F);
-                    pSrc += 8;
-                    *(pTarget + 4) = (WCHAR)((chb >> 24) & 0x7F);
-                    *(pTarget + 5) = (WCHAR)((chb >> 16) & 0x7F);
-                    *(pTarget + 6) = (WCHAR)((chb >> 8) & 0x7F);
-                    *(pTarget + 7) = (WCHAR)(chb & 0x7F);
-                    pTarget += 8;
-#else // BIGENDIAN
-                    *pTarget = (WCHAR)(ch & 0x7F);
-                    *(pTarget + 1) = (WCHAR)((ch >> 8) & 0x7F);
-                    *(pTarget + 2) = (WCHAR)((ch >> 16) & 0x7F);
-                    *(pTarget + 3) = (WCHAR)((ch >> 24) & 0x7F);
-                    pSrc += 8;
-                    *(pTarget + 4) = (WCHAR)(chb & 0x7F);
-                    *(pTarget + 5) = (WCHAR)((chb >> 8) & 0x7F);
-                    *(pTarget + 6) = (WCHAR)((chb >> 16) & 0x7F);
-                    *(pTarget + 7) = (WCHAR)((chb >> 24) & 0x7F);
-                    pTarget += 8;
-#endif // BIGENDIAN
+                    if (!treatAsLE)
+                    {
+                        *pTarget = (char16_t)((ch >> 24) & 0x7F);
+                        *(pTarget + 1) = (char16_t)((ch >> 16) & 0x7F);
+                        *(pTarget + 2) = (char16_t)((ch >> 8) & 0x7F);
+                        *(pTarget + 3) = (char16_t)(ch & 0x7F);
+                        pSrc += 8;
+                        *(pTarget + 4) = (char16_t)((chb >> 24) & 0x7F);
+                        *(pTarget + 5) = (char16_t)((chb >> 16) & 0x7F);
+                        *(pTarget + 6) = (char16_t)((chb >> 8) & 0x7F);
+                        *(pTarget + 7) = (char16_t)(chb & 0x7F);
+                        pTarget += 8;
+                    }
+                    else
+#else
+                    {
+                        *pTarget = (char16_t)(ch & 0x7F);
+                        *(pTarget + 1) = (char16_t)((ch >> 8) & 0x7F);
+                        *(pTarget + 2) = (char16_t)((ch >> 16) & 0x7F);
+                        *(pTarget + 3) = (char16_t)((ch >> 24) & 0x7F);
+                        pSrc += 8;
+                        *(pTarget + 4) = (char16_t)(chb & 0x7F);
+                        *(pTarget + 5) = (char16_t)((chb >> 8) & 0x7F);
+                        *(pTarget + 6) = (char16_t)((chb >> 16) & 0x7F);
+                        *(pTarget + 7) = (char16_t)((chb >> 24) & 0x7F);
+                        pTarget += 8;
+                    }
+#endif
                 }
                 break;
 
-#if BIGENDIAN
                 LongCodeWithMask32 :
-                    // be careful about the sign extension
-                    ch = (int)(((uint)ch) >> 16);
+#if BIGENDIAN
+                // be careful about the sign extension
+                if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
+                else
+#else
+                ch &= 0xFF;
+#endif
+
                 LongCodeWithMask16:
-                    ch = (int)(((uint)ch) >> 8);
-#else // BIGENDIAN
-            LongCodeWithMask32:
-            LongCodeWithMask16:
+#if BIGENDIAN
+                if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 8);
+                else
+#else
                 ch &= 0xFF;
-#endif // BIGENDIAN
+#endif
+
                 pSrc++;
                 if (ch <= 0x7F) {
-                    *pTarget = (WCHAR)ch;
+                    *pTarget = (char16_t)ch;
                     pTarget++;
                     continue;
                 }
@@ -2024,12 +1960,12 @@ class UTF8Encoding
 
                         ch = (chc << 6) | (ch & 0x3F);
 
-                        *pTarget = (WCHAR)(((ch >> 10) & 0x7FF) +
-                            (SHORT)(CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10)));
+                        *pTarget = (char16_t)(((ch >> 10) & 0x7FF) +
+                            (short)(CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10)));
                         pTarget++;
 
                         ch = (ch & 0x3FF) +
-                            (SHORT)(CharUnicodeInfo::LOW_SURROGATE_START);
+                            (short)(CharUnicodeInfo::LOW_SURROGATE_START);
 
                         // extra byte, we're already planning 2 chars for 2 of these bytes,
                         // but the big loop is testing the target against pStop, so we need
@@ -2073,7 +2009,7 @@ class UTF8Encoding
                     ch = (ch << 6) | chc;
                 }
 
-                *pTarget = (WCHAR)ch;
+                *pTarget = (char16_t)ch;
                 pTarget++;
 
                 // extra byte, we're only expecting 1 char for each of these 2 bytes,
@@ -2083,7 +2019,7 @@ class UTF8Encoding
             }
 #endif // FASTLOOP
 
-            Contract::Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetChars]pTarget <= pAllocatedBufferEnd");
+            ContractAssert(pTarget <= pAllocatedBufferEnd)
 
             // no pending bits at this point
             ch = 0;
@@ -2101,50 +2037,53 @@ class UTF8Encoding
             if (fallback == nullptr)
             {
                 fallback = decoderFallback->CreateFallbackBuffer();
+                RETURN_ON_ERROR
                 fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
             }
 
             // This'll back us up the appropriate # of bytes if we didn't get anywhere
             if (!FallbackInvalidByteSequence(pSrc, ch, fallback))
             {
-                Contract::Assert(pSrc >= bytes || pTarget == chars,
-                    "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing");
+                ContractAssertFreeFallback(pSrc >= bytes || pTarget == chars)
 
                 // Ran out of buffer space
                 // Need to throw an exception?
                 fallback->InternalReset();
-                ThrowCharsOverflow(pTarget == chars);
+                if (pTarget == chars)
+                {
+                    errno = ERROR_INSUFFICIENT_BUFFER;
+                    if (fallback) free(fallback);
+                    return 0;
+                }
             }
-            Contract::Assert(pSrc >= bytes,
-                "[UTF8Encoding.GetChars]Expected flushing invalid byte sequence to have remained within the byte array");
+            ContractAssertFreeFallback(pSrc >= bytes)
             ch = 0;
         }
 
         // Shouldn't have anything in fallback buffer for GetChars
         // (don't have to check m_throwOnOverflow for chars)
-        Contract::Assert(fallback == nullptr || fallback->GetRemaining() == 0,
-            "[UTF8Encoding.GetChars]Expected empty fallback buffer at end");
+        ContractAssert(fallback == nullptr || fallback->GetRemaining() == 0)
 
-        InternalDelete(fallback);
+        free(fallback);
 
         return PtrDiff(pTarget, chars);
     }
 
-    int GetBytes(WCHAR* chars, int charCount, BYTE* bytes, int byteCount)
+    int GetBytes(char16_t* chars, int charCount, unsigned char* bytes, int byteCount)
     {
-        Contract::Assert(chars != nullptr, "[UTF8Encoding.GetBytes]chars!=nullptr");
-        Contract::Assert(byteCount >= 0, "[UTF8Encoding.GetBytes]byteCount >=0");
-        Contract::Assert(charCount >= 0, "[UTF8Encoding.GetBytes]charCount >=0");
-        Contract::Assert(bytes != nullptr, "[UTF8Encoding.GetBytes]bytes!=nullptr");
+        ContractAssert(chars != nullptr)
+        ContractAssert(byteCount >= 0)
+        ContractAssert(charCount >= 0)
+        ContractAssert(bytes != nullptr)
 
         // For fallback we may need a fallback buffer.
         // We wait to initialize it though in case we don't have any broken input unicode
-        EncoderFallbackBuffer* fallbackBuffer = nullptr;
-        WCHAR *pSrc = chars;
-        BYTE *pTarget = bytes;
+        EncoderFallbackBuffer* fallback = nullptr;
+        char16_t *pSrc = chars;
+        unsigned char *pTarget = bytes;
 
-        WCHAR *pEnd = pSrc + charCount;
-        BYTE *pAllocatedBufferEnd = pTarget + byteCount;
+        char16_t *pEnd = pSrc + charCount;
+        unsigned char *pAllocatedBufferEnd = pTarget + byteCount;
 
         int ch = 0;
 
@@ -2157,20 +2096,19 @@ class UTF8Encoding
 
                 if (ch == 0) {
                     // Check if there's anything left to get out of the fallback buffer
-                    ch = fallbackBuffer != nullptr ? fallbackBuffer->InternalGetNextChar() : 0;
+                    ch = fallback != nullptr ? fallback->InternalGetNextChar() : 0;
                     if (ch > 0) {
                         goto ProcessChar;
                     }
                 }
                 else {
                     // Case of leftover surrogates in the fallback buffer
-                    if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack) {
-                        Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                            "[UTF8Encoding.GetBytes]expected high surrogate"); //, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+                    if (fallback != nullptr && fallback->bFallingBack) {
+                        ContractAssertFreeFallback(ch >= 0xD800 && ch <= 0xDBFF); //, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture))
 
                         int cha = ch;
 
-                        ch = fallbackBuffer->InternalGetNextChar();
+                        ch = fallback->InternalGetNextChar();
 
                         if (InRange(ch, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
                             ch = ch + (cha << 10) + (0x10000 - CharUnicodeInfo::LOW_SURROGATE_START - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
@@ -2195,8 +2133,7 @@ class UTF8Encoding
 
             if (ch > 0) {
                 // We have a high surrogate left over from a previous loop.
-                Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                    "[UTF8Encoding.GetBytes]expected high surrogate");//, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+                ContractAssertFreeFallback(ch >= 0xD800 && ch <= 0xDBFF);//, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture))
 
                 // use separate helper variables for local contexts so that the jit optimizations
                 // won't get confused about the variable lifetimes
@@ -2219,9 +2156,9 @@ class UTF8Encoding
             }
 
             // If we've used a fallback, then we have to check for it
-            if (fallbackBuffer != nullptr)
+            if (fallback != nullptr)
             {
-                ch = fallbackBuffer->InternalGetNextChar();
+                ch = fallback->InternalGetNextChar();
                 if (ch > 0) goto ProcessChar;
             }
 
@@ -2242,19 +2179,21 @@ class UTF8Encoding
             {
                 // Lone surrogates aren't allowed, we have to do fallback for them
                 // Have to make a fallback buffer if we don't have one
-                if (fallbackBuffer == nullptr)
+                if (fallback == nullptr)
                 {
                     // wait on fallbacks if we can
                     // For fallback we may need a fallback buffer
-                    fallbackBuffer = encoderFallback->CreateFallbackBuffer();
+                    fallback = encoderFallback->CreateFallbackBuffer();
+                    RETURN_ON_ERROR
 
                     // Set our internal fallback interesting things.
-                    fallbackBuffer->InternalInitialize(chars, pEnd, true);
+                    fallback->InternalInitialize(chars, pEnd, true);
                 }
 
                 // Do our fallback.  Actually we already know its a mixed up surrogate,
                 // so the ref pSrc isn't gonna do anything.
-                fallbackBuffer->InternalFallback((WCHAR)ch, &pSrc);
+                fallback->InternalFallback((char16_t)ch, &pSrc);
+                RETURN_ON_ERROR
 
                 // Ignore it if we don't throw
                 ch = 0;
@@ -2275,11 +2214,11 @@ class UTF8Encoding
 
             if (pTarget > pAllocatedBufferEnd - bytesNeeded) {
                 // Left over surrogate from last time will cause pSrc == chars, so we'll throw
-                if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack)
+                if (fallback != nullptr && fallback->bFallingBack)
                 {
-                    fallbackBuffer->MovePrevious();              // Didn't use this fallback char
+                    fallback->MovePrevious();              // Didn't use this fallback char
                     if (ch > 0xFFFF)
-                        fallbackBuffer->MovePrevious();          // Was surrogate, didn't use 2nd part either
+                        fallback->MovePrevious();          // Was surrogate, didn't use 2nd part either
                 }
                 else
                 {
@@ -2287,52 +2226,56 @@ class UTF8Encoding
                     if (ch > 0xFFFF)
                         pSrc--;                                 // Was surrogate, didn't use 2nd part either
                 }
-                Contract::Assert(pSrc >= chars || pTarget == bytes,
-                    "[UTF8Encoding.GetBytes]Expected pSrc to be within buffer or to throw with insufficient room.");
-                ThrowBytesOverflow(pTarget == bytes);  // Throw if we must
+                ContractAssertFreeFallback(pSrc >= chars || pTarget == bytes)
+                if (pTarget == bytes)  // Throw if we must
+                {
+                    errno = ERROR_INSUFFICIENT_BUFFER;
+                    if (fallback) free(fallback);
+                    return 0;
+                }
                 ch = 0;                                         // Nothing left over (we backed up to start of pair if supplimentary)
                 break;
             }
 
             if (ch <= 0x7F) {
-                *pTarget = (BYTE)ch;
+                *pTarget = (unsigned char)ch;
             }
             else {
                 // use separate helper variables for local contexts so that the jit optimizations
                 // won't get confused about the variable lifetimes
                 int chb;
                 if (ch <= 0x7FF) {
-                    // 2 BYTE encoding
-                    chb = (BYTE)(0xC0 | (ch >> 6));
+                    // 2 unsigned char encoding
+                    chb = (unsigned char)(0xC0 | (ch >> 6));
                 }
                 else
                 {
                     if (ch <= 0xFFFF) {
-                        chb = (BYTE)(0xE0 | (ch >> 12));
+                        chb = (unsigned char)(0xE0 | (ch >> 12));
                     }
                     else
                     {
-                        *pTarget = (BYTE)(0xF0 | (ch >> 18));
+                        *pTarget = (unsigned char)(0xF0 | (ch >> 18));
                         pTarget++;
 
                         chb = 0x80 | ((ch >> 12) & 0x3F);
                     }
-                    *pTarget = (BYTE)chb;
+                    *pTarget = (unsigned char)chb;
                     pTarget++;
 
                     chb = 0x80 | ((ch >> 6) & 0x3F);
                 }
-                *pTarget = (BYTE)chb;
+                *pTarget = (unsigned char)chb;
                 pTarget++;
 
-                *pTarget = (BYTE)0x80 | (ch & 0x3F);
+                *pTarget = (unsigned char)0x80 | (ch & 0x3F);
             }
             pTarget++;
 
 
 #ifdef FASTLOOP
             // If still have fallback don't do fast loop
-            if (fallbackBuffer != nullptr && (ch = fallbackBuffer->InternalGetNextChar()) != 0)
+            if (fallback != nullptr && (ch = fallback->InternalGetNextChar()) != 0)
                 goto ProcessChar;
 
             int availableChars = PtrDiff(pEnd, pSrc);
@@ -2341,7 +2284,7 @@ class UTF8Encoding
             // don't fall into the fast decoding loop if we don't have enough characters
             // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
             if (availableChars <= 13) {
-                // we are hoping for 1 BYTE per char
+                // we are hoping for 1 unsigned char per char
                 if (availableBytes < availableChars) {
                     // not enough output room.  no pending bits at this point
                     ch = 0;
@@ -2349,16 +2292,16 @@ class UTF8Encoding
                 }
 
                 // try to get over the remainder of the ascii characters fast though
-                WCHAR* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+                char16_t* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
                 while (pSrc < pLocalEnd) {
                     ch = *pSrc;
                     pSrc++;
 
-                    // Not ASCII, need more than 1 BYTE per char
+                    // Not ASCII, need more than 1 unsigned char per char
                     if (ch > 0x7F)
                         goto ProcessChar;
 
-                    *pTarget = (BYTE)ch;
+                    *pTarget = (unsigned char)ch;
                     pTarget++;
                 }
                 // we are done, let ch be 0 to clear encoder
@@ -2366,7 +2309,7 @@ class UTF8Encoding
                 break;
             }
 
-            // we need at least 1 BYTE per character, but Convert might allow us to convert
+            // we need at least 1 unsigned char per character, but Convert might allow us to convert
             // only part of the input, so try as much as we can.  Reduce charCount if necessary
             if (availableBytes < availableChars)
             {
@@ -2381,7 +2324,7 @@ class UTF8Encoding
             //  the boundary will be decreased for every non-ASCII character we encounter
             // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
             // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
-            WCHAR *pStop = pSrc + availableChars - 5;
+            char16_t *pStop = pSrc + availableChars - 5;
 
             while (pSrc < pStop) {
                 ch = *pSrc;
@@ -2390,7 +2333,7 @@ class UTF8Encoding
                 if (ch > 0x7F) {
                     goto LongCode;
                 }
-                *pTarget = (BYTE)ch;
+                *pTarget = (unsigned char)ch;
                 pTarget++;
 
                 // get pSrc aligned
@@ -2400,7 +2343,7 @@ class UTF8Encoding
                     if (ch > 0x7F) {
                         goto LongCode;
                     }
-                    *pTarget = (BYTE)ch;
+                    *pTarget = (unsigned char)ch;
                     pTarget++;
                 }
 
@@ -2414,55 +2357,63 @@ class UTF8Encoding
 
                     // Unfortunately, this is endianness sensitive
 #if BIGENDIAN
-                    *pTarget = (BYTE)(ch >> 16);
-                    *(pTarget + 1) = (BYTE)ch;
-                    pSrc += 4;
-                    *(pTarget + 2) = (BYTE)(chc >> 16);
-                    *(pTarget + 3) = (BYTE)chc;
-                    pTarget += 4;
-#else // BIGENDIAN
-                    *pTarget = (BYTE)ch;
-                    *(pTarget + 1) = (BYTE)(ch >> 16);
-                    pSrc += 4;
-                    *(pTarget + 2) = (BYTE)chc;
-                    *(pTarget + 3) = (BYTE)(chc >> 16);
-                    pTarget += 4;
-#endif // BIGENDIAN
+                    if (!treatAsLE)
+                    {
+                        *pTarget = (unsigned char)(ch >> 16);
+                        *(pTarget + 1) = (unsigned char)ch;
+                        pSrc += 4;
+                        *(pTarget + 2) = (unsigned char)(chc >> 16);
+                        *(pTarget + 3) = (unsigned char)chc;
+                        pTarget += 4;
+                    }
+                    else
+#else
+                    {
+                        *pTarget = (unsigned char)ch;
+                        *(pTarget + 1) = (unsigned char)(ch >> 16);
+                        pSrc += 4;
+                        *(pTarget + 2) = (unsigned char)chc;
+                        *(pTarget + 3) = (unsigned char)(chc >> 16);
+                        pTarget += 4;
+                    }
+#endif
                 }
                 continue;
 
             LongCodeWithMask:
 #if BIGENDIAN
-                // be careful about the sign extension
-                ch = (int)(((uint)ch) >> 16);
-#else // BIGENDIAN
-                ch = (WCHAR)ch;
-#endif // BIGENDIAN
-                pSrc++;
+            // be careful about the sign extension
+            if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
+            else
+#else
+                ch = (char16_t)ch;
+#endif
 
-                if (ch > 0x7F) {
-                    goto LongCode;
-                }
-                *pTarget = (BYTE)ch;
-                pTarget++;
-                continue;
+            pSrc++;
+
+            if (ch > 0x7F) {
+                goto LongCode;
+            }
+            *pTarget = (unsigned char)ch;
+            pTarget++;
+            continue;
 
             LongCode:
                 // use separate helper variables for slow and fast loop so that the jit optimizations
                 // won't get confused about the variable lifetimes
                 int chd;
                 if (ch <= 0x7FF) {
-                    // 2 BYTE encoding
+                    // 2 unsigned char encoding
                     chd = 0xC0 | (ch >> 6);
                 }
                 else {
                     if (!InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                        // 3 BYTE encoding
+                        // 3 unsigned char encoding
                         chd = 0xE0 | (ch >> 12);
                     }
                     else
                     {
-                        // 4 BYTE encoding - high surrogate + low surrogate
+                        // 4 unsigned char encoding - high surrogate + low surrogate
                         if (ch > CharUnicodeInfo::HIGH_SURROGATE_END) {
                             // low without high -> bad, try again in slow loop
                             pSrc -= 1;
@@ -2484,30 +2435,30 @@ class UTF8Encoding
                             - CharUnicodeInfo::LOW_SURROGATE_START
                             - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
 
-                        *pTarget = (BYTE)(0xF0 | (ch >> 18));
-                        // pStop - this BYTE is compensated by the second surrogate character
+                        *pTarget = (unsigned char)(0xF0 | (ch >> 18));
+                        // pStop - this unsigned char is compensated by the second surrogate character
                         // 2 input chars require 4 output bytes.  2 have been anticipated already
                         // and 2 more will be accounted for by the 2 pStop-- calls below.
                         pTarget++;
 
                         chd = 0x80 | ((ch >> 12) & 0x3F);
                     }
-                    *pTarget = (BYTE)chd;
-                    pStop--;                    // 3 BYTE sequence for 1 char, so need pStop-- and the one below too.
+                    *pTarget = (unsigned char)chd;
+                    pStop--;                    // 3 unsigned char sequence for 1 char, so need pStop-- and the one below too.
                     pTarget++;
 
                     chd = 0x80 | ((ch >> 6) & 0x3F);
                 }
-                *pTarget = (BYTE)chd;
-                pStop--;                        // 2 BYTE sequence for 1 char so need pStop--.
+                *pTarget = (unsigned char)chd;
+                pStop--;                        // 2 unsigned char sequence for 1 char so need pStop--.
                 pTarget++;
 
-                *pTarget = (BYTE)(0x80 | (ch & 0x3F));
-                // pStop - this BYTE is already included
+                *pTarget = (unsigned char)(0x80 | (ch & 0x3F));
+                // pStop - this unsigned char is already included
                 pTarget++;
             }
 
-            Contract::Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetBytes]pTarget <= pAllocatedBufferEnd");
+            ContractAssertFreeFallback(pTarget <= pAllocatedBufferEnd)
 
 #endif // FASTLOOP
 
@@ -2515,18 +2466,18 @@ class UTF8Encoding
             ch = 0;
         }
 
-        InternalDelete(fallbackBuffer);
+        free(fallback);
 
         return (int)(pTarget - bytes);
     }
 
-    int GetByteCount(WCHAR *chars, int count)
+    int GetByteCount(char16_t *chars, int count)
     {
         // For fallback we may need a fallback buffer.
         // We wait to initialize it though in case we don't have any broken input unicode
-        EncoderFallbackBuffer* fallbackBuffer = nullptr;
-        WCHAR *pSrc = chars;
-        WCHAR *pEnd = pSrc + count;
+        EncoderFallbackBuffer* fallback = nullptr;
+        char16_t *pSrc = chars;
+        char16_t *pEnd = pSrc + count;
 
         // Start by assuming we have as many as count
         int byteCount = count;
@@ -2539,7 +2490,7 @@ class UTF8Encoding
 
                 if (ch == 0) {
                     // Unroll any fallback that happens at the end
-                    ch = fallbackBuffer != nullptr ? fallbackBuffer->InternalGetNextChar() : 0;
+                    ch = fallback != nullptr ? fallback->InternalGetNextChar() : 0;
                     if (ch > 0) {
                         byteCount++;
                         goto ProcessChar;
@@ -2547,11 +2498,10 @@ class UTF8Encoding
                 }
                 else {
                     // Case of surrogates in the fallback.
-                    if (fallbackBuffer != nullptr && fallbackBuffer->bFallingBack) {
-                        Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                            "[UTF8Encoding.GetBytes]expected high surrogate");// , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+                    if (fallback != nullptr && fallback->bFallingBack) {
+                        ContractAssertFreeFallback(ch >= 0xD800 && ch <= 0xDBFF);// , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture))
 
-                        ch = fallbackBuffer->InternalGetNextChar();
+                        ch = fallback->InternalGetNextChar();
                         byteCount++;
 
                         if (InRange(ch, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
@@ -2579,8 +2529,7 @@ class UTF8Encoding
             }
 
             if (ch > 0) {
-                Contract::Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                    "[UTF8Encoding.GetBytes]expected high surrogate"); // , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+                ContractAssertFreeFallback(ch >= 0xD800 && ch <= 0xDBFF); // , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture))
 
                 // use separate helper variables for local contexts so that the jit optimizations
                 // won't get confused about the variable lifetimes
@@ -2609,9 +2558,9 @@ class UTF8Encoding
             }
 
             // If we've used a fallback, then we have to check for it
-            if (fallbackBuffer != nullptr)
+            if (fallback != nullptr)
             {
-                ch = fallbackBuffer->InternalGetNextChar();
+                ch = fallback->InternalGetNextChar();
                 if (ch > 0)
                 {
                     // We have an extra byte we weren't expecting.
@@ -2639,19 +2588,21 @@ class UTF8Encoding
             {
                 // Lone surrogates aren't allowed
                 // Have to make a fallback buffer if we don't have one
-                if (fallbackBuffer == nullptr)
+                if (fallback == nullptr)
                 {
                     // wait on fallbacks if we can
                     // For fallback we may need a fallback buffer
-                    fallbackBuffer = encoderFallback->CreateFallbackBuffer();
+                    fallback = encoderFallback->CreateFallbackBuffer();
+                    RETURN_ON_ERROR
 
                     // Set our internal fallback interesting things.
-                    fallbackBuffer->InternalInitialize(chars, chars + count, false);
+                    fallback->InternalInitialize(chars, chars + count, false);
                 }
 
                 // Do our fallback.  Actually we already know its a mixed up surrogate,
                 // so the ref pSrc isn't gonna do anything.
-                fallbackBuffer->InternalFallback((WCHAR)ch, &pSrc);
+                fallback->InternalFallback((char16_t)ch, &pSrc);
+                RETURN_ON_ERROR
 
                 // Ignore it if we don't throw (we had preallocated this ch)
                 byteCount--;
@@ -2678,7 +2629,7 @@ class UTF8Encoding
 
 #ifdef FASTLOOP
             // If still have fallback don't do fast loop
-            if (fallbackBuffer != nullptr && (ch = fallbackBuffer->InternalGetNextChar()) != 0)
+            if (fallback != nullptr && (ch = fallback->InternalGetNextChar()) != 0)
             {
                 // We're reserving 1 byte for each char by default
                 byteCount++;
@@ -2690,7 +2641,7 @@ class UTF8Encoding
             // don't fall into the fast decoding loop if we don't have enough characters
             if (availableChars <= 13) {
                 // try to get over the remainder of the ascii characters fast though
-                WCHAR* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+                char16_t* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
                 while (pSrc < pLocalEnd) {
                     ch = *pSrc;
                     pSrc++;
@@ -2711,7 +2662,7 @@ class UTF8Encoding
             // To compute the upper bound, assume that all characters are ASCII characters at this point,
             //  the boundary will be decreased for every non-ASCII character we encounter
             // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
-            WCHAR *pStop = pSrc + availableChars - (3 + 4);
+            char16_t *pStop = pSrc + availableChars - (3 + 4);
 
             while (pSrc < pStop) {
                 ch = *pSrc;
@@ -2791,16 +2742,18 @@ class UTF8Encoding
 
             LongCodeWithMask:
 #if BIGENDIAN
-                // be careful about the sign extension
-                ch = (int)(((uint)ch) >> 16);
-#else // BIGENDIAN
-                ch = (WCHAR)ch;
-#endif // BIGENDIAN
-                pSrc++;
+            // be careful about the sign extension
+            if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
+            else
+#else
+                ch = (char16_t)ch;
+#endif
 
-                if (ch <= 0x7F) {
-                    continue;
-                }
+            pSrc++;
+
+            if (ch <= 0x7F) {
+                continue;
+            }
 
             LongCode:
                 // use separate helper variables for slow and fast loop so that the jit optimizations
@@ -2836,102 +2789,111 @@ class UTF8Encoding
 
 #if WIN64
         // check for overflow
-        if (byteCount < 0) {
-            throw ArgumentException("Conversion buffer overflow.");
-        }
+        ContractAssertFreeFallback(byteCount >= 0)
 #endif
+        ContractAssertFreeFallback(fallback == nullptr || fallback->GetRemaining() == 0)
 
-        Contract::Assert(fallbackBuffer == nullptr || fallbackBuffer->GetRemaining() == 0,
-            "[UTF8Encoding.GetByteCount]Expected Empty fallback buffer");
-
-        InternalDelete(fallbackBuffer);
+        free(fallback);
 
         return byteCount;
     }
-
 };
 
-
-////////////////////////////////////////////////////////////////////////////
-//
-//  UTF8ToUnicode
-//
-//  Maps a UTF-8 character string to its wide character string counterpart.
-//
-////////////////////////////////////////////////////////////////////////////
-
-int UTF8ToUnicode(
-    LPCSTR lpSrcStr,
+int minipal_utf8_to_utf16_preallocated(
+    const char* lpSrcStr,
     int cchSrc,
-    LPWSTR lpDestStr,
+    char16_t** lpDestStr,
     int cchDest,
-    DWORD dwFlags
-    )
+    unsigned int dwFlags,
+    bool treatAsLE)
 {
     int ret;
-    UTF8Encoding enc(dwFlags & MB_ERR_INVALID_CHARS);
-    try {
-        ret = enc.GetCharCount((BYTE*)lpSrcStr, cchSrc);
-        if (cchDest){
-            if (ret > cchDest){
-                SetLastError(ERROR_INSUFFICIENT_BUFFER);
-                ret = 0;
-            }
-            enc.GetChars((BYTE*)lpSrcStr, cchSrc, (WCHAR*)lpDestStr, ret);
+    errno = 0;
+
+    if (cchSrc < 0)
+        cchSrc = strlen(lpSrcStr) + 1;
+
+    UTF8Encoding enc(dwFlags & MB_ERR_INVALID_CHARS, treatAsLE);
+    ret = enc.GetCharCount((unsigned char*)lpSrcStr, cchSrc);
+    if (cchDest)
+    {
+        if (ret > cchDest)
+        {
+            errno = ERROR_INSUFFICIENT_BUFFER;
+            ret = 0;
         }
-    }
-    catch (const InsufficientBufferException& e){
-        SetLastError(ERROR_INSUFFICIENT_BUFFER);
-        return 0;
-    }
-    catch (const DecoderFallbackException& e){
-        SetLastError(ERROR_NO_UNICODE_TRANSLATION);
-        return 0;
-    }
-    catch (const ArgumentException& e){
-        SetLastError(ERROR_INVALID_PARAMETER);
-        return 0;
+        enc.GetChars((unsigned char*)lpSrcStr, cchSrc, (char16_t*)*lpDestStr, ret);
+        if (errno) ret = 0;
     }
     return ret;
 }
 
-////////////////////////////////////////////////////////////////////////////
-//
-//  UnicodeToUTF8
-//
-//  Maps a Unicode character string to its UTF-8 string counterpart.
-//
-////////////////////////////////////////////////////////////////////////////
-
-int UnicodeToUTF8(
-    LPCWSTR lpSrcStr,
+static int utf16_to_utf8_preallocated(
+    const char16_t* lpSrcStr,
     int cchSrc,
-    LPSTR lpDestStr,
-    int cchDest)
+    char** lpDestStr,
+    int cchDest,
+    bool treatAsLE)
 {
     int ret;
-    UTF8Encoding enc(false);
-    try{
-        ret = enc.GetByteCount((WCHAR*)lpSrcStr, cchSrc);
-        if (cchDest){
-            if (ret > cchDest){
-                SetLastError(ERROR_INSUFFICIENT_BUFFER);
-                ret = 0;
-            }
-            enc.GetBytes((WCHAR*)lpSrcStr, cchSrc, (BYTE*)lpDestStr, ret);
+    errno = 0;
+
+    if (cchSrc < 0)
+        cchSrc = wcslen(lpSrcStr) + 1;
+
+    UTF8Encoding enc(false, treatAsLE);
+    ret = enc.GetByteCount((char16_t*)lpSrcStr, cchSrc);
+    if (cchDest)
+    {
+        if (ret > cchDest)
+        {
+            errno = ERROR_INSUFFICIENT_BUFFER;
+            ret = 0;
         }
+        enc.GetBytes((char16_t*)lpSrcStr, cchSrc, (unsigned char*)*lpDestStr, ret);
+        if (errno) ret = 0;
     }
-    catch (const InsufficientBufferException& e){
-        SetLastError(ERROR_INSUFFICIENT_BUFFER);
-        return 0;
-    }
-    catch (const EncoderFallbackException& e){
-        SetLastError(ERROR_NO_UNICODE_TRANSLATION);
-        return 0;
+    return ret;
+}
+
+int minipal_utf16_to_utf8_preallocated(
+    const char16_t* lpSrcStr,
+    int cchSrc,
+    char** lpDestStr,
+    int cchDest)
+{
+    return utf16_to_utf8_preallocated(lpSrcStr, cchSrc, lpDestStr, cchDest, false);
+}
+
+int minipal_utf8_to_utf16_allocate(
+    const char* lpSrcStr,
+    int cchSrc,
+    char16_t** lpDestStr,
+    unsigned int dwFlags,
+    bool treatAsLE)
+{
+    int cchDest = minipal_utf8_to_utf16_preallocated(lpSrcStr, cchSrc, nullptr, 0, dwFlags, !treatAsLE);
+    if (cchDest > 0)
+    {
+        *lpDestStr = (char16_t*)malloc((cchDest + 1) * sizeof(char16_t));
+        cchDest = minipal_utf8_to_utf16_preallocated(lpSrcStr, cchSrc, lpDestStr, cchDest, dwFlags, !treatAsLE);
+        (*lpDestStr)[cchDest] = '\0';
     }
-    catch (const ArgumentException& e){
-        SetLastError(ERROR_INVALID_PARAMETER);
-        return 0;
+    return cchDest;
+}
+
+int minipal_utf16_to_utf8_allocate(
+    const char16_t* lpSrcStr,
+    int cchSrc,
+    char** lpDestStr,
+    bool treatAsLE)
+{
+    int cchDest = utf16_to_utf8_preallocated(lpSrcStr, cchSrc, nullptr, 0, treatAsLE);
+    if (cchDest > 0)
+    {
+        *lpDestStr = (char*)malloc((cchDest + 1) * sizeof(char));
+        cchDest = utf16_to_utf8_preallocated(lpSrcStr, cchSrc, lpDestStr, cchDest, treatAsLE);
+        (*lpDestStr)[cchDest] = '\0';
     }
-    return ret;
+    return cchDest;
 }
diff --git a/src/native/minipal/utf8.h b/src/native/minipal/utf8.h
new file mode 100644
index 00000000000000..71b9a805aa11b5
--- /dev/null
+++ b/src/native/minipal/utf8.h
@@ -0,0 +1,33 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef HAVE_MINIPAL_UTF8_H
+#define HAVE_MINIPAL_UTF8_H
+
+#include <minipal/utils.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define MB_ERR_INVALID_CHARS 0x00000008
+#define ERROR_NO_UNICODE_TRANSLATION 1113L
+#define ERROR_INSUFFICIENT_BUFFER 122L
+#define ERROR_INVALID_PARAMETER 87L
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif // __cplusplus
+
+int minipal_utf8_to_utf16_preallocated(const char* lpSrcStr, int cchSrc, char16_t** lpDestStr, int cchDest, unsigned int dwFlags, bool treatAsLE);
+
+int minipal_utf16_to_utf8_preallocated(const char16_t* lpSrcStr, int cchSrc, char** lpDestStr, int cchDest);
+
+int minipal_utf8_to_utf16_allocate(const char* lpSrcStr, int cchSrc, char16_t** lpDestStr, unsigned int dwFlags, bool treatAsLE);
+
+int minipal_utf16_to_utf8_allocate(const char16_t* lpSrcStr, int cchSrc, char** lpDestStr, bool treatAsLE);
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif /* HAVE_MINIPAL_UTF8_H */

From 0bd1d9cd6067b18666147346e03e5258613b372b Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Fri, 26 May 2023 06:10:06 +0300
Subject: [PATCH 4/9] Remove C++ runtime dependency

---
 src/mono/CMakeLists.txt               |  8 +++++++
 src/mono/mono/eglib/CMakeLists.txt    |  2 +-
 src/mono/mono/mini/CMakeLists.txt     |  5 ++++-
 src/mono/mono/profiler/CMakeLists.txt |  2 +-
 src/native/minipal/utf8.cpp           | 30 ++++++++++++++-------------
 5 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/src/mono/CMakeLists.txt b/src/mono/CMakeLists.txt
index bc65886aea2f36..05cd80948408ea 100644
--- a/src/mono/CMakeLists.txt
+++ b/src/mono/CMakeLists.txt
@@ -589,6 +589,14 @@ if(GCC)
   if(ENABLE_WERROR)
     append("-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
   endif()
+
+  # don't link with C++ runtime lib for targets which don't require it (tvOS and iOS need it for ICU)
+  if(NOT LLVM_PREFIX AND NOT HOST_TVOS AND NOT HOST_IOS)
+    append("-nodefaultlibs" CMAKE_CXX_FLAGS)
+    if(NOT HOST_WASM)
+      append("-lc" CMAKE_CXX_FLAGS)
+    endif()
+  endif()
 endif()
 
 ######################################
diff --git a/src/mono/mono/eglib/CMakeLists.txt b/src/mono/mono/eglib/CMakeLists.txt
index a65a4e64085e92..9946145859e030 100644
--- a/src/mono/mono/eglib/CMakeLists.txt
+++ b/src/mono/mono/eglib/CMakeLists.txt
@@ -39,7 +39,7 @@ set(eglib_common_sources
 if(HOST_WIN32)
 set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp" PROPERTIES COMPILE_FLAGS "/wd4100 /wd4267 /wd4458 /wd4310")
 else()
-set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp" PROPERTIES COMPILE_FLAGS "-std=c++11 -nostdlib -fno-rtti -fno-exceptions")
+set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp" PROPERTIES COMPILE_FLAGS "-std=c++11 -fno-rtti -fno-exceptions")
 endif()
 
 set(eglib_headers
diff --git a/src/mono/mono/mini/CMakeLists.txt b/src/mono/mono/mini/CMakeLists.txt
index 8e60babe7bb83c..9df1cb56f14fa2 100644
--- a/src/mono/mono/mini/CMakeLists.txt
+++ b/src/mono/mono/mini/CMakeLists.txt
@@ -24,7 +24,10 @@ set(OS_LIBS "-framework CoreFoundation" "-lobjc" "-lc++")
 elseif(HOST_ANDROID)
 set(OS_LIBS m dl log)
 elseif(HOST_LINUX)
-set(OS_LIBS pthread m dl)
+set(OS_LIBS pthread m dl gcc_s)
+  if(NOT CLR_CMAKE_HOST_LINUX_MUSL) # glibc build also requires libc_nonshared.a for atexit(3) usage
+    set(OS_LIBS ${OS_LIBS} -l:libc_nonshared.a)
+  endif()
 elseif(HOST_WIN32)
 set(OS_LIBS bcrypt.lib Mswsock.lib ws2_32.lib psapi.lib version.lib advapi32.lib winmm.lib kernel32.lib)
 elseif(HOST_SOLARIS)
diff --git a/src/mono/mono/profiler/CMakeLists.txt b/src/mono/mono/profiler/CMakeLists.txt
index 6bca00983686fc..f826371103b6f2 100644
--- a/src/mono/mono/profiler/CMakeLists.txt
+++ b/src/mono/mono/profiler/CMakeLists.txt
@@ -14,7 +14,7 @@ if(NOT DISABLE_LIBS)
     # Build the logging profiler only for certain platforms
     add_library(mono-profiler-log SHARED helper.c log.c log-args.c ${ZLIB_SOURCES})
     target_compile_definitions(mono-profiler-log PRIVATE -DMONO_DLL_EXPORT)
-    target_link_libraries(mono-profiler-log PRIVATE monosgen-shared monoapi eglib_objects)
+    target_link_libraries(mono-profiler-log PRIVATE monosgen-shared monoapi eglib_objects ${CMAKE_DL_LIBS})
     if(HOST_ANDROID)
       target_link_libraries(mono-profiler-log PRIVATE log)
     endif()
diff --git a/src/native/minipal/utf8.cpp b/src/native/minipal/utf8.cpp
index ce967669c46d3d..875b4eeb6008ca 100644
--- a/src/native/minipal/utf8.cpp
+++ b/src/native/minipal/utf8.cpp
@@ -19,7 +19,7 @@ Revision History:
 #include <errno.h>
 #include <limits.h>
 #include <string.h>
-#include <new>
+#include <assert.h>
 
 #define FASTLOOP
 
@@ -29,6 +29,8 @@ Revision History:
 #define W(str) u##str
 #endif
 
+inline void *operator new(size_t, void *p) throw () { return p; }
+
 struct CharUnicodeInfo
 {
     static const char16_t HIGH_SURROGATE_START = 0xd800;
@@ -188,11 +190,11 @@ class DecoderFallback
     //
     // Return the appropriate unicode string alternative to the character that need to fall back.
 
-    virtual DecoderFallbackBuffer* CreateFallbackBuffer() = 0;
+    virtual DecoderFallbackBuffer* CreateFallbackBuffer() { assert(!"pure virtual function called"); while(true); }
 
     // Maximum number of characters that this instance of this fallback could return
 
-    virtual int GetMaxCharCount() = 0;
+    virtual int GetMaxCharCount() { assert(!"pure virtual function called"); while(true); }
 };
 
 class DecoderReplacementFallback : public DecoderFallback
@@ -275,16 +277,16 @@ class DecoderFallbackBuffer
     // These wrap the internal methods so that we can check for people doing stuff that's incorrect
 
 public:
-    virtual bool Fallback(unsigned char bytesUnknown[], int index, int size) = 0;
+    virtual bool Fallback(unsigned char bytesUnknown[], int index, int size) { assert(!"pure virtual function called"); while(true); }
 
     // Get next character
-    virtual char16_t GetNextChar() = 0;
+    virtual char16_t GetNextChar() { assert(!"pure virtual function called"); while(true); }
 
     //Back up a character
-    virtual bool MovePrevious() = 0;
+    virtual bool MovePrevious() { assert(!"pure virtual function called"); while(true); }
 
     // How many chars left in this fallback?
-    virtual int GetRemaining() = 0;
+    virtual int GetRemaining() { assert(!"pure virtual function called"); while(true); }
 
     // Clear the buffer
     virtual void Reset()
@@ -606,10 +608,10 @@ class EncoderFallback
     //
     // Return the appropriate unicode string alternative to the character that need to fall back.
 
-    virtual EncoderFallbackBuffer* CreateFallbackBuffer() = 0;
+    virtual EncoderFallbackBuffer* CreateFallbackBuffer() { assert(!"pure virtual function called"); while(true); }
 
     // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount() = 0;
+    virtual int GetMaxCharCount() { assert(!"pure virtual function called"); while(true); }
 };
 
 class EncoderReplacementFallback : public EncoderFallback
@@ -692,18 +694,18 @@ class EncoderFallbackBuffer
     // These wrap the internal methods so that we can check for people doing stuff that is incorrect
 
 public:
-    virtual bool Fallback(char16_t charUnknown, int index) = 0;
+    virtual bool Fallback(char16_t charUnknown, int index) { assert(!"pure virtual function called"); while(true); }
 
-    virtual bool Fallback(char16_t charUnknownHigh, char16_t charUnknownLow, int index) = 0;
+    virtual bool Fallback(char16_t charUnknownHigh, char16_t charUnknownLow, int index) { assert(!"pure virtual function called"); while(true); }
 
     // Get next character
-    virtual char16_t GetNextChar() = 0;
+    virtual char16_t GetNextChar() { assert(!"pure virtual function called"); while(true); }
 
     // Back up a character
-    virtual bool MovePrevious() = 0;
+    virtual bool MovePrevious() { assert(!"pure virtual function called"); while(true); }
 
     // How many chars left in this fallback?
-    virtual int GetRemaining() = 0;
+    virtual int GetRemaining() { assert(!"pure virtual function called"); while(true); }
 
     // Not sure if this should be public or not.
     // Clear the buffer

From 5e1289496fcccb7f8f7ecab912765185f71dca90 Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Sat, 17 Jun 2023 13:08:16 +0300
Subject: [PATCH 5/9] Initial C++ to C conversion

---
 src/coreclr/pal/src/CMakeLists.txt     |    2 +-
 src/coreclr/pal/src/locale/unicode.cpp |   30 +-
 src/mono/CMakeLists.txt                |    8 -
 src/mono/mono/eglib/CMakeLists.txt     |   10 +-
 src/mono/mono/eglib/giconv.c           |   98 +-
 src/mono/mono/metadata/object.c        |    2 +-
 src/mono/mono/mini/CMakeLists.txt      |    7 +-
 src/mono/mono/profiler/CMakeLists.txt  |    2 +-
 src/native/minipal/utf8.c              | 2140 +++++++++++++++++
 src/native/minipal/utf8.cpp            | 2901 ------------------------
 src/native/minipal/utf8.h              |   57 +-
 11 files changed, 2299 insertions(+), 2958 deletions(-)
 create mode 100644 src/native/minipal/utf8.c
 delete mode 100644 src/native/minipal/utf8.cpp

diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt
index d61ffc4cca5f11..cd23cadbe2d749 100644
--- a/src/coreclr/pal/src/CMakeLists.txt
+++ b/src/coreclr/pal/src/CMakeLists.txt
@@ -152,7 +152,7 @@ set(SOURCES
   loader/module.cpp
   locale/unicode.cpp
   locale/unicodedata.cpp
-  ${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp
+  ${CLR_SRC_NATIVE_DIR}/minipal/utf8.c
   map/common.cpp
   map/map.cpp
   map/virtual.cpp
diff --git a/src/coreclr/pal/src/locale/unicode.cpp b/src/coreclr/pal/src/locale/unicode.cpp
index d3f4da7a60b53f..b9a0291394dc9b 100644
--- a/src/coreclr/pal/src/locale/unicode.cpp
+++ b/src/coreclr/pal/src/locale/unicode.cpp
@@ -253,11 +253,22 @@ MultiByteToWideChar(
         goto EXIT;
     }
 
-    // Use minipal_utf8_to_utf16_preallocated on all systems, since it replaces
+    // Use minipal_convert_utf8_to_utf16 on all systems, since it replaces
     // invalid characters and Core Foundation doesn't do that.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
-        retval = minipal_utf8_to_utf16_preallocated(lpMultiByteStr, cbMultiByte, &lpWideCharStr, cchWideChar, dwFlags, /* treatAsLE */ false);
+        if (cbMultiByte < 0)
+            cbMultiByte = strlen(lpMultiByteStr) + 1;
+
+        if (!lpWideCharStr || cchWideChar == 0)
+            retval = minipal_get_length_utf8_to_utf16(lpMultiByteStr, cbMultiByte, dwFlags);
+
+        if (lpWideCharStr)
+        {
+            if (cchWideChar == 0) cchWideChar = retval;
+            retval = minipal_convert_utf8_to_utf16(lpMultiByteStr, cbMultiByte, (CHAR16_T*)lpWideCharStr, cchWideChar, dwFlags);
+        }
+
         goto EXIT;
     }
 
@@ -333,11 +344,22 @@ WideCharToMultiByte(
         defaultChar = *lpDefaultChar;
     }
 
-    // Use minipal_utf16_to_utf8_preallocated on all systems because we use
+    // Use minipal_convert_utf16_to_utf8 on all systems because we use
     // UTF8ToUnicode in MultiByteToWideChar() on all systems.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
-        retval = minipal_utf16_to_utf8_preallocated(lpWideCharStr, cchWideChar, &lpMultiByteStr, cbMultiByte);
+        if (cchWideChar < 0)
+            cchWideChar = PAL_wcslen(lpWideCharStr) + 1;
+
+        if (!lpMultiByteStr || cbMultiByte == 0)
+            retval = minipal_get_length_utf16_to_utf8((CHAR16_T*)lpWideCharStr, cchWideChar, dwFlags);
+
+        if (lpMultiByteStr)
+        {
+            if (cbMultiByte == 0) cbMultiByte = retval;
+            retval = minipal_convert_utf16_to_utf8((CHAR16_T*)lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte, dwFlags);
+        }
+
         goto EXIT;
     }
 
diff --git a/src/mono/CMakeLists.txt b/src/mono/CMakeLists.txt
index 05cd80948408ea..bc65886aea2f36 100644
--- a/src/mono/CMakeLists.txt
+++ b/src/mono/CMakeLists.txt
@@ -589,14 +589,6 @@ if(GCC)
   if(ENABLE_WERROR)
     append("-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
   endif()
-
-  # don't link with C++ runtime lib for targets which don't require it (tvOS and iOS need it for ICU)
-  if(NOT LLVM_PREFIX AND NOT HOST_TVOS AND NOT HOST_IOS)
-    append("-nodefaultlibs" CMAKE_CXX_FLAGS)
-    if(NOT HOST_WASM)
-      append("-lc" CMAKE_CXX_FLAGS)
-    endif()
-  endif()
 endif()
 
 ######################################
diff --git a/src/mono/mono/eglib/CMakeLists.txt b/src/mono/mono/eglib/CMakeLists.txt
index 9946145859e030..09cf32eaa81ada 100644
--- a/src/mono/mono/eglib/CMakeLists.txt
+++ b/src/mono/mono/eglib/CMakeLists.txt
@@ -34,12 +34,10 @@ set(eglib_common_sources
     gfile.c
     gfile-posix.c
     gutf8.c
-    ${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp)
+    ${CLR_SRC_NATIVE_DIR}/minipal/utf8.c)
 
-if(HOST_WIN32)
-set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp" PROPERTIES COMPILE_FLAGS "/wd4100 /wd4267 /wd4458 /wd4310")
-else()
-set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8.cpp" PROPERTIES COMPILE_FLAGS "-std=c++11 -fno-rtti -fno-exceptions")
+if(IS_BIG_ENDIAN)
+  set_source_files_properties("${CLR_SRC_NATIVE_DIR}/minipal/utf8.c" PROPERTIES COMPILE_FLAGS "-DBIGENDIAN=1")
 endif()
 
 set(eglib_headers
@@ -48,7 +46,7 @@ set(eglib_headers
   gmodule.h)
 
 if(HAVE_CLOCK_NANOSLEEP)
-list(APPEND eglib_common_sources gclock-nanosleep.c)
+  list(APPEND eglib_common_sources gclock-nanosleep.c)
 endif()
 
 set(eglib_sources "${eglib_platform_sources};${eglib_common_sources}")
diff --git a/src/mono/mono/eglib/giconv.c b/src/mono/mono/eglib/giconv.c
index 79c45c8182adea..7863d8cbd35cd6 100644
--- a/src/mono/mono/eglib/giconv.c
+++ b/src/mono/mono/eglib/giconv.c
@@ -28,7 +28,6 @@
 #include <errno.h>
 #include "../utils/mono-errno.h"
 
-typedef gunichar2 char16_t;
 #include <minipal/utf8.h>
 
 #ifdef _MSC_VER
@@ -44,9 +43,6 @@ static FORCE_INLINE (int) decode_utf8 (char *inbuf, size_t inleft, gunichar *out
 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
 #define decode_utf16 decode_utf16le
 #else
-#ifndef BIGENDIAN
-#define BIGENDIAN
-#endif
 #define decode_utf16 decode_utf16be
 #endif
 
@@ -328,27 +324,42 @@ g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written)
 static FORCE_INLINE (void)
 map_error(GError **err)
 {
-	if (errno == 0) return;
-	if (errno == ERROR_INSUFFICIENT_BUFFER) {
+	if (errno == MINIPAL_ERROR_INSUFFICIENT_BUFFER) {
 		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_MEMORY,
 			     "Allocation failed.");
-	} else if (errno == ERROR_NO_UNICODE_TRANSLATION) {
-		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
-			     "Illegal byte sequence encountered in the input.");
-	} else {
-		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
-			     "Partial byte sequence encountered in the input.");
 	}
 }
 
 static gunichar2 *
-g_utf8_to_utf16_impl (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err, int dwFlags, bool treatAsLE)
+g_utf8_to_utf16_impl (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err, int flags, bool treatAsLE)
 {
 	errno = 0;
 	gunichar2* lpDestStr = NULL;
-	int ret = minipal_utf8_to_utf16_allocate (str, len, &lpDestStr, dwFlags, treatAsLE);
+#if G_BYTE_ORDER == G_BIG_ENDIAN
+	if (treatAsLE)
+		flags |= MINIPAL_TREAT_AS_LITTLE_ENDIAN;
+#endif
+
+	if (len < 0)
+		len = (glong)strlen(str) + 1;
+
+	glong ret = (glong)minipal_get_length_utf8_to_utf16 (str, len, flags);
+
+	map_error(err);
+
 	if (items_written)
 		*items_written = errno == 0 ? ret : 0;
+
+	if (ret <= 0)
+		return NULL;
+
+	lpDestStr = malloc((ret + 1) * sizeof(gunichar2));
+	ret = (glong)minipal_convert_utf8_to_utf16 (str, len, lpDestStr, ret, flags);
+    lpDestStr[ret] = '\0';
+
+	if (items_written)
+		*items_written = errno == 0 ? ret : 0;
+
 	map_error(err);
 	return lpDestStr;
 }
@@ -356,8 +367,17 @@ g_utf8_to_utf16_impl (const gchar *str, glong len, glong *items_read, glong *ite
 static gunichar2 *
 g_utf8_to_utf16le_custom_alloc_impl (const gchar *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err, bool treatAsLE)
 {
+	guint flags = 0;
 	errno = 0;
-	int ret = minipal_utf8_to_utf16_preallocated (str, len, 0, 0, 0, /* treatAsLE */ treatAsLE);
+#if G_BYTE_ORDER == G_BIG_ENDIAN
+	if (treatAsLE)
+		flags = MINIPAL_TREAT_AS_LITTLE_ENDIAN;
+#endif
+	if (len < 0)
+		len = (glong)strlen(str) + 1;
+
+	glong ret = (glong)minipal_get_length_utf8_to_utf16 (str, len, flags);
+
 	map_error(err);
 
 	if (items_written)
@@ -366,8 +386,10 @@ g_utf8_to_utf16le_custom_alloc_impl (const gchar *str, glong len, glong *items_r
 	if (ret <= 0)
 		return NULL;
 
-	gunichar2* lpDestStr = custom_alloc_func((ret + 1) * sizeof (gunichar2), custom_alloc_data);
-	ret = minipal_utf8_to_utf16_preallocated (str, len, &lpDestStr, ret, MB_ERR_INVALID_CHARS, /* treatAsLE */ treatAsLE);
+	gunichar2 *lpDestStr = custom_alloc_func((ret + 1) * sizeof (gunichar2), custom_alloc_data);
+	flags |= MINIPAL_MB_NO_REPLACE_INVALID_CHARS;
+	ret = (glong)minipal_convert_utf8_to_utf16 (str, len, lpDestStr, ret, flags);
+
 	map_error(err);
 	return lpDestStr;
 }
@@ -375,13 +397,13 @@ g_utf8_to_utf16le_custom_alloc_impl (const gchar *str, glong len, glong *items_r
 gunichar2 *
 g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return g_utf8_to_utf16_impl (str, len, items_read, items_written, err, MB_ERR_INVALID_CHARS, false);
+	return g_utf8_to_utf16_impl (str, len, items_read, items_written, err, MINIPAL_MB_NO_REPLACE_INVALID_CHARS, false);
 }
 
 gunichar2 *
 g_utf8_to_utf16le (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
 {
-	return g_utf8_to_utf16_impl (str, len, items_read, items_written, err, MB_ERR_INVALID_CHARS, true);
+	return g_utf8_to_utf16_impl (str, len, items_read, items_written, err, MINIPAL_MB_NO_REPLACE_INVALID_CHARS, true);
 }
 
 gunichar2 *
@@ -477,9 +499,31 @@ g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_wri
 static gchar *
 g_utf16_to_utf8_impl (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err, bool treatAsLE)
 {
+	guint flags = 0;
 	errno = 0;
 	gchar* lpDestStr = NULL;
-	int ret = minipal_utf16_to_utf8_allocate (str, len, &lpDestStr, treatAsLE);
+#if G_BYTE_ORDER == G_BIG_ENDIAN
+	if (treatAsLE)
+		flags |= MINIPAL_TREAT_AS_LITTLE_ENDIAN;
+#endif
+	if (len < 0) {
+		len = 0;
+		while (str[len])
+			len++;
+	}
+
+	glong ret = (glong)minipal_get_length_utf16_to_utf8 (str, len, flags);
+	map_error(err);
+
+	if (items_written)
+		*items_written = errno == 0 ? ret : 0;
+
+	if (ret <= 0)
+		return NULL;
+
+	lpDestStr = (gchar *)malloc((ret + 1) * sizeof(gchar));
+	ret = (glong)minipal_convert_utf16_to_utf8 (str, len, lpDestStr, ret, flags);
+	lpDestStr[ret] = '\0';
 
 	if (items_written)
 		*items_written = errno == 0 ? ret : 0;
@@ -504,7 +548,14 @@ gchar *
 g_utf16_to_utf8_custom_alloc (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GCustomAllocator custom_alloc_func, gpointer custom_alloc_data, GError **err)
 {
 	errno = 0;
-	int ret = minipal_utf16_to_utf8_preallocated (str, len, 0, 0);
+
+	if (len < 0) {
+		len = 0;
+		while (str[len])
+			len++;
+	}
+
+	glong ret = (glong)minipal_get_length_utf16_to_utf8 (str, len, 0);
 	map_error(err);
 
 	if (items_written)
@@ -513,8 +564,9 @@ g_utf16_to_utf8_custom_alloc (const gunichar2 *str, glong len, glong *items_read
 	if (ret <= 0)
 		return NULL;
 
-	gchar* lpDestStr = custom_alloc_func((ret + 1) * sizeof (gunichar2), custom_alloc_data);
-	ret = minipal_utf16_to_utf8_preallocated (str, len, &lpDestStr, ret);
+	gchar *lpDestStr = custom_alloc_func((ret + 1) * sizeof (gunichar2), custom_alloc_data);
+	ret = (glong)minipal_convert_utf16_to_utf8 (str, len, lpDestStr, ret, 0);
+
 	map_error(err);
 	return lpDestStr;
 }
diff --git a/src/mono/mono/metadata/object.c b/src/mono/mono/metadata/object.c
index b267334250c6c2..b0289cebf414ae 100644
--- a/src/mono/mono/metadata/object.c
+++ b/src/mono/mono/metadata/object.c
@@ -327,7 +327,7 @@ get_type_init_exception_for_vtable (MonoVTable *vtable)
 
 	mono_mem_manager_init_reflection_hashes (mem_manager);
 
-	/* 
+	/*
 	 * If the initializing thread was rudely aborted, the exception is not stored
 	 * in the hash.
 	 */
diff --git a/src/mono/mono/mini/CMakeLists.txt b/src/mono/mono/mini/CMakeLists.txt
index 9df1cb56f14fa2..6f5e8507315dbf 100644
--- a/src/mono/mono/mini/CMakeLists.txt
+++ b/src/mono/mono/mini/CMakeLists.txt
@@ -24,10 +24,7 @@ set(OS_LIBS "-framework CoreFoundation" "-lobjc" "-lc++")
 elseif(HOST_ANDROID)
 set(OS_LIBS m dl log)
 elseif(HOST_LINUX)
-set(OS_LIBS pthread m dl gcc_s)
-  if(NOT CLR_CMAKE_HOST_LINUX_MUSL) # glibc build also requires libc_nonshared.a for atexit(3) usage
-    set(OS_LIBS ${OS_LIBS} -l:libc_nonshared.a)
-  endif()
+set(OS_LIBS pthread m dl)
 elseif(HOST_WIN32)
 set(OS_LIBS bcrypt.lib Mswsock.lib ws2_32.lib psapi.lib version.lib advapi32.lib winmm.lib kernel32.lib)
 elseif(HOST_SOLARIS)
@@ -554,7 +551,7 @@ if(NOT DISABLE_EXECUTABLES)
   target_link_libraries(mono-sgen PRIVATE monoapi eglib_api monosgen-static)
   if(HAVE_ICU_SHIM)
     target_link_libraries(mono-sgen PRIVATE icu_shim_objects)
-  endif() 
+  endif()
   target_link_libraries(mono-sgen PRIVATE ${OS_LIBS} ${LLVM_LIBS} ${ICU_LIBS} ${Z_LIBS})
   # Alpine Linux implements ucontext in a different library
   if(CLR_CMAKE_HOST_ALPINE_LINUX AND TARGET_S390X)
diff --git a/src/mono/mono/profiler/CMakeLists.txt b/src/mono/mono/profiler/CMakeLists.txt
index f826371103b6f2..6bca00983686fc 100644
--- a/src/mono/mono/profiler/CMakeLists.txt
+++ b/src/mono/mono/profiler/CMakeLists.txt
@@ -14,7 +14,7 @@ if(NOT DISABLE_LIBS)
     # Build the logging profiler only for certain platforms
     add_library(mono-profiler-log SHARED helper.c log.c log-args.c ${ZLIB_SOURCES})
     target_compile_definitions(mono-profiler-log PRIVATE -DMONO_DLL_EXPORT)
-    target_link_libraries(mono-profiler-log PRIVATE monosgen-shared monoapi eglib_objects ${CMAKE_DL_LIBS})
+    target_link_libraries(mono-profiler-log PRIVATE monosgen-shared monoapi eglib_objects)
     if(HOST_ANDROID)
       target_link_libraries(mono-profiler-log PRIVATE log)
     endif()
diff --git a/src/native/minipal/utf8.c b/src/native/minipal/utf8.c
new file mode 100644
index 00000000000000..bacad116efcd8a
--- /dev/null
+++ b/src/native/minipal/utf8.c
@@ -0,0 +1,2140 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <minipal/utf8.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <assert.h>
+
+#define HIGH_SURROGATE_START 0xd800
+#define HIGH_SURROGATE_END 0xdbff
+#define LOW_SURROGATE_START 0xdc00
+#define LOW_SURROGATE_END 0xdfff
+
+// Test if the wide character is a high surrogate
+static bool IsHighSurrogate(const CHAR16_T c)
+{
+    return (c & 0xFC00) == HIGH_SURROGATE_START;
+}
+
+// Test if the wide character is a low surrogate
+static bool IsLowSurrogate(const CHAR16_T c)
+{
+    return (c & 0xFC00) == LOW_SURROGATE_START;
+}
+
+// Test if the wide character is a surrogate half
+static bool IsSurrogate(const CHAR16_T c)
+{
+    return (c & 0xF800) == HIGH_SURROGATE_START;
+}
+
+typedef struct
+{
+    // Store our default string
+    unsigned char* byteStart;
+    CHAR16_T* charEnd;
+    const CHAR16_T strDefault[2];
+    int strDefaultLength;
+    int fallbackCount;
+    int fallbackIndex;
+} DecoderBuffer;
+
+static CHAR16_T DecoderReplacementFallbackBuffer_GetNextChar(DecoderBuffer* self)
+{
+    // We want it to get < 0 because == 0 means that the current/last character is a fallback
+    // and we need to detect recursion.  We could have a flag but we already have this counter.
+    self->fallbackCount--;
+    self->fallbackIndex++;
+
+    // Do we have anything left? 0 is now last fallback char, negative is nothing left
+    if (self->fallbackCount < 0)
+        return '\0';
+
+    // Need to get it out of the buffer.
+    // Make sure it didn't wrap from the fast count-- path
+    if (self->fallbackCount == INT_MAX)
+    {
+        self->fallbackCount = -1;
+        return '\0';
+    }
+
+    // Now make sure its in the expected range
+    assert(self->fallbackIndex < self->strDefaultLength && self->fallbackIndex >= 0);
+
+    return self->strDefault[self->fallbackIndex];
+}
+
+// Fallback Methods
+static bool DecoderReplacementFallbackBuffer_Fallback(DecoderBuffer* self)
+{
+    // We expect no previous fallback in our buffer
+    // We can't call recursively but others might (note, we don't test on last char!!!)
+    assert(self->fallbackCount < 1);
+
+    // Go ahead and get our fallback
+    if (self->strDefaultLength == 0)
+        return false;
+
+    self->fallbackCount = self->strDefaultLength;
+    self->fallbackIndex = -1;
+
+    return true;
+}
+
+// Fallback the current byte by sticking it into the remaining char buffer.
+// This can only be called by our encodings (other have to use the public fallback methods), so
+// we can use our DecoderNLS here too (except we don't).
+// Returns true if we are successful, false if we can't fallback the character (no buffer space)
+// So caller needs to throw buffer space if return false.
+// Right now this has both bytes and bytes[], since we might have extra bytes, hence the
+// array, and we might need the index, hence the byte*
+// Don't touch ref chars unless we succeed
+static bool DecoderReplacementFallbackBuffer_InternalFallback_Copy(DecoderBuffer* self, CHAR16_T** chars)
+{
+    assert(self->byteStart != NULL);
+
+    bool fallbackResult = DecoderReplacementFallbackBuffer_Fallback(self);
+
+    // See if there's a fallback character and we have an output buffer then copy our string.
+    if (fallbackResult)
+    {
+        // Copy the chars to our output
+        CHAR16_T ch;
+        CHAR16_T* charTemp = *chars;
+        bool bHighSurrogate = false;
+        (void)bHighSurrogate; // unused in release build
+        while ((ch = DecoderReplacementFallbackBuffer_GetNextChar(self)) != 0)
+        {
+            // Make sure no mixed up surrogates
+            if (IsSurrogate(ch))
+            {
+                if (IsHighSurrogate(ch))
+                {
+                    // High Surrogate
+                    assert(!bHighSurrogate);
+                    bHighSurrogate = true;
+                }
+                else
+                {
+                    // Low surrogate
+                    assert(bHighSurrogate);
+                    bHighSurrogate = false;
+                }
+            }
+
+            if (charTemp >= self->charEnd)
+            {
+                // No buffer space
+                return false;
+            }
+
+            *(charTemp++) = ch;
+        }
+
+        // Need to make sure that bHighSurrogate isn't true
+        assert(!bHighSurrogate);
+
+        // Now we aren't going to be false, so its OK to update chars
+        *chars = charTemp;
+    }
+
+    return true;
+}
+
+// Clear the buffer
+static void DecoderReplacementFallbackBuffer_Reset(DecoderBuffer* self)
+{
+    self->fallbackCount = -1;
+    self->fallbackIndex = -1;
+    self->byteStart = NULL;
+}
+
+// Set the above values
+static void DecoderBuffer_InternalInitialize(DecoderBuffer* self, unsigned char* byteStart, CHAR16_T* charEnd)
+{
+    self->byteStart = byteStart;
+    self->charEnd = charEnd;
+}
+
+typedef struct
+{
+    const CHAR16_T strDefault[3];
+    int strDefaultLength;
+    CHAR16_T* charStart;
+    CHAR16_T* charEnd;
+    bool setEncoder;
+    bool bUsedEncoder;
+    bool bFallingBack;
+    int iRecursionCount;
+    int fallbackCount;
+    int fallbackIndex;
+} EncoderBuffer;
+
+#define MAX_RECURSION 250
+
+// Set the above values
+// This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
+static void EncoderReplacementFallbackBuffer_InternalInitialize(EncoderBuffer* self, CHAR16_T* charStart, CHAR16_T* charEnd, bool setEncoder)
+{
+    self->charStart = charStart;
+    self->charEnd = charEnd;
+    self->setEncoder = setEncoder;
+    self->bUsedEncoder = false;
+    self->bFallingBack = false;
+    self->iRecursionCount = 0;
+}
+
+static CHAR16_T EncoderReplacementFallbackBuffer_InternalGetNextChar(EncoderBuffer* self)
+{
+    // We want it to get < 0 because == 0 means that the current/last character is a fallback
+    // and we need to detect recursion.  We could have a flag but we already have this counter.
+    self->fallbackCount--;
+    self->fallbackIndex++;
+
+    // Do we have anything left? 0 is now last fallback char, negative is nothing left
+    if (self->fallbackCount < 0)
+        return '\0';
+
+    // Need to get it out of the buffer.
+    // Make sure it didn't wrap from the fast count-- path
+    if (self->fallbackCount == INT_MAX)
+    {
+        self->fallbackCount = -1;
+        return '\0';
+    }
+
+    // Now make sure its in the expected range
+    assert(self->fallbackIndex < self->strDefaultLength && self->fallbackIndex >= 0);
+
+    CHAR16_T ch = self->strDefault[self->fallbackIndex];
+    self->bFallingBack = (ch != 0);
+    if (ch == 0) self->iRecursionCount = 0;
+    return ch;
+}
+
+// Fallback Methods
+static bool EncoderReplacementFallbackBuffer_Fallback(EncoderBuffer* self)
+{
+    // If we had a buffer already we're being recursive, throw, it's probably at the suspect
+    // character in our array.
+    assert(self->fallbackCount < 1);
+
+    // Go ahead and get our fallback
+    // Divide by 2 because we aren't a surrogate pair
+    self->fallbackCount = self->strDefaultLength / 2;
+    self->fallbackIndex = -1;
+
+    return self->fallbackCount != 0;
+}
+
+static bool EncoderReplacementFallbackBuffer_Fallback_Unknown(EncoderBuffer* self)
+{
+    // If we had a buffer already we're being recursive, throw, it's probably at the suspect
+    // character in our array.
+    assert(self->fallbackCount < 1);
+
+    // Go ahead and get our fallback
+    self->fallbackCount = self->strDefaultLength;
+    self->fallbackIndex = -1;
+
+    return self->fallbackCount != 0;
+}
+
+// Fallback the current character using the remaining buffer and encoder if necessary
+// This can only be called by our encodings (other have to use the public fallback methods), so
+// we can use our EncoderNLS here too.
+// setEncoder is true if we're calling from a GetBytes method, false if we're calling from a GetByteCount
+//
+// Note that this could also change the contents of self->buffer.encoder, which is the same
+// object that the caller is using, so the caller could mess up the encoder for us
+// if they aren't careful.
+static bool EncoderReplacementFallbackBuffer_InternalFallback(EncoderBuffer* self, CHAR16_T ch, CHAR16_T** chars)
+{
+    // Shouldn't have null charStart
+    assert(self->charStart != NULL);
+
+    // See if it was a high surrogate
+    if (IsHighSurrogate(ch))
+    {
+        // See if there's a low surrogate to go with it
+        if (*chars >= self->charEnd)
+        {
+            // Nothing left in input buffer
+            // No input, return 0
+        }
+        else
+        {
+            // Might have a low surrogate
+            CHAR16_T cNext = **chars;
+            if (IsLowSurrogate(cNext))
+            {
+                // If already falling back then fail
+                assert(!self->bFallingBack || self->iRecursionCount++ <= MAX_RECURSION);
+
+                // Next is a surrogate, add it as surrogate pair, and increment chars
+                (*chars)++;
+                self->bFallingBack = EncoderReplacementFallbackBuffer_Fallback_Unknown(self);
+                return self->bFallingBack;
+            }
+
+            // Next isn't a low surrogate, just fallback the high surrogate
+        }
+    }
+
+    // If already falling back then fail
+    assert(!self->bFallingBack || self->iRecursionCount++ <= MAX_RECURSION);
+
+    // Fall back our char
+    self->bFallingBack = EncoderReplacementFallbackBuffer_Fallback(self);
+
+    return self->bFallingBack;
+}
+
+static bool EncoderReplacementFallbackBuffer_MovePrevious(EncoderBuffer* self)
+{
+    // Back up one, only if we just processed the last character (or earlier)
+    if (self->fallbackCount >= -1 && self->fallbackIndex >= 0)
+    {
+        self->fallbackIndex--;
+        self->fallbackCount++;
+        return true;
+    }
+
+    // Return false 'cause we couldn't do it.
+    return false;
+}
+
+typedef struct
+{
+    union
+    {
+        DecoderBuffer decoder;
+        EncoderBuffer encoder;
+    } buffer;
+
+    bool useFallback;
+
+#if BIGENDIAN
+    bool treatAsLE;
+#endif
+} UTF8Encoding;
+
+// These are bitmasks used to maintain the state in the decoder. They occupy the higher bits
+// while the actual character is being built in the lower bits. They are shifted together
+// with the actual bits of the character.
+
+// bits 30 & 31 are used for pending bits fixup
+#define FinalByte (1 << 29)
+#define SupplimentarySeq (1 << 28)
+#define ThreeByteSeq (1 << 27)
+
+static bool InRange(int c, int begin, int end)
+{
+    return begin <= c && c <= end;
+}
+
+// During GetChars we had an invalid byte sequence
+// pSrc is backed up to the start of the bad sequence if we didn't have room to
+// fall it back.  Otherwise pSrc remains where it is.
+static bool FallbackInvalidByteSequence_Copy(UTF8Encoding* self, unsigned char** pSrc, CHAR16_T** pTarget)
+{
+    assert(self->useFallback);
+
+    // Get our byte[]
+    unsigned char* pStart = *pSrc;
+    bool fallbackResult = DecoderReplacementFallbackBuffer_InternalFallback_Copy(&self->buffer.decoder, pTarget);
+
+    // Do the actual fallback
+    if (!fallbackResult)
+    {
+        // Oops, it failed, back up to pStart
+        *pSrc = pStart;
+        return false;
+    }
+
+    // It worked
+    return true;
+}
+
+static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t count)
+{
+    assert(bytes != NULL);
+    assert(count >= 0);
+
+    // Initialize stuff
+    unsigned char *pSrc = bytes;
+    unsigned char *pEnd = pSrc + count;
+    int availableBytes, chc;
+
+    // Start by assuming we have as many as count, charCount always includes the adjustment
+    // for the character being decoded
+    size_t charCount = count;
+    int ch = 0;
+    bool fallbackUsed = false;
+
+    while (true)
+    {
+        // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+        if (pSrc >= pEnd) break;
+
+        // read next byte. The JIT optimization seems to be getting confused when
+        // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+        int cha = *pSrc;
+
+        // no pending bits
+        if (ch == 0) goto ReadChar;
+
+        pSrc++;
+
+        // we are expecting to see trailing bytes like 10vvvvvv
+        if ((cha & 0xC0) != 0x80)
+        {
+            // This can be a valid starting byte for another UTF8 byte sequence, so let's put
+            // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
+            pSrc--;
+            charCount += (ch >> 30);
+            goto InvalidByteSequence;
+        }
+
+        // fold in the new byte
+        ch = (ch << 6) | (cha & 0x3F);
+
+        if ((ch & FinalByte) == 0)
+        {
+            assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0);
+
+            if ((ch & SupplimentarySeq) != 0)
+            {
+                if ((ch & (FinalByte >> 6)) != 0)
+                {
+                    // this is 3rd byte (of 4 byte supplimentary) - nothing to do
+                    continue;
+                }
+
+                // 2nd byte, check for non-shortest form of supplimentary char and the valid
+                // supplimentary characters in range 0x010000 - 0x10FFFF at the same time
+                if (!InRange(ch & 0x1F0, 0x10, 0x100))
+                {
+                    goto InvalidByteSequence;
+                }
+            }
+            else
+            {
+                // Must be 2nd byte of a 3-byte sequence
+                // check for non-shortest form of 3 byte seq
+                if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
+                    (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
+                {
+                    goto InvalidByteSequence;
+                }
+            }
+            continue;
+        }
+
+        // ready to punch
+
+        // adjust for surrogates in non-shortest form
+        if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq) charCount--;
+
+        goto EncodeChar;
+
+    InvalidByteSequence:
+        // this code fragment should be close to the gotos referencing it
+        // Have to do fallback for invalid bytes
+        if (!fallbackUsed)
+        {
+            fallbackUsed = true;
+            if (self->useFallback) DecoderBuffer_InternalInitialize(&self->buffer.decoder, bytes, NULL);
+        }
+        charCount += self->buffer.decoder.strDefaultLength;
+
+        ch = 0;
+        continue;
+
+    ReadChar:
+        ch = *pSrc;
+        pSrc++;
+
+    ProcessChar:
+        if (ch > 0x7F)
+        {
+            // If its > 0x7F, its start of a new multi-byte sequence
+
+            // Long sequence, so unreserve our char.
+            charCount--;
+
+            // bit 6 has to be non-zero for start of multibyte chars.
+            if ((ch & 0x40) == 0) goto InvalidByteSequence;
+
+            // start a new long code
+            if ((ch & 0x20) != 0)
+            {
+                if ((ch & 0x10) != 0)
+                {
+                    // 4 byte encoding - supplimentary character (2 surrogates)
+
+                    ch &= 0x0F;
+
+                    // check that bit 4 is zero and the valid supplimentary character
+                    // range 0x000000 - 0x10FFFF at the same time
+                    if (ch > 0x04)
+                    {
+                        ch |= 0xf0;
+                        goto InvalidByteSequence;
+                    }
+
+                    // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
+                    // Final byte flag, count fix if we don't make final byte & supplimentary sequence flag.
+                    ch |= (FinalByte >> 3 * 6) |  // Final byte is 3 more bytes from now
+                        (1 << 30) |           // If it dies on next byte we'll need an extra char
+                        (3 << (30 - 2 * 6)) |     // If it dies on last byte we'll need to subtract a char
+                        (SupplimentarySeq) | (SupplimentarySeq >> 6) |
+                        (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
+
+                    // Our character count will be 2 characters for these 4 bytes, so subtract another char
+                    charCount--;
+                }
+                else
+                {
+                    // 3 byte encoding
+                    // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
+                    ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
+                        (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
+
+                    // We'll expect 1 character for these 3 bytes, so subtract another char.
+                    charCount--;
+                }
+            }
+            else
+            {
+                // 2 byte encoding
+
+                ch &= 0x1F;
+
+                // check for non-shortest form
+                if (ch <= 1)
+                {
+                    ch |= 0xc0;
+                    goto InvalidByteSequence;
+                }
+
+                // Add bit flags so we'll be flagged correctly
+                ch |= (FinalByte >> 6);
+            }
+            continue;
+        }
+
+    EncodeChar:
+
+        availableBytes = pEnd - pSrc;
+
+        // don't fall into the fast decoding loop if we don't have enough bytes
+        if (availableBytes <= 13)
+        {
+            // try to get over the remainder of the ascii characters fast though
+            unsigned char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+            while (pSrc < pLocalEnd)
+            {
+                ch = *pSrc;
+                pSrc++;
+
+                if (ch > 0x7F)
+                    goto ProcessChar;
+            }
+            // we are done
+            ch = 0;
+            break;
+        }
+
+        // To compute the upper bound, assume that all characters are ASCII characters at this point,
+        //  the boundary will be decreased for every non-ASCII character we encounter
+        // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
+        unsigned char *pStop = pSrc + availableBytes - 7;
+
+        while (pSrc < pStop)
+        {
+            ch = *pSrc;
+            pSrc++;
+
+            if (ch > 0x7F)
+            {
+                goto LongCode;
+            }
+
+            // get pSrc 2-byte aligned
+            if (((size_t)pSrc & 0x1) != 0)
+            {
+                ch = *pSrc;
+                pSrc++;
+                if (ch > 0x7F)
+                {
+                    goto LongCode;
+                }
+            }
+
+            // get pSrc 4-byte aligned
+            if (((size_t)pSrc & 0x2) != 0)
+            {
+                ch = *(unsigned short*)pSrc;
+                if ((ch & 0x8080) != 0)
+                {
+                    goto LongCodeWithMask16;
+                }
+                pSrc += 2;
+            }
+
+
+            // Run 8 + 8 characters at a time!
+            while (pSrc < pStop)
+            {
+                ch = *(int*)pSrc;
+                int chb = *(int*)(pSrc + 4);
+                if (((ch | chb) & (int)0x80808080) != 0)
+                {
+                    goto LongCodeWithMask32;
+                }
+                pSrc += 8;
+
+                // This is a really small loop - unroll it
+                if (pSrc >= pStop)
+                    break;
+
+                ch = *(int*)pSrc;
+                chb = *(int*)(pSrc + 4);
+                if (((ch | chb) & (int)0x80808080) != 0)
+                {
+                    goto LongCodeWithMask32;
+                }
+                pSrc += 8;
+            }
+            break;
+
+        LongCodeWithMask32 :
+#if BIGENDIAN
+        // be careful about the sign extension
+        if (!self->treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
+        else
+#endif
+        ch &= 0xFF;
+
+        LongCodeWithMask16:
+#if BIGENDIAN
+        if (!self->treatAsLE) ch = (int)(((unsigned int)ch) >> 8);
+        else
+#endif
+        ch &= 0xFF;
+
+        pSrc++;
+        if (ch <= 0x7F)
+        {
+            continue;
+        }
+
+        LongCode:
+            chc = *pSrc;
+            pSrc++;
+
+            if (
+                // bit 6 has to be zero
+                (ch & 0x40) == 0 ||
+                // we are expecting to see trailing bytes like 10vvvvvv
+                (chc & 0xC0) != 0x80)
+            {
+                goto BadLongCode;
+            }
+
+            chc &= 0x3F;
+
+            // start a new long code
+            if ((ch & 0x20) != 0)
+            {
+                // fold the first two bytes together
+                chc |= (ch & 0x0F) << 6;
+
+                if ((ch & 0x10) != 0)
+                {
+                    // 4 byte encoding - surrogate
+                    ch = *pSrc;
+                    if (
+                        // check that bit 4 is zero, the non-shortest form of surrogate
+                        // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
+                        !InRange(chc >> 4, 0x01, 0x10) ||
+                        // we are expecting to see trailing bytes like 10vvvvvv
+                        (ch & 0xC0) != 0x80)
+                    {
+                        goto BadLongCode;
+                    }
+
+                    chc = (chc << 6) | (ch & 0x3F);
+
+                    ch = *(pSrc + 1);
+                    // we are expecting to see trailing bytes like 10vvvvvv
+                    if ((ch & 0xC0) != 0x80)
+                    {
+                        goto BadLongCode;
+                    }
+                    pSrc += 2;
+
+                    // extra byte
+                    charCount--;
+                }
+                else
+                {
+                    // 3 byte encoding
+                    ch = *pSrc;
+                    if (
+                        // check for non-shortest form of 3 byte seq
+                        (chc & (0x1F << 5)) == 0 ||
+                        // Can't have surrogates here.
+                        (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
+                        // we are expecting to see trailing bytes like 10vvvvvv
+                        (ch & 0xC0) != 0x80)
+                    {
+                        goto BadLongCode;
+                    }
+                    pSrc++;
+
+                    // extra byte
+                    charCount--;
+                }
+            }
+            else
+            {
+                // 2 byte encoding
+
+                // check for non-shortest form
+                if ((ch & 0x1E) == 0) goto BadLongCode;
+            }
+
+            // extra byte
+            charCount--;
+        }
+
+        // no pending bits at this point
+        ch = 0;
+        continue;
+
+    BadLongCode:
+        pSrc -= 2;
+        ch = 0;
+        continue;
+    }
+
+    // May have a problem if we have to flush
+    if (ch != 0)
+    {
+        // We were already adjusting for these, so need to unadjust
+        charCount += (ch >> 30);
+        // Have to do fallback for invalid bytes
+        if (!fallbackUsed)
+        {
+            fallbackUsed = true;
+            if (self->useFallback) DecoderBuffer_InternalInitialize(&self->buffer.decoder, bytes, NULL);
+        }
+        charCount += self->buffer.decoder.strDefaultLength;
+    }
+
+    // Shouldn't have anything in fallback buffer for GetCharCount
+    // (don't have to check m_throwOnOverflow for count)
+    assert(!fallbackUsed || !self->useFallback || self->buffer.decoder.fallbackCount < 0);
+
+    return charCount;
+}
+
+static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount, CHAR16_T* chars, size_t charCount)
+{
+    assert(chars != NULL);
+    assert(byteCount >= 0);
+    assert(charCount >= 0);
+    assert(bytes != NULL);
+
+    unsigned char *pSrc = bytes;
+    CHAR16_T *pTarget = chars;
+
+    unsigned char *pEnd = pSrc + byteCount;
+    CHAR16_T *pAllocatedBufferEnd = pTarget + charCount;
+
+    int ch = 0;
+    int chc;
+
+    bool fallbackUsed = false;
+
+    while (true)
+    {
+        // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+
+        if (pSrc >= pEnd) break;
+
+        // read next byte. The JIT optimization seems to be getting confused when
+        // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+        int cha = *pSrc;
+
+        if (ch == 0)
+        {
+            // no pending bits
+            goto ReadChar;
+        }
+
+        pSrc++;
+
+        // we are expecting to see trailing bytes like 10vvvvvv
+        if ((cha & 0xC0) != 0x80)
+        {
+            // This can be a valid starting byte for another UTF8 byte sequence, so let's put
+            // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
+            pSrc--;
+            goto InvalidByteSequence;
+        }
+
+        // fold in the new byte
+        ch = (ch << 6) | (cha & 0x3F);
+
+        if ((ch & FinalByte) == 0)
+        {
+            // Not at last byte yet
+            assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0);
+
+            if ((ch & SupplimentarySeq) != 0)
+            {
+                // Its a 4-byte supplimentary sequence
+                if ((ch & (FinalByte >> 6)) != 0)
+                {
+                    // this is 3rd byte of 4 byte sequence - nothing to do
+                    continue;
+                }
+
+                // 2nd byte of 4 bytes
+                // check for non-shortest form of surrogate and the valid surrogate
+                // range 0x000000 - 0x10FFFF at the same time
+                if (!InRange(ch & 0x1F0, 0x10, 0x100))
+                {
+                    goto InvalidByteSequence;
+                }
+            }
+            else
+            {
+                // Must be 2nd byte of a 3-byte sequence
+                // check for non-shortest form of 3 byte seq
+                if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
+                    (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
+                {
+                    goto InvalidByteSequence;
+                }
+            }
+            continue;
+        }
+
+        // ready to punch
+
+        // surrogate in shortest form?
+        // Might be possible to get rid of this?  Already did non-shortest check for 4-byte sequence when reading 2nd byte?
+        if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq)
+        {
+            // let the range check for the second char throw the exception
+            if (pTarget < pAllocatedBufferEnd)
+            {
+                *pTarget = (CHAR16_T)(((ch >> 10) & 0x7FF) +
+                    (HIGH_SURROGATE_START - (0x10000 >> 10)));
+                pTarget++;
+
+                ch = (ch & 0x3FF) +
+                    (int)(LOW_SURROGATE_START);
+            }
+        }
+
+        goto EncodeChar;
+
+    InvalidByteSequence:
+        // this code fragment should be close to the gotos referencing it
+        // Have to do fallback for invalid bytes
+        if (!fallbackUsed)
+        {
+            fallbackUsed = true;
+            if (self->useFallback) DecoderBuffer_InternalInitialize(&self->buffer.decoder, bytes, pAllocatedBufferEnd);
+        }
+
+        // That'll back us up the appropriate # of bytes if we didn't get anywhere
+        if (!FallbackInvalidByteSequence_Copy(self, &pSrc, &pTarget))
+        {
+            // Check if we ran out of buffer space
+            assert(pSrc >= bytes || pTarget == chars);
+
+            if (self->useFallback) DecoderReplacementFallbackBuffer_Reset(&self->buffer.decoder);
+            if (pTarget == chars)
+            {
+                errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+                return 0;
+            }
+            ch = 0;
+            break;
+        }
+
+        assert(pSrc >= bytes);
+
+        ch = 0;
+        continue;
+
+    ReadChar:
+        ch = *pSrc;
+        pSrc++;
+
+    ProcessChar:
+        if (ch > 0x7F)
+        {
+            // If its > 0x7F, its start of a new multi-byte sequence
+
+            // bit 6 has to be non-zero
+            if ((ch & 0x40) == 0) goto InvalidByteSequence;
+
+            // start a new long code
+            if ((ch & 0x20) != 0)
+            {
+                if ((ch & 0x10) != 0)
+                {
+                    // 4 byte encoding - supplimentary character (2 surrogates)
+
+                    ch &= 0x0F;
+
+                    // check that bit 4 is zero and the valid supplimentary character
+                    // range 0x000000 - 0x10FFFF at the same time
+                    if (ch > 0x04)
+                    {
+                        ch |= 0xf0;
+                        goto InvalidByteSequence;
+                    }
+
+                    ch |= (FinalByte >> 3 * 6) | (1 << 30) | (3 << (30 - 2 * 6)) |
+                        (SupplimentarySeq) | (SupplimentarySeq >> 6) |
+                        (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
+                }
+                else
+                {
+                    // 3 byte encoding
+                    ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
+                        (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
+                }
+            }
+            else
+            {
+                // 2 byte encoding
+
+                ch &= 0x1F;
+
+                // check for non-shortest form
+                if (ch <= 1)
+                {
+                    ch |= 0xc0;
+                    goto InvalidByteSequence;
+                }
+
+                ch |= (FinalByte >> 6);
+            }
+            continue;
+        }
+
+    EncodeChar:
+        // write the pending character
+        if (pTarget >= pAllocatedBufferEnd)
+        {
+            // Fix chars so we make sure to throw if we didn't output anything
+            ch &= 0x1fffff;
+            if (ch > 0x7f)
+            {
+                if (ch > 0x7ff)
+                {
+                    if (ch >= LOW_SURROGATE_START &&
+                        ch <= LOW_SURROGATE_END)
+                    {
+                        pSrc--;     // It was 4 bytes
+                        pTarget--;  // 1 was stored already, but we can't remember 1/2, so back up
+                    }
+                    else if (ch > 0xffff)
+                    {
+                        pSrc--;     // It was 4 bytes, nothing was stored
+                    }
+                    pSrc--;         // It was at least 3 bytes
+                }
+                pSrc--;             // It was at least 2 bytes
+            }
+            pSrc--;
+
+            // Throw that we don't have enough room (pSrc could be < chars if we had started to process
+            // a 4 byte sequence already)
+            assert(pSrc >= bytes || pTarget == chars);
+
+            if (pTarget == chars)
+            {
+                errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+                return 0;
+            }
+
+            // Don't store ch in decoder, we already backed up to its start
+            ch = 0;
+
+            // Didn't throw, just use this buffer size.
+            break;
+        }
+        *pTarget = (CHAR16_T)ch;
+        pTarget++;
+
+        int availableChars = pAllocatedBufferEnd - pTarget;
+        int availableBytes = pEnd - pSrc;
+
+        // don't fall into the fast decoding loop if we don't have enough bytes
+        // Test for availableChars is done because pStop would be <= pTarget.
+        if (availableBytes <= 13)
+        {
+            // we may need as many as 1 character per byte
+            if (availableChars < availableBytes)
+            {
+                // not enough output room.  no pending bits at this point
+                ch = 0;
+                continue;
+            }
+
+            // try to get over the remainder of the ascii characters fast though
+            unsigned char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+            while (pSrc < pLocalEnd)
+            {
+                ch = *pSrc;
+                pSrc++;
+
+                if (ch > 0x7F) goto ProcessChar;
+
+                *pTarget = (CHAR16_T)ch;
+                pTarget++;
+            }
+            // we are done
+            ch = 0;
+            break;
+        }
+
+        // we may need as many as 1 character per byte, so reduce the byte count if necessary.
+        // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
+        if (availableChars < availableBytes) availableBytes = availableChars;
+
+        // To compute the upper bound, assume that all characters are ASCII characters at this point,
+        //  the boundary will be decreased for every non-ASCII character we encounter
+        // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
+        CHAR16_T *pStop = pTarget + availableBytes - 7;
+
+        while (pTarget < pStop)
+        {
+            ch = *pSrc;
+            pSrc++;
+
+            if (ch > 0x7F) goto LongCode;
+
+            *pTarget = (CHAR16_T)ch;
+            pTarget++;
+
+            // get pSrc to be 2-byte aligned
+            if ((((size_t)pSrc) & 0x1) != 0)
+            {
+                ch = *pSrc;
+                pSrc++;
+                if (ch > 0x7F) goto LongCode;
+
+                *pTarget = (CHAR16_T)ch;
+                pTarget++;
+            }
+
+            // get pSrc to be 4-byte aligned
+            if ((((size_t)pSrc) & 0x2) != 0)
+            {
+                ch = *(unsigned short*)pSrc;
+                if ((ch & 0x8080) != 0) goto LongCodeWithMask16;
+
+                // Unfortunately, this is endianness sensitive
+#if BIGENDIAN
+                if (!self->treatAsLE)
+                {
+                    *pTarget = (CHAR16_T)((ch >> 8) & 0x7F);
+                    pSrc += 2;
+                    *(pTarget + 1) = (CHAR16_T)(ch & 0x7F);
+                    pTarget += 2;
+                }
+                else
+#endif
+                {
+                    *pTarget = (CHAR16_T)(ch & 0x7F);
+                    pSrc += 2;
+                    *(pTarget + 1) = (CHAR16_T)((ch >> 8) & 0x7F);
+                    pTarget += 2;
+                }
+            }
+
+            // Run 8 characters at a time!
+            while (pTarget < pStop)
+            {
+                ch = *(int*)pSrc;
+                int chb = *(int*)(pSrc + 4);
+                if (((ch | chb) & (int)0x80808080) != 0) goto LongCodeWithMask32;
+
+                // Unfortunately, this is endianness sensitive
+#if BIGENDIAN
+                if (!self->treatAsLE)
+                {
+                    *pTarget = (CHAR16_T)((ch >> 24) & 0x7F);
+                    *(pTarget + 1) = (CHAR16_T)((ch >> 16) & 0x7F);
+                    *(pTarget + 2) = (CHAR16_T)((ch >> 8) & 0x7F);
+                    *(pTarget + 3) = (CHAR16_T)(ch & 0x7F);
+                    pSrc += 8;
+                    *(pTarget + 4) = (CHAR16_T)((chb >> 24) & 0x7F);
+                    *(pTarget + 5) = (CHAR16_T)((chb >> 16) & 0x7F);
+                    *(pTarget + 6) = (CHAR16_T)((chb >> 8) & 0x7F);
+                    *(pTarget + 7) = (CHAR16_T)(chb & 0x7F);
+                    pTarget += 8;
+                }
+                else
+#endif
+                {
+                    *pTarget = (CHAR16_T)(ch & 0x7F);
+                    *(pTarget + 1) = (CHAR16_T)((ch >> 8) & 0x7F);
+                    *(pTarget + 2) = (CHAR16_T)((ch >> 16) & 0x7F);
+                    *(pTarget + 3) = (CHAR16_T)((ch >> 24) & 0x7F);
+                    pSrc += 8;
+                    *(pTarget + 4) = (CHAR16_T)(chb & 0x7F);
+                    *(pTarget + 5) = (CHAR16_T)((chb >> 8) & 0x7F);
+                    *(pTarget + 6) = (CHAR16_T)((chb >> 16) & 0x7F);
+                    *(pTarget + 7) = (CHAR16_T)((chb >> 24) & 0x7F);
+                    pTarget += 8;
+                }
+            }
+            break;
+
+            LongCodeWithMask32 :
+#if BIGENDIAN
+            // be careful about the sign extension
+            if (!self->treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
+            else
+#endif
+            ch &= 0xFF;
+
+            LongCodeWithMask16:
+#if BIGENDIAN
+            if (!self->treatAsLE) ch = (int)(((unsigned int)ch) >> 8);
+            else
+#endif
+            ch &= 0xFF;
+
+            pSrc++;
+            if (ch <= 0x7F)
+            {
+                *pTarget = (CHAR16_T)ch;
+                pTarget++;
+                continue;
+            }
+
+        LongCode:
+            chc = *pSrc;
+            pSrc++;
+
+            if (
+                // bit 6 has to be zero
+                (ch & 0x40) == 0 ||
+                // we are expecting to see trailing bytes like 10vvvvvv
+                (chc & 0xC0) != 0x80)
+            {
+                goto BadLongCode;
+            }
+
+            chc &= 0x3F;
+
+            // start a new long code
+            if ((ch & 0x20) != 0)
+            {
+
+                // fold the first two bytes together
+                chc |= (ch & 0x0F) << 6;
+
+                if ((ch & 0x10) != 0)
+                {
+                    // 4 byte encoding - surrogate
+                    ch = *pSrc;
+                    if (
+                        // check that bit 4 is zero, the non-shortest form of surrogate
+                        // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
+                        !InRange(chc >> 4, 0x01, 0x10) ||
+                        // we are expecting to see trailing bytes like 10vvvvvv
+                        (ch & 0xC0) != 0x80)
+                    {
+                        goto BadLongCode;
+                    }
+
+                    chc = (chc << 6) | (ch & 0x3F);
+
+                    ch = *(pSrc + 1);
+                    // we are expecting to see trailing bytes like 10vvvvvv
+                    if ((ch & 0xC0) != 0x80) goto BadLongCode;
+
+                    pSrc += 2;
+
+                    ch = (chc << 6) | (ch & 0x3F);
+
+                    *pTarget = (CHAR16_T)(((ch >> 10) & 0x7FF) +
+                        (HIGH_SURROGATE_START - (0x10000 >> 10)));
+                    pTarget++;
+
+                    ch = (ch & 0x3FF) + (LOW_SURROGATE_START);
+
+                    // extra byte, we're already planning 2 chars for 2 of these bytes,
+                    // but the big loop is testing the target against pStop, so we need
+                    // to subtract 2 more or we risk overrunning the input.  Subtract
+                    // one here and one below.
+                    pStop--;
+                }
+                else
+                {
+                    // 3 byte encoding
+                    ch = *pSrc;
+                    if (
+                        // check for non-shortest form of 3 byte seq
+                        (chc & (0x1F << 5)) == 0 ||
+                        // Can't have surrogates here.
+                        (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
+                        // we are expecting to see trailing bytes like 10vvvvvv
+                        (ch & 0xC0) != 0x80)
+                    {
+                        goto BadLongCode;
+                    }
+                    pSrc++;
+
+                    ch = (chc << 6) | (ch & 0x3F);
+
+                    // extra byte, we're only expecting 1 char for each of these 3 bytes,
+                    // but the loop is testing the target (not source) against pStop, so
+                    // we need to subtract 2 more or we risk overrunning the input.
+                    // Subtract 1 here and one more below
+                    pStop--;
+                }
+            }
+            else
+            {
+                // 2 byte encoding
+
+                ch &= 0x1F;
+
+                // check for non-shortest form
+                if (ch <= 1) goto BadLongCode;
+
+                ch = (ch << 6) | chc;
+            }
+
+            *pTarget = (CHAR16_T)ch;
+            pTarget++;
+
+            // extra byte, we're only expecting 1 char for each of these 2 bytes,
+            // but the loop is testing the target (not source) against pStop.
+            // subtract an extra count from pStop so that we don't overrun the input.
+            pStop--;
+        }
+
+        assert(pTarget <= pAllocatedBufferEnd);
+
+        // no pending bits at this point
+        ch = 0;
+        continue;
+
+    BadLongCode:
+        pSrc -= 2;
+        ch = 0;
+        continue;
+    }
+
+    if (ch != 0)
+    {
+        // Have to do fallback for invalid bytes
+        if (!fallbackUsed)
+        {
+            fallbackUsed = true;
+            if (self->useFallback) DecoderBuffer_InternalInitialize(&self->buffer.decoder, bytes, NULL);
+        }
+
+        // This'll back us up the appropriate # of bytes if we didn't get anywhere
+        if (!self->useFallback)
+        {
+            assert(pSrc >= bytes || pTarget == chars);
+
+            // Ran out of buffer space
+            // Need to throw an exception?
+            if (self->useFallback) DecoderReplacementFallbackBuffer_Reset(&self->buffer.decoder);
+            if (pTarget == chars)
+            {
+                errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+                return 0;
+            }
+        }
+        assert(pSrc >= bytes);
+        ch = 0;
+    }
+
+    // Shouldn't have anything in fallback buffer for GetChars
+    // (don't have to check m_throwOnOverflow for chars)
+    assert(!fallbackUsed || self->buffer.decoder.fallbackCount < 0);
+
+    return pTarget - chars;
+}
+
+static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, unsigned char* bytes, size_t byteCount)
+{
+    assert(chars != NULL);
+    assert(byteCount >= 0);
+    assert(charCount >= 0);
+    assert(bytes != NULL);
+
+    // For fallback we may need a fallback buffer.
+    // We wait to initialize it though in case we don't have any broken input unicode
+    bool fallbackUsed = false;
+    CHAR16_T *pSrc = chars;
+    unsigned char *pTarget = bytes;
+
+    CHAR16_T *pEnd = pSrc + charCount;
+    unsigned char *pAllocatedBufferEnd = pTarget + byteCount;
+
+    int ch = 0;
+    int chd;
+
+    // assume that JIT will enregister pSrc, pTarget and ch
+
+    while (true)
+    {
+        // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+
+        if (pSrc >= pEnd)
+        {
+            if (ch == 0)
+            {
+                // Check if there's anything left to get out of the fallback buffer
+                ch = fallbackUsed ? EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder) : 0;
+                if (ch > 0) goto ProcessChar;
+            }
+            else
+            {
+                // Case of leftover surrogates in the fallback buffer
+                if (fallbackUsed && self->buffer.encoder.bFallingBack)
+                {
+                    assert(ch >= 0xD800 && ch <= 0xDBFF);
+
+                    int cha = ch;
+
+                    ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder);
+
+                    if (InRange(ch, LOW_SURROGATE_START, LOW_SURROGATE_END))
+                    {
+                        ch = ch + (cha << 10) + (0x10000 - LOW_SURROGATE_START - (HIGH_SURROGATE_START << 10));
+                        goto EncodeChar;
+                    }
+                    else if (ch > 0)
+                    {
+                        goto ProcessChar;
+                    }
+
+                    break;
+                }
+            }
+
+            // attempt to encode the partial surrogate (will fail or ignore)
+            if (ch > 0) goto EncodeChar;
+
+            // We're done
+            break;
+        }
+
+        if (ch > 0)
+        {
+            // We have a high surrogate left over from a previous loop.
+            assert(ch >= 0xD800 && ch <= 0xDBFF);
+
+            // use separate helper variables for local contexts so that the jit optimizations
+            // won't get confused about the variable lifetimes
+            int cha = *pSrc;
+
+            // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
+            if (InRange(cha, LOW_SURROGATE_START, LOW_SURROGATE_END))
+            {
+                ch = cha + (ch << 10) +
+                    (0x10000
+                    - LOW_SURROGATE_START
+                    - (HIGH_SURROGATE_START << 10));
+
+                pSrc++;
+            }
+            // else ch is still high surrogate and encoding will fail
+
+            // attempt to encode the surrogate or partial surrogate
+            goto EncodeChar;
+        }
+
+        // If we've used a fallback, then we have to check for it
+        if (fallbackUsed)
+        {
+            ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder);
+            if (ch > 0) goto ProcessChar;
+        }
+
+        // read next char. The JIT optimization seems to be getting confused when
+        // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+        ch = *pSrc;
+        pSrc++;
+
+    ProcessChar:
+        if (InRange(ch, HIGH_SURROGATE_START, HIGH_SURROGATE_END)) continue;
+
+        // either good char or partial surrogate
+
+    EncodeChar:
+        // throw exception on partial surrogate if necessary
+        if (InRange(ch, HIGH_SURROGATE_START, LOW_SURROGATE_END))
+        {
+            // Lone surrogates aren't allowed, we have to do fallback for them
+            // Have to make a fallback buffer if we don't have one
+            if (!fallbackUsed)
+            {
+                // wait on fallbacks if we can
+                // For fallback we may need a fallback buffer
+                fallbackUsed = true;
+
+                // Set our internal fallback interesting things.
+                EncoderReplacementFallbackBuffer_InternalInitialize(&self->buffer.encoder, chars, pEnd, true);
+            }
+
+            // Do our fallback.  Actually we already know its a mixed up surrogate,
+            // so the ref pSrc isn't gonna do anything.
+            EncoderReplacementFallbackBuffer_InternalFallback(&self->buffer.encoder, (CHAR16_T)ch, &pSrc);
+
+            // Ignore it if we don't throw
+            ch = 0;
+            continue;
+        }
+
+        // Count bytes needed
+        int bytesNeeded = 1;
+        if (ch > 0x7F)
+        {
+            if (ch > 0x7FF)
+            {
+                if (ch > 0xFFFF)
+                {
+                    bytesNeeded++;  // 4 bytes (surrogate pair)
+                }
+                bytesNeeded++;      // 3 bytes (800-FFFF)
+            }
+            bytesNeeded++;          // 2 bytes (80-7FF)
+        }
+
+        if (pTarget > pAllocatedBufferEnd - bytesNeeded)
+        {
+            // Left over surrogate from last time will cause pSrc == chars, so we'll throw
+            if (fallbackUsed && self->buffer.encoder.bFallingBack)
+            {
+                EncoderReplacementFallbackBuffer_MovePrevious(&self->buffer.encoder);              // Didn't use this fallback char
+                if (ch > 0xFFFF)
+                    EncoderReplacementFallbackBuffer_MovePrevious(&self->buffer.encoder);          // Was surrogate, didn't use 2nd part either
+            }
+            else
+            {
+                pSrc--;                                     // Didn't use this char
+                if (ch > 0xFFFF)
+                    pSrc--;                                 // Was surrogate, didn't use 2nd part either
+            }
+
+            assert(pSrc >= chars || pTarget == bytes);
+
+            if (pTarget == bytes)  // Throw if we must
+            {
+                errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+                return 0;
+            }
+            ch = 0;                                         // Nothing left over (we backed up to start of pair if supplimentary)
+            break;
+        }
+
+        if (ch <= 0x7F)
+        {
+            *pTarget = (unsigned char)ch;
+        }
+        else
+        {
+            // use separate helper variables for local contexts so that the jit optimizations
+            // won't get confused about the variable lifetimes
+            int chb;
+            if (ch <= 0x7FF)
+            {
+                // 2 unsigned char encoding
+                chb = (unsigned char)(0xC0 | (ch >> 6));
+            }
+            else
+            {
+                if (ch <= 0xFFFF)
+                {
+                    chb = (unsigned char)(0xE0 | (ch >> 12));
+                }
+                else
+                {
+                    *pTarget = (unsigned char)(0xF0 | (ch >> 18));
+                    pTarget++;
+
+                    chb = 0x80 | ((ch >> 12) & 0x3F);
+                }
+                *pTarget = (unsigned char)chb;
+                pTarget++;
+
+                chb = 0x80 | ((ch >> 6) & 0x3F);
+            }
+            *pTarget = (unsigned char)chb;
+            pTarget++;
+
+            *pTarget = (unsigned char)0x80 | (ch & 0x3F);
+        }
+
+        pTarget++;
+
+        // If still have fallback don't do fast loop
+        if (fallbackUsed && (ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder)) != 0)
+            goto ProcessChar;
+
+        int availableChars = pEnd - pSrc;
+        int availableBytes = pAllocatedBufferEnd - pTarget;
+
+        // don't fall into the fast decoding loop if we don't have enough characters
+        // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
+        if (availableChars <= 13)
+        {
+            // we are hoping for 1 unsigned char per char
+            if (availableBytes < availableChars)
+            {
+                // not enough output room.  no pending bits at this point
+                ch = 0;
+                continue;
+            }
+
+            // try to get over the remainder of the ascii characters fast though
+            CHAR16_T* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+            while (pSrc < pLocalEnd)
+            {
+                ch = *pSrc;
+                pSrc++;
+
+                // Not ASCII, need more than 1 unsigned char per char
+                if (ch > 0x7F) goto ProcessChar;
+
+                *pTarget = (unsigned char)ch;
+                pTarget++;
+            }
+            // we are done, let ch be 0 to clear encoder
+            ch = 0;
+            break;
+        }
+
+        // we need at least 1 unsigned char per character, but Convert might allow us to convert
+        // only part of the input, so try as much as we can.  Reduce charCount if necessary
+        if (availableBytes < availableChars)
+        {
+            availableChars = availableBytes;
+        }
+
+        // FASTLOOP:
+        // - optimistic range checks
+        // - fallbacks to the slow loop for all special cases, exception throwing, etc.
+
+        // To compute the upper bound, assume that all characters are ASCII characters at this point,
+        //  the boundary will be decreased for every non-ASCII character we encounter
+        // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
+        // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
+        CHAR16_T *pStop = pSrc + availableChars - 5;
+
+        while (pSrc < pStop)
+        {
+            ch = *pSrc;
+            pSrc++;
+
+            if (ch > 0x7F) goto LongCode;
+
+            *pTarget = (unsigned char)ch;
+            pTarget++;
+
+            // get pSrc aligned
+            if (((size_t)pSrc & 0x2) != 0)
+            {
+                ch = *pSrc;
+                pSrc++;
+                if (ch > 0x7F) goto LongCode;
+
+                *pTarget = (unsigned char)ch;
+                pTarget++;
+            }
+
+            // Run 4 characters at a time!
+            while (pSrc < pStop)
+            {
+                ch = *(int*)pSrc;
+                int chc = *(int*)(pSrc + 2);
+
+                if (((ch | chc) & (int)0xFF80FF80) != 0) goto LongCodeWithMask;
+
+                // Unfortunately, this is endianness sensitive
+#if BIGENDIAN
+                if (!self->treatAsLE)
+                {
+                    *pTarget = (unsigned char)(ch >> 16);
+                    *(pTarget + 1) = (unsigned char)ch;
+                    pSrc += 4;
+                    *(pTarget + 2) = (unsigned char)(chc >> 16);
+                    *(pTarget + 3) = (unsigned char)chc;
+                    pTarget += 4;
+                }
+                else
+#endif
+                {
+                    *pTarget = (unsigned char)ch;
+                    *(pTarget + 1) = (unsigned char)(ch >> 16);
+                    pSrc += 4;
+                    *(pTarget + 2) = (unsigned char)chc;
+                    *(pTarget + 3) = (unsigned char)(chc >> 16);
+                    pTarget += 4;
+                }
+            }
+            continue;
+
+        LongCodeWithMask:
+#if BIGENDIAN
+        // be careful about the sign extension
+        if (!self->treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
+        else
+#endif
+        ch = (CHAR16_T)ch;
+        pSrc++;
+
+        if (ch > 0x7F) goto LongCode;
+
+        *pTarget = (unsigned char)ch;
+        pTarget++;
+        continue;
+
+        LongCode:
+            // use separate helper variables for slow and fast loop so that the jit optimizations
+            // won't get confused about the variable lifetimes
+            if (ch <= 0x7FF)
+            {
+                // 2 unsigned char encoding
+                chd = 0xC0 | (ch >> 6);
+            }
+            else
+            {
+                if (!InRange(ch, HIGH_SURROGATE_START, LOW_SURROGATE_END))
+                {
+                    // 3 unsigned char encoding
+                    chd = 0xE0 | (ch >> 12);
+                }
+                else
+                {
+                    // 4 unsigned char encoding - high surrogate + low surrogate
+                    if (ch > HIGH_SURROGATE_END)
+                    {
+                        // low without high -> bad, try again in slow loop
+                        pSrc -= 1;
+                        break;
+                    }
+
+                    chd = *pSrc;
+                    pSrc++;
+
+                    if (!InRange(chd, LOW_SURROGATE_START, LOW_SURROGATE_END))
+                    {
+                        // high not followed by low -> bad, try again in slow loop
+                        pSrc -= 2;
+                        break;
+                    }
+
+                    ch = chd + (ch << 10) +
+                        (0x10000
+                        - LOW_SURROGATE_START
+                        - (HIGH_SURROGATE_START << 10));
+
+                    *pTarget = (unsigned char)(0xF0 | (ch >> 18));
+                    // pStop - this unsigned char is compensated by the second surrogate character
+                    // 2 input chars require 4 output bytes.  2 have been anticipated already
+                    // and 2 more will be accounted for by the 2 pStop-- calls below.
+                    pTarget++;
+
+                    chd = 0x80 | ((ch >> 12) & 0x3F);
+                }
+                *pTarget = (unsigned char)chd;
+                pStop--;                    // 3 unsigned char sequence for 1 char, so need pStop-- and the one below too.
+                pTarget++;
+
+                chd = 0x80 | ((ch >> 6) & 0x3F);
+            }
+            *pTarget = (unsigned char)chd;
+            pStop--;                        // 2 unsigned char sequence for 1 char so need pStop--.
+            pTarget++;
+
+            *pTarget = (unsigned char)(0x80 | (ch & 0x3F));
+            // pStop - this unsigned char is already included
+            pTarget++;
+        }
+
+        assert(pTarget <= pAllocatedBufferEnd);
+
+        // no pending char at this point
+        ch = 0;
+    }
+
+    return (int)(pTarget - bytes);
+}
+
+static size_t GetByteCount(UTF8Encoding* self, CHAR16_T *chars, size_t count)
+{
+    // For fallback we may need a fallback buffer.
+    // We wait to initialize it though in case we don't have any broken input unicode
+    bool fallbackUsed = false;
+    CHAR16_T *pSrc = chars;
+    CHAR16_T *pEnd = pSrc + count;
+
+    // Start by assuming we have as many as count
+    size_t byteCount = count;
+
+    int ch = 0;
+
+    while (true)
+    {
+        // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+        if (pSrc >= pEnd)
+        {
+
+            if (ch == 0)
+            {
+                // Unroll any fallback that happens at the end
+                ch = fallbackUsed ? EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder) : 0;
+                if (ch > 0)
+                {
+                    byteCount++;
+                    goto ProcessChar;
+                }
+            }
+            else
+            {
+                // Case of surrogates in the fallback.
+                if (fallbackUsed && self->buffer.encoder.bFallingBack)
+                {
+                    assert(ch >= 0xD800 && ch <= 0xDBFF);
+
+                    ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder);
+                    byteCount++;
+
+                    if (InRange(ch, LOW_SURROGATE_START, LOW_SURROGATE_END))
+                    {
+                        ch = 0xfffd;
+                        byteCount++;
+                        goto EncodeChar;
+                    }
+                    else if (ch > 0)
+                    {
+                        goto ProcessChar;
+                    }
+                    else
+                    {
+                        byteCount--; // ignore last one.
+                        break;
+                    }
+                }
+            }
+
+            if (ch <= 0)
+            {
+                break;
+            }
+
+            // attempt to encode the partial surrogate (will fallback or ignore it), it'll also subtract 1.
+            byteCount++;
+            goto EncodeChar;
+        }
+
+        if (ch > 0)
+        {
+            assert(ch >= 0xD800 && ch <= 0xDBFF);
+
+            // use separate helper variables for local contexts so that the jit optimizations
+            // won't get confused about the variable lifetimes
+            int cha = *pSrc;
+
+            // count the pending surrogate
+            byteCount++;
+
+            // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
+            if (InRange(cha, LOW_SURROGATE_START, LOW_SURROGATE_END))
+            {
+                // Don't need a real # because we're just counting, anything > 0x7ff ('cept surrogate) will do.
+                ch = 0xfffd;
+                //                        ch = cha + (ch << 10) +
+                //                            (0x10000
+                //                            - LOW_SURROGATE_START
+                //                            - (HIGH_SURROGATE_START << 10) );
+
+                // Use this next char
+                pSrc++;
+            }
+            // else ch is still high surrogate and encoding will fail (so don't add count)
+
+            // attempt to encode the surrogate or partial surrogate
+            goto EncodeChar;
+        }
+
+        // If we've used a fallback, then we have to check for it
+        if (fallbackUsed)
+        {
+            ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder);
+            if (ch > 0)
+            {
+                // We have an extra byte we weren't expecting.
+                byteCount++;
+                goto ProcessChar;
+            }
+        }
+
+        // read next char. The JIT optimization seems to be getting confused when
+        // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+        ch = *pSrc;
+        pSrc++;
+
+    ProcessChar:
+        if (InRange(ch, HIGH_SURROGATE_START, HIGH_SURROGATE_END))
+        {
+            // we will count this surrogate next time around
+            byteCount--;
+            continue;
+        }
+        // either good char or partial surrogate
+
+    EncodeChar:
+        // throw exception on partial surrogate if necessary
+        if (InRange(ch, HIGH_SURROGATE_START, LOW_SURROGATE_END))
+        {
+            // Lone surrogates aren't allowed
+            // Have to make a fallback buffer if we don't have one
+            if (!fallbackUsed)
+            {
+                // wait on fallbacks if we can
+                // For fallback we may need a fallback buffer
+                fallbackUsed = true;
+
+                // Set our internal fallback interesting things.
+                EncoderReplacementFallbackBuffer_InternalInitialize(&self->buffer.encoder, chars, chars + count, false);
+            }
+
+            // Do our fallback.  Actually we already know its a mixed up surrogate,
+            // so the ref pSrc isn't gonna do anything.
+            EncoderReplacementFallbackBuffer_InternalFallback(&self->buffer.encoder, (CHAR16_T)ch, &pSrc);
+
+            // Ignore it if we don't throw (we had preallocated this ch)
+            byteCount--;
+            ch = 0;
+            continue;
+        }
+
+        // Count them
+        if (ch > 0x7F)
+        {
+            if (ch > 0x7FF)
+            {
+                // the extra surrogate byte was compensated by the second surrogate character
+                // (2 surrogates make 4 bytes.  We've already counted 2 bytes, 1 per char)
+                byteCount++;
+            }
+            byteCount++;
+        }
+
+#if WIN64
+        // check for overflow
+        if (byteCount < 0)
+        {
+            break;
+        }
+#endif
+
+        // If still have fallback don't do fast loop
+        if (fallbackUsed && (ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder)) != 0)
+        {
+            // We're reserving 1 byte for each char by default
+            byteCount++;
+            goto ProcessChar;
+        }
+
+        int availableChars = pEnd - pSrc;
+
+        // don't fall into the fast decoding loop if we don't have enough characters
+        if (availableChars <= 13)
+        {
+            // try to get over the remainder of the ascii characters fast though
+            CHAR16_T* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+            while (pSrc < pLocalEnd)
+            {
+                ch = *pSrc;
+                pSrc++;
+                if (ch > 0x7F) goto ProcessChar;
+            }
+
+            // we are done
+            break;
+        }
+
+#if WIN64
+        // make sure that we won't get a silent overflow inside the fast loop
+        // (Fall out to slow loop if we have this many characters)
+        availableChars &= 0x0FFFFFFF;
+#endif
+
+        // To compute the upper bound, assume that all characters are ASCII characters at this point,
+        //  the boundary will be decreased for every non-ASCII character we encounter
+        // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
+        CHAR16_T *pStop = pSrc + availableChars - (3 + 4);
+
+        while (pSrc < pStop)
+        {
+            ch = *pSrc;
+            pSrc++;
+
+            if (ch > 0x7F)                                                  // Not ASCII
+            {
+                if (ch > 0x7FF)                                             // Not 2 Byte
+                {
+                    if ((ch & 0xF800) == 0xD800)                            // See if its a Surrogate
+                        goto LongCode;
+                    byteCount++;
+                }
+                byteCount++;
+            }
+
+            // get pSrc aligned
+            if (((size_t)pSrc & 0x2) != 0)
+            {
+                ch = *pSrc;
+                pSrc++;
+                if (ch > 0x7F)                                              // Not ASCII
+                {
+                    if (ch > 0x7FF)                                         // Not 2 Byte
+                    {
+                        if ((ch & 0xF800) == 0xD800)                        // See if its a Surrogate
+                            goto LongCode;
+                        byteCount++;
+                    }
+                    byteCount++;
+                }
+            }
+
+            // Run 2 * 4 characters at a time!
+            while (pSrc < pStop)
+            {
+                ch = *(int*)pSrc;
+                int chc = *(int*)(pSrc + 2);
+                if (((ch | chc) & (int)0xFF80FF80) != 0)         // See if not ASCII
+                {
+                    if (((ch | chc) & (int)0xF800F800) != 0)     // See if not 2 Byte
+                    {
+                        goto LongCodeWithMask;
+                    }
+
+
+                    if ((ch & (int)0xFF800000) != 0)             // Actually 0x07800780 is all we care about (4 bits)
+                        byteCount++;
+                    if ((ch & (int)0xFF80) != 0)
+                        byteCount++;
+                    if ((chc & (int)0xFF800000) != 0)
+                        byteCount++;
+                    if ((chc & (int)0xFF80) != 0)
+                        byteCount++;
+                }
+                pSrc += 4;
+
+                ch = *(int*)pSrc;
+                chc = *(int*)(pSrc + 2);
+                if (((ch | chc) & (int)0xFF80FF80) != 0)         // See if not ASCII
+                {
+                    if (((ch | chc) & (int)0xF800F800) != 0)     // See if not 2 Byte
+                    {
+                        goto LongCodeWithMask;
+                    }
+
+                    if ((ch & (int)0xFF800000) != 0)
+                        byteCount++;
+                    if ((ch & (int)0xFF80) != 0)
+                        byteCount++;
+                    if ((chc & (int)0xFF800000) != 0)
+                        byteCount++;
+                    if ((chc & (int)0xFF80) != 0)
+                        byteCount++;
+                }
+                pSrc += 4;
+            }
+            break;
+
+        LongCodeWithMask:
+#if BIGENDIAN
+        // be careful about the sign extension
+        if (!self->treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
+        else
+#endif
+        ch = (CHAR16_T)ch;
+
+        pSrc++;
+
+        if (ch <= 0x7F)
+        {
+            continue;
+        }
+
+        LongCode:
+            // use separate helper variables for slow and fast loop so that the jit optimizations
+            // won't get confused about the variable lifetimes
+            if (ch > 0x7FF)
+            {
+                if (InRange(ch, HIGH_SURROGATE_START, LOW_SURROGATE_END))
+                {
+                    // 4 byte encoding - high surrogate + low surrogate
+
+                    int chd = *pSrc;
+                    if (
+                        ch > HIGH_SURROGATE_END ||
+                        !InRange(chd, LOW_SURROGATE_START, LOW_SURROGATE_END))
+                    {
+                        // Back up and drop out to slow loop to figure out error
+                        pSrc--;
+                        break;
+                    }
+                    pSrc++;
+
+                    // byteCount - this byte is compensated by the second surrogate character
+                }
+                byteCount++;
+            }
+            byteCount++;
+
+            // byteCount - the last byte is already included
+        }
+
+        // no pending char at this point
+        ch = 0;
+    }
+
+#if WIN64
+    // check for overflow
+    assert(byteCount >= 0);
+#endif
+    assert(!fallbackUsed || self->buffer.encoder.fallbackCount < 0);
+
+    return byteCount;
+}
+
+size_t minipal_get_length_utf8_to_utf16(const char* source, size_t sourceLength, unsigned int flags)
+{
+    errno = 0;
+
+    if (sourceLength == 0)
+        return 0;
+
+    UTF8Encoding enc =
+    {
+        .buffer = { .decoder = { .fallbackCount = -1, .fallbackIndex = -1, .strDefault = { 0xFFFD, 0 }, .strDefaultLength = 1 } },
+        .useFallback = !(flags & MINIPAL_MB_NO_REPLACE_INVALID_CHARS),
+#if BIGENDIAN
+        .treatAsLE = (flags & MINIPAL_TREAT_AS_LITTLE_ENDIAN)
+#endif
+    };
+
+    return GetCharCount(&enc, (unsigned char*)source, sourceLength);
+}
+
+size_t minipal_get_length_utf16_to_utf8(const CHAR16_T* source, size_t sourceLength, unsigned int flags)
+{
+    errno = 0;
+
+    if (sourceLength == 0)
+        return 0;
+
+    UTF8Encoding enc =
+    {
+        // repeat replacement char (0xFFFD) twice for a surrogate pair
+        .buffer = { .encoder = { .fallbackCount = -1, .fallbackIndex = -1, .strDefault = { 0xFFFD, 0xFFFD, 0 }, .strDefaultLength = 2 } },
+        .useFallback = true,
+#if BIGENDIAN
+        .treatAsLE = (flags & MINIPAL_TREAT_AS_LITTLE_ENDIAN)
+#endif
+    };
+
+#if !BIGENDIAN
+    (void)flags; // unused
+#endif
+
+    return GetByteCount(&enc, (CHAR16_T*)source, sourceLength);
+}
+
+size_t minipal_convert_utf8_to_utf16(const char* source, size_t sourceLength, CHAR16_T* destination, size_t destinationLength, unsigned int flags)
+{
+    size_t ret;
+    errno = 0;
+
+    if (sourceLength == 0)
+        return 0;
+
+    UTF8Encoding enc =
+    {
+        .buffer = { .decoder = { .fallbackCount = -1, .fallbackIndex = -1, .strDefault = { 0xFFFD, 0 }, .strDefaultLength = 1 } },
+        .useFallback = !(flags & MINIPAL_MB_NO_REPLACE_INVALID_CHARS),
+#if BIGENDIAN
+        .treatAsLE = (flags & MINIPAL_TREAT_AS_LITTLE_ENDIAN)
+#endif
+    };
+
+    if (GetCharCount(&enc, (unsigned char*)source, sourceLength) > destinationLength)
+    {
+        errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+        ret = 0;
+    }
+    else
+    {
+        ret = GetChars(&enc, (unsigned char*)source, sourceLength, destination, destinationLength);
+        if (errno) ret = 0;
+    }
+
+    return ret;
+}
+
+size_t minipal_convert_utf16_to_utf8(const CHAR16_T* source, size_t sourceLength, char* destination, size_t destinationLength, unsigned int flags)
+{
+    size_t ret;
+    errno = 0;
+
+    if (sourceLength == 0)
+        return 0;
+
+    UTF8Encoding enc =
+    {
+        // repeat replacement char (0xFFFD) twice for a surrogate pair
+        .buffer = { .encoder = { .fallbackCount = -1, .fallbackIndex = -1, .strDefault = { 0xFFFD, 0xFFFD, 0 }, .strDefaultLength = 2 } },
+        .useFallback = true,
+#if BIGENDIAN
+        .treatAsLE = (flags & MINIPAL_TREAT_AS_LITTLE_ENDIAN)
+#endif
+    };
+
+    if (GetByteCount(&enc, (CHAR16_T*)source, sourceLength) > destinationLength)
+    {
+        errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+        ret = 0;
+    }
+    else
+    {
+#if !BIGENDIAN
+        (void)flags; // unused
+#endif
+
+        ret = GetBytes(&enc, (CHAR16_T*)source, sourceLength, (unsigned char*)destination, destinationLength);
+        if (errno) ret = 0;
+    }
+
+    return ret;
+}
diff --git a/src/native/minipal/utf8.cpp b/src/native/minipal/utf8.cpp
deleted file mode 100644
index 875b4eeb6008ca..00000000000000
--- a/src/native/minipal/utf8.cpp
+++ /dev/null
@@ -1,2901 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-Module Name:
-
-    unicode/utf8.c
-
-Abstract:
-    Functions to encode and decode UTF-8 strings. This is a port of the C# version from Utf8Encoding.cs.
-
-Revision History:
-
---*/
-
-#include <minipal/utf8.h>
-
-#include <errno.h>
-#include <limits.h>
-#include <string.h>
-#include <assert.h>
-
-#define FASTLOOP
-
-#ifdef TARGET_WINDOWS
-#define W(str) L ## str
-#else
-#define W(str) u##str
-#endif
-
-inline void *operator new(size_t, void *p) throw () { return p; }
-
-struct CharUnicodeInfo
-{
-    static const char16_t HIGH_SURROGATE_START = 0xd800;
-    static const char16_t HIGH_SURROGATE_END = 0xdbff;
-    static const char16_t LOW_SURROGATE_START = 0xdc00;
-    static const char16_t LOW_SURROGATE_END = 0xdfff;
-};
-
-struct Char
-{
-    // Test if the wide character is a high surrogate
-    static bool IsHighSurrogate(const char16_t c)
-    {
-        return (c & 0xFC00) == CharUnicodeInfo::HIGH_SURROGATE_START;
-    }
-
-    // Test if the wide character is a low surrogate
-    static bool IsLowSurrogate(const char16_t c)
-    {
-        return (c & 0xFC00) == CharUnicodeInfo::LOW_SURROGATE_START;
-    }
-
-    // Test if the wide character is a surrogate half
-    static bool IsSurrogate(const char16_t c)
-    {
-        return (c & 0xF800) == CharUnicodeInfo::HIGH_SURROGATE_START;
-    }
-
-    // Test if the wide character is a high surrogate
-    static bool IsHighSurrogate(const char16_t* s, int index)
-    {
-        return IsHighSurrogate(s[index]);
-    }
-
-    // Test if the wide character is a low surrogate
-    static bool IsLowSurrogate(const char16_t* s, int index)
-    {
-        return IsLowSurrogate(s[index]);
-    }
-
-    // Test if the wide character is a surrogate half
-    static bool IsSurrogate(const char16_t* s, int index)
-    {
-        return IsSurrogate(s[index]);
-    }
-};
-
-size_t wcslen(const char16_t* str)
-{
-    size_t nChar = 0;
-    while (*str++) nChar++;
-    return nChar;
-}
-
-int wcscpy_s(char16_t *_Dst, size_t _SizeInWords, const char16_t *_Src)
-{
-
-    char16_t* p = _Dst;
-    size_t available = _SizeInWords;
-
-    if (!_Src || !_Dst || _SizeInWords == 0) return EINVAL;
-
-    while ((*p++ = *_Src++) != 0 && --available > 0);
-
-    if (available == 0)
-    {
-        _Dst = 0;
-        return ERANGE;
-    }
-
-#ifdef DEBUG
-    size_t offset = _SizeInWords - available + 1;
-    if (offset < _SizeInWords)
-    {
-        memset((_Dst) + (offset), 0xFD, ((_SizeInWords) - (offset)) * sizeof(*(_Dst)));
-    }
-#endif
-
-    return 0;
-}
-
-int wcscat_s(char16_t *_Dst, size_t _SizeInWords, const char16_t *_Src)
-{
-    char16_t* p = _Dst;
-    size_t available = _SizeInWords;
-
-    if (!_Src || !_Dst || _SizeInWords == 0) return EINVAL;
-
-    while (available > 0 && *p != 0)
-    {
-        p++;
-        available--;
-    }
-
-    if (available == 0)
-    {
-        _Dst = 0;
-        return EINVAL;
-    }
-
-    while ((*p++ = *_Src++) != 0 && --available > 0)
-    {
-    }
-
-    if (available == 0)
-    {
-        _Dst = 0;
-        return ERANGE;
-    }
-
-#ifdef DEBUG
-    size_t offset = _SizeInWords - available + 1;
-    if (offset < _SizeInWords)
-    {
-        memset((_Dst) + (offset), 0xFD, ((_SizeInWords) - (offset)) * sizeof(*(_Dst)));
-    }
-#endif
-    return 0;
-}
-
-#define ContractAssert(cond)             \
-    if (!(cond))                         \
-    {                                    \
-        errno = ERROR_INVALID_PARAMETER; \
-        return 0;                        \
-    }
-
-#define ContractAssertVoid(cond)         \
-    if (!(cond))                         \
-    {                                    \
-        errno = ERROR_INVALID_PARAMETER; \
-        return;                          \
-    }
-
-#define ContractAssertFreeFallback(cond) \
-    if (!(cond))                         \
-    {                                    \
-        errno = ERROR_INVALID_PARAMETER; \
-        if (fallback) free(fallback);    \
-        return 0;                        \
-    }
-
-#define RETURN_ON_ERROR               \
-    if (errno)                        \
-    {                                 \
-        if (fallback) free(fallback); \
-        return 0;                     \
-    }
-
-class DecoderFallbackBuffer;
-
-class DecoderFallback
-{
-public:
-
-    // Fallback
-    //
-    // Return the appropriate unicode string alternative to the character that need to fall back.
-
-    virtual DecoderFallbackBuffer* CreateFallbackBuffer() { assert(!"pure virtual function called"); while(true); }
-
-    // Maximum number of characters that this instance of this fallback could return
-
-    virtual int GetMaxCharCount() { assert(!"pure virtual function called"); while(true); }
-};
-
-class DecoderReplacementFallback : public DecoderFallback
-{
-    // Our variables
-    char16_t strDefault[2];
-    int strDefaultLength;
-
-public:
-    // Construction.  Default replacement fallback uses no best fit and ? replacement string
-    DecoderReplacementFallback() : DecoderReplacementFallback(W("?"))
-    {
-    }
-
-    DecoderReplacementFallback(const char16_t* replacement)
-    {
-        // Must not be null
-        ContractAssertVoid(replacement != nullptr)
-
-        // Make sure it doesn't have bad surrogate pairs
-        bool bFoundHigh = false;
-        int replacementLength = wcslen((const char16_t *)replacement);
-        for (int i = 0; i < replacementLength; i++)
-        {
-            // Found a surrogate?
-            if (Char::IsSurrogate(replacement, i))
-            {
-                // High or Low?
-                if (Char::IsHighSurrogate(replacement, i))
-                {
-                    // if already had a high one, stop
-                    if (bFoundHigh)
-                        break;  // break & throw at the bFoundHIgh below
-                    bFoundHigh = true;
-                }
-                else
-                {
-                    // Low, did we have a high?
-                    if (!bFoundHigh)
-                    {
-                        // Didn't have one, make if fail when we stop
-                        bFoundHigh = true;
-                        break;
-                    }
-
-                    // Clear flag
-                    bFoundHigh = false;
-                }
-            }
-            // If last was high we're in trouble (not surrogate so not low surrogate, so break)
-            else if (bFoundHigh)
-                break;
-        }
-        ContractAssertVoid(!bFoundHigh)
-
-        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), replacement);
-        strDefaultLength = replacementLength;
-    }
-
-    char16_t* GetDefaultString()
-    {
-        return strDefault;
-    }
-
-    virtual DecoderFallbackBuffer* CreateFallbackBuffer();
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount()
-    {
-        return strDefaultLength;
-    }
-};
-
-class DecoderFallbackBuffer
-{
-    friend class UTF8Encoding;
-    // Most implementations will probably need an implementation-specific constructor
-
-    // internal methods that cannot be overridden that let us do our fallback thing
-    // These wrap the internal methods so that we can check for people doing stuff that's incorrect
-
-public:
-    virtual bool Fallback(unsigned char bytesUnknown[], int index, int size) { assert(!"pure virtual function called"); while(true); }
-
-    // Get next character
-    virtual char16_t GetNextChar() { assert(!"pure virtual function called"); while(true); }
-
-    //Back up a character
-    virtual bool MovePrevious() { assert(!"pure virtual function called"); while(true); }
-
-    // How many chars left in this fallback?
-    virtual int GetRemaining() { assert(!"pure virtual function called"); while(true); }
-
-    // Clear the buffer
-    virtual void Reset()
-    {
-        while (GetNextChar() != (char16_t)0);
-    }
-
-    // Internal items to help us figure out what we're doing as far as error messages, etc.
-    // These help us with our performance and messages internally
-protected:
-    unsigned char*           byteStart;
-    char16_t*          charEnd;
-
-    // Internal reset
-    void InternalReset()
-    {
-        byteStart = nullptr;
-        Reset();
-    }
-
-    // Set the above values
-    // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
-    void InternalInitialize(unsigned char* byteStart, char16_t* charEnd)
-    {
-        this->byteStart = byteStart;
-        this->charEnd = charEnd;
-    }
-
-    // Fallback the current byte by sticking it into the remaining char buffer.
-    // This can only be called by our encodings (other have to use the public fallback methods), so
-    // we can use our DecoderNLS here too (except we don't).
-    // Returns true if we are successful, false if we can't fallback the character (no buffer space)
-    // So caller needs to throw buffer space if return false.
-    // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
-    // array, and we might need the index, hence the byte*
-    // Don't touch ref chars unless we succeed
-    virtual bool InternalFallback(unsigned char bytes[], unsigned char* pBytes, char16_t** chars, int size)
-    {
-
-        ContractAssert(byteStart != nullptr)
-
-        bool fallbackResult = this->Fallback(bytes, (int)(pBytes - byteStart - size), size);
-        if (errno) return false;
-
-        // See if there's a fallback character and we have an output buffer then copy our string.
-        if (fallbackResult)
-        {
-            // Copy the chars to our output
-            char16_t ch;
-            char16_t* charTemp = *chars;
-            bool bHighSurrogate = false;
-            while ((ch = GetNextChar()) != 0)
-            {
-                // Make sure no mixed up surrogates
-                if (Char::IsSurrogate(ch))
-                {
-                    if (Char::IsHighSurrogate(ch))
-                    {
-                        // High Surrogate
-                        ContractAssert(!bHighSurrogate)
-                        bHighSurrogate = true;
-                    }
-                    else
-                    {
-                        // Low surrogate
-                        ContractAssert(bHighSurrogate)
-                        bHighSurrogate = false;
-                    }
-                }
-
-                if (charTemp >= charEnd)
-                {
-                    // No buffer space
-                    return false;
-                }
-
-                *(charTemp++) = ch;
-            }
-
-            // Need to make sure that bHighSurrogate isn't true
-            ContractAssert(!bHighSurrogate)
-
-            // Now we aren't going to be false, so its OK to update chars
-            *chars = charTemp;
-        }
-
-        return true;
-    }
-
-    // This version just counts the fallback and doesn't actually copy anything.
-    virtual int InternalFallback(unsigned char bytes[], unsigned char* pBytes, int size)
-        // Right now this has both bytes[] and unsigned char* bytes, since we might have extra bytes, hence the
-        // array, and we might need the index, hence the byte*
-    {
-
-        ContractAssert(byteStart != nullptr)
-
-        bool fallbackResult = this->Fallback(bytes, (int)(pBytes - byteStart - size), size);
-        if (errno) return 0;
-
-        // See if there's a fallback character and we have an output buffer then copy our string.
-        if (fallbackResult)
-        {
-            int count = 0;
-
-            char16_t ch;
-            bool bHighSurrogate = false;
-            while ((ch = GetNextChar()) != 0)
-            {
-                // Make sure no mixed up surrogates
-                if (Char::IsSurrogate(ch))
-                {
-                    if (Char::IsHighSurrogate(ch))
-                    {
-                        // High Surrogate
-                        ContractAssert(!bHighSurrogate)
-                        bHighSurrogate = true;
-                    }
-                    else
-                    {
-                        // Low surrogate
-                        ContractAssert(bHighSurrogate)
-                        bHighSurrogate = false;
-                    }
-                }
-
-                count++;
-            }
-
-            // Need to make sure that bHighSurrogate isn't true
-            ContractAssert(!bHighSurrogate)
-
-            return count;
-        }
-
-        // If no fallback return 0
-        return 0;
-    }
-};
-
-class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
-{
-    // Store our default string
-    char16_t strDefault[2];
-    int strDefaultLength;
-    int fallbackCount = -1;
-    int fallbackIndex = -1;
-
-public:
-    // Construction
-    DecoderReplacementFallbackBuffer(DecoderReplacementFallback* fallback)
-    {
-        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
-        strDefaultLength = wcslen((const char16_t *)fallback->GetDefaultString());
-    }
-
-    // Fallback Methods
-    virtual bool Fallback(unsigned char bytesUnknown[], int index, int size)
-    {
-        // We expect no previous fallback in our buffer
-        // We can't call recursively but others might (note, we don't test on last char!!!)
-        ContractAssert(fallbackCount < 1)
-
-        // Go ahead and get our fallback
-        if (strDefaultLength == 0)
-            return false;
-
-        fallbackCount = strDefaultLength;
-        fallbackIndex = -1;
-
-        return true;
-    }
-
-    virtual char16_t GetNextChar()
-    {
-        // We want it to get < 0 because == 0 means that the current/last character is a fallback
-        // and we need to detect recursion.  We could have a flag but we already have this counter.
-        fallbackCount--;
-        fallbackIndex++;
-
-        // Do we have anything left? 0 is now last fallback char, negative is nothing left
-        if (fallbackCount < 0)
-            return '\0';
-
-        // Need to get it out of the buffer.
-        // Make sure it didn't wrap from the fast count-- path
-        if (fallbackCount == INT_MAX)
-        {
-            fallbackCount = -1;
-            return '\0';
-        }
-
-        // Now make sure its in the expected range
-        ContractAssert(fallbackIndex < strDefaultLength && fallbackIndex >= 0)
-
-        return strDefault[fallbackIndex];
-    }
-
-    virtual bool MovePrevious()
-    {
-        // Back up one, only if we just processed the last character (or earlier)
-        if (fallbackCount >= -1 && fallbackIndex >= 0)
-        {
-            fallbackIndex--;
-            fallbackCount++;
-            return true;
-        }
-
-        // Return false 'cause we couldn't do it.
-        return false;
-    }
-
-    // How many characters left to output?
-    virtual int GetRemaining()
-    {
-        // Our count is 0 for 1 character left.
-        return (fallbackCount < 0) ? 0 : fallbackCount;
-    }
-
-    // Clear the buffer
-    virtual void Reset()
-    {
-        fallbackCount = -1;
-        fallbackIndex = -1;
-        byteStart = nullptr;
-    }
-
-    // This version just counts the fallback and doesn't actually copy anything.
-    virtual int InternalFallback(unsigned char bytes[], unsigned char* pBytes, int size)
-        // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
-        // array, and we might need the index, hence the byte*
-    {
-        // return our replacement string Length
-        return strDefaultLength;
-    }
-};
-
-class DecoderExceptionFallbackBuffer : public DecoderFallbackBuffer
-{
-public:
-    DecoderExceptionFallbackBuffer()
-    {
-    }
-
-    virtual bool Fallback(unsigned char bytesUnknown[], int index, int size)
-    {
-        ContractAssert(false)
-    }
-
-    virtual char16_t GetNextChar()
-    {
-        return 0;
-    }
-
-    virtual bool MovePrevious()
-    {
-        // Exception fallback doesn't have anywhere to back up to.
-        return false;
-    }
-
-    // Exceptions are always empty
-    virtual int GetRemaining()
-    {
-        return 0;
-    }
-
-};
-
-class DecoderExceptionFallback : public DecoderFallback
-{
-    // Construction
-public:
-    DecoderExceptionFallback()
-    {
-    }
-
-    virtual DecoderFallbackBuffer* CreateFallbackBuffer()
-    {
-        DecoderExceptionFallbackBuffer* pMem = (DecoderExceptionFallbackBuffer*)malloc(sizeof(DecoderExceptionFallbackBuffer));
-        if (pMem == nullptr)
-        {
-            errno = ERROR_INSUFFICIENT_BUFFER;
-            return nullptr;
-        }
-        return new (pMem) DecoderExceptionFallbackBuffer();
-    }
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount()
-    {
-        return 0;
-    }
-};
-
-DecoderFallbackBuffer* DecoderReplacementFallback::CreateFallbackBuffer()
-{
-    DecoderReplacementFallbackBuffer* pMem = (DecoderReplacementFallbackBuffer*)malloc(sizeof(DecoderReplacementFallbackBuffer));
-    if (pMem == nullptr)
-    {
-        errno = ERROR_INSUFFICIENT_BUFFER;
-        return nullptr;
-    }
-    pMem = new (pMem) DecoderReplacementFallbackBuffer(this);
-    if (errno)
-    {
-        free(pMem);
-        return nullptr;
-    }
-    return pMem;
-}
-
-class EncoderFallbackBuffer;
-
-class EncoderFallback
-{
-public:
-
-    // Fallback
-    //
-    // Return the appropriate unicode string alternative to the character that need to fall back.
-
-    virtual EncoderFallbackBuffer* CreateFallbackBuffer() { assert(!"pure virtual function called"); while(true); }
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount() { assert(!"pure virtual function called"); while(true); }
-};
-
-class EncoderReplacementFallback : public EncoderFallback
-{
-    // Our variables
-    char16_t strDefault[2];
-    int strDefaultLength;
-
-public:
-    // Construction.  Default replacement fallback uses no best fit and ? replacement string
-    EncoderReplacementFallback() : EncoderReplacementFallback(W("?"))
-    {
-    }
-
-    EncoderReplacementFallback(const char16_t* replacement)
-    {
-        // Must not be null
-        ContractAssertVoid(replacement != nullptr)
-
-        // Make sure it doesn't have bad surrogate pairs
-        bool bFoundHigh = false;
-        int replacementLength = wcslen((const char16_t *)replacement);
-        for (int i = 0; i < replacementLength; i++)
-        {
-            // Found a surrogate?
-            if (Char::IsSurrogate(replacement, i))
-            {
-                // High or Low?
-                if (Char::IsHighSurrogate(replacement, i))
-                {
-                    // if already had a high one, stop
-                    if (bFoundHigh)
-                        break;  // break & throw at the bFoundHIgh below
-                    bFoundHigh = true;
-                }
-                else
-                {
-                    // Low, did we have a high?
-                    if (!bFoundHigh)
-                    {
-                        // Didn't have one, make if fail when we stop
-                        bFoundHigh = true;
-                        break;
-                    }
-
-                    // Clear flag
-                    bFoundHigh = false;
-                }
-            }
-            // If last was high we're in trouble (not surrogate so not low surrogate, so break)
-            else if (bFoundHigh)
-                break;
-        }
-        ContractAssertVoid(!bFoundHigh)
-
-        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), replacement);
-        strDefaultLength = replacementLength;
-    }
-
-    char16_t* GetDefaultString()
-    {
-        return strDefault;
-    }
-
-    virtual EncoderFallbackBuffer* CreateFallbackBuffer();
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount()
-    {
-        return strDefaultLength;
-    }
-};
-
-class EncoderFallbackBuffer
-{
-    friend class UTF8Encoding;
-    // Most implementations will probably need an implementation-specific constructor
-
-    // Public methods that cannot be overridden that let us do our fallback thing
-    // These wrap the internal methods so that we can check for people doing stuff that is incorrect
-
-public:
-    virtual bool Fallback(char16_t charUnknown, int index) { assert(!"pure virtual function called"); while(true); }
-
-    virtual bool Fallback(char16_t charUnknownHigh, char16_t charUnknownLow, int index) { assert(!"pure virtual function called"); while(true); }
-
-    // Get next character
-    virtual char16_t GetNextChar() { assert(!"pure virtual function called"); while(true); }
-
-    // Back up a character
-    virtual bool MovePrevious() { assert(!"pure virtual function called"); while(true); }
-
-    // How many chars left in this fallback?
-    virtual int GetRemaining() { assert(!"pure virtual function called"); while(true); }
-
-    // Not sure if this should be public or not.
-    // Clear the buffer
-    virtual void Reset()
-    {
-        while (GetNextChar() != (char16_t)0);
-    }
-
-    // Internal items to help us figure out what we're doing as far as error messages, etc.
-    // These help us with our performance and messages internally
-protected:
-    char16_t*          charStart;
-    char16_t*          charEnd;
-    bool            setEncoder;
-    bool            bUsedEncoder;
-    bool            bFallingBack = false;
-    int             iRecursionCount = 0;
-    static const int iMaxRecursion = 250;
-
-    // Internal Reset
-    // For example, what if someone fails a conversion and wants to reset one of our fallback buffers?
-    void InternalReset()
-    {
-        charStart = nullptr;
-        bFallingBack = false;
-        iRecursionCount = 0;
-        Reset();
-    }
-
-    // Set the above values
-    // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
-    void InternalInitialize(char16_t* charStart, char16_t* charEnd, bool setEncoder)
-    {
-        this->charStart = charStart;
-        this->charEnd = charEnd;
-        this->setEncoder = setEncoder;
-        this->bUsedEncoder = false;
-        this->bFallingBack = false;
-        this->iRecursionCount = 0;
-    }
-
-    char16_t InternalGetNextChar()
-    {
-        char16_t ch = GetNextChar();
-        bFallingBack = (ch != 0);
-        if (ch == 0) iRecursionCount = 0;
-        return ch;
-    }
-
-    // Fallback the current character using the remaining buffer and encoder if necessary
-    // This can only be called by our encodings (other have to use the public fallback methods), so
-    // we can use our EncoderNLS here too.
-    // setEncoder is true if we're calling from a GetBytes method, false if we're calling from a GetByteCount
-    //
-    // Note that this could also change the contents of this->encoder, which is the same
-    // object that the caller is using, so the caller could mess up the encoder for us
-    // if they aren't careful.
-    virtual bool InternalFallback(char16_t ch, char16_t** chars)
-    {
-        // Shouldn't have null charStart
-        ContractAssert(charStart != nullptr)
-
-        // Get our index, remember chars was preincremented to point at next char, so have to -1
-        int index = (int)(*chars - charStart) - 1;
-
-        // See if it was a high surrogate
-        if (Char::IsHighSurrogate(ch))
-        {
-            // See if there's a low surrogate to go with it
-            if (*chars >= this->charEnd)
-            {
-                // Nothing left in input buffer
-                // No input, return 0
-            }
-            else
-            {
-                // Might have a low surrogate
-                char16_t cNext = **chars;
-                if (Char::IsLowSurrogate(cNext))
-                {
-                    // If already falling back then fail
-                    ContractAssert(!bFallingBack || iRecursionCount++ <= iMaxRecursion)
-
-                    // Next is a surrogate, add it as surrogate pair, and increment chars
-                    (*chars)++;
-                    bFallingBack = Fallback(ch, cNext, index);
-                    return bFallingBack;
-                }
-
-                // Next isn't a low surrogate, just fallback the high surrogate
-            }
-        }
-
-        // If already falling back then fail
-        ContractAssert(!bFallingBack || iRecursionCount++ <= iMaxRecursion)
-
-        // Fall back our char
-        bFallingBack = Fallback(ch, index);
-
-        return bFallingBack;
-    }
-};
-
-class EncoderReplacementFallbackBuffer : public EncoderFallbackBuffer
-{
-    // Store our default string
-    char16_t strDefault[4];
-    int strDefaultLength;
-    int fallbackCount = -1;
-    int fallbackIndex = -1;
-public:
-    // Construction
-    EncoderReplacementFallbackBuffer(EncoderReplacementFallback* fallback)
-    {
-        // 2X in case we're a surrogate pair
-        wcscpy_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
-        wcscat_s(strDefault, ARRAY_SIZE(strDefault), fallback->GetDefaultString());
-        strDefaultLength = 2 * wcslen((const char16_t *)fallback->GetDefaultString());
-
-    }
-
-    // Fallback Methods
-    virtual bool Fallback(char16_t charUnknown, int index)
-    {
-        // If we had a buffer already we're being recursive, throw, it's probably at the suspect
-        // character in our array.
-        ContractAssert(fallbackCount < 1)
-
-        // Go ahead and get our fallback
-        // Divide by 2 because we aren't a surrogate pair
-        fallbackCount = strDefaultLength / 2;
-        fallbackIndex = -1;
-
-        return fallbackCount != 0;
-    }
-
-    virtual bool Fallback(char16_t charUnknownHigh, char16_t charUnknownLow, int index)
-    {
-        // Double check input surrogate pair
-        ContractAssert(Char::IsHighSurrogate(charUnknownHigh))
-        ContractAssert(Char::IsLowSurrogate(charUnknownLow))
-
-        // If we had a buffer already we're being recursive, throw, it's probably at the suspect
-        // character in our array.
-        ContractAssert(fallbackCount < 1)
-
-        // Go ahead and get our fallback
-        fallbackCount = strDefaultLength;
-        fallbackIndex = -1;
-
-        return fallbackCount != 0;
-    }
-
-    virtual char16_t GetNextChar()
-    {
-        // We want it to get < 0 because == 0 means that the current/last character is a fallback
-        // and we need to detect recursion.  We could have a flag but we already have this counter.
-        fallbackCount--;
-        fallbackIndex++;
-
-        // Do we have anything left? 0 is now last fallback char, negative is nothing left
-        if (fallbackCount < 0)
-            return '\0';
-
-        // Need to get it out of the buffer.
-        // Make sure it didn't wrap from the fast count-- path
-        if (fallbackCount == INT_MAX)
-        {
-            fallbackCount = -1;
-            return '\0';
-        }
-
-        // Now make sure its in the expected range
-        ContractAssert(fallbackIndex < strDefaultLength && fallbackIndex >= 0)
-
-        return strDefault[fallbackIndex];
-    }
-
-    virtual bool MovePrevious()
-    {
-        // Back up one, only if we just processed the last character (or earlier)
-        if (fallbackCount >= -1 && fallbackIndex >= 0)
-        {
-            fallbackIndex--;
-            fallbackCount++;
-            return true;
-        }
-
-        // Return false 'cause we couldn't do it.
-        return false;
-    }
-
-    // How many characters left to output?
-    virtual int GetRemaining()
-    {
-        // Our count is 0 for 1 character left.
-        return (fallbackCount < 0) ? 0 : fallbackCount;
-    }
-
-    // Clear the buffer
-    virtual void Reset()
-    {
-        fallbackCount = -1;
-        fallbackIndex = 0;
-        charStart = nullptr;
-        bFallingBack = false;
-    }
-};
-
-class EncoderExceptionFallbackBuffer : public EncoderFallbackBuffer
-{
-public:
-    EncoderExceptionFallbackBuffer()
-    {
-    }
-
-    virtual bool Fallback(char16_t charUnknown, int index)
-    {
-        // Fall back our char
-        ContractAssert(false)
-    }
-
-    virtual bool Fallback(char16_t charUnknownHigh, char16_t charUnknownLow, int index)
-    {
-        ContractAssert(Char::IsHighSurrogate(charUnknownHigh))
-        ContractAssert(Char::IsLowSurrogate(charUnknownLow))
-
-        //int iTemp = Char::ConvertToUtf32(charUnknownHigh, charUnknownLow);
-
-        // Fall back our char
-        ContractAssert(false)
-    }
-
-    virtual char16_t GetNextChar()
-    {
-        return 0;
-    }
-
-    virtual bool MovePrevious()
-    {
-        // Exception fallback doesn't have anywhere to back up to.
-        return false;
-    }
-
-    // Exceptions are always empty
-    virtual int GetRemaining()
-    {
-        return 0;
-    }
-};
-
-class EncoderExceptionFallback : public EncoderFallback
-{
-    // Construction
-public:
-    EncoderExceptionFallback()
-    {
-    }
-
-    virtual EncoderFallbackBuffer* CreateFallbackBuffer()
-    {
-        EncoderExceptionFallbackBuffer* pMem = (EncoderExceptionFallbackBuffer*)malloc(sizeof(EncoderExceptionFallbackBuffer));
-        if (pMem == nullptr)
-            return nullptr;
-        return new (pMem) EncoderExceptionFallbackBuffer();
-    }
-
-    // Maximum number of characters that this instance of this fallback could return
-    virtual int GetMaxCharCount()
-    {
-        return 0;
-    }
-};
-
-EncoderFallbackBuffer* EncoderReplacementFallback::CreateFallbackBuffer()
-{
-    EncoderReplacementFallbackBuffer* pMem = (EncoderReplacementFallbackBuffer*)malloc(sizeof(EncoderReplacementFallbackBuffer));
-    if (pMem == nullptr)
-    {
-        errno = ERROR_INSUFFICIENT_BUFFER;
-        return nullptr;
-    }
-    return new (pMem) EncoderReplacementFallbackBuffer(this);
-}
-
-class UTF8Encoding
-{
-    EncoderFallback* encoderFallback;
-    // Instances of the two possible fallbacks. The constructor parameter
-    // determines which one to use.
-    EncoderReplacementFallback encoderReplacementFallback;
-    EncoderExceptionFallback encoderExceptionFallback;
-
-    DecoderFallback* decoderFallback;
-    // Instances of the two possible fallbacks. The constructor parameter
-    // determines which one to use.
-    DecoderReplacementFallback decoderReplacementFallback;
-    DecoderExceptionFallback decoderExceptionFallback;
-
-#if BIGENDIAN
-    bool treatAsLE;
-#endif
-
-    bool InRange(int c, int begin, int end)
-    {
-        return begin <= c && c <= end;
-    }
-
-    size_t PtrDiff(char16_t* ptr1, char16_t* ptr2)
-    {
-        return ptr1 - ptr2;
-    }
-
-    size_t PtrDiff(unsigned char* ptr1, unsigned char* ptr2)
-    {
-        return ptr1 - ptr2;
-    }
-
-    // During GetChars we had an invalid byte sequence
-    // pSrc is backed up to the start of the bad sequence if we didn't have room to
-    // fall it back.  Otherwise pSrc remains where it is.
-    bool FallbackInvalidByteSequence(unsigned char** pSrc, int ch, DecoderFallbackBuffer* fallback, char16_t** pTarget)
-    {
-        // Get our byte[]
-        unsigned char* pStart = *pSrc;
-        unsigned char bytesUnknown[3];
-        int size = GetBytesUnknown(pStart, ch, bytesUnknown);
-        bool fallbackResult = fallback->InternalFallback(bytesUnknown, *pSrc, pTarget, size);
-        RETURN_ON_ERROR
-
-        // Do the actual fallback
-        if (!fallbackResult)
-        {
-            // Oops, it failed, back up to pStart
-            *pSrc = pStart;
-            return false;
-        }
-
-        // It worked
-        return true;
-    }
-
-    int FallbackInvalidByteSequence(unsigned char* pSrc, int ch, DecoderFallbackBuffer *fallback)
-    {
-        // Get our byte[]
-        unsigned char bytesUnknown[3];
-        int size = GetBytesUnknown(pSrc, ch, bytesUnknown);
-
-        // Do the actual fallback
-        int count = fallback->InternalFallback(bytesUnknown, pSrc, size);
-
-        // # of fallback chars expected.
-        // Note that we only get here for "long" sequences, and have already unreserved
-        // the count that we prereserved for the input bytes
-        return count;
-    }
-
-    int GetBytesUnknown(unsigned char* pSrc, int ch, unsigned char* bytesUnknown)
-    {
-        int size;
-
-        // See if it was a plain char
-        // (have to check >= 0 because we have all sorts of weird bit flags)
-        if (ch < 0x100 && ch >= 0)
-        {
-            pSrc--;
-            bytesUnknown[0] = (unsigned char)ch;
-            size =  1;
-        }
-        // See if its an unfinished 2 byte sequence
-        else if ((ch & (SupplimentarySeq | ThreeByteSeq)) == 0)
-        {
-            pSrc--;
-            bytesUnknown[0] = (unsigned char)((ch & 0x1F) | 0xc0);
-            size = 1;
-        }
-        // So now we're either 2nd byte of 3 or 4 byte sequence or
-        // we hit a non-trail byte or we ran out of space for 3rd byte of 4 byte sequence
-        // 1st check if its a 4 byte sequence
-        else if ((ch & SupplimentarySeq) != 0)
-        {
-            //  3rd byte of 4 byte sequence?
-            if ((ch & (FinalByte >> 6)) != 0)
-            {
-                // 3rd byte of 4 byte sequence
-                pSrc -= 3;
-                bytesUnknown[0] = (unsigned char)(((ch >> 12) & 0x07) | 0xF0);
-                bytesUnknown[1] = (unsigned char)(((ch >> 6) & 0x3F) | 0x80);
-                bytesUnknown[2] = (unsigned char)(((ch)& 0x3F) | 0x80);
-                size = 3;
-            }
-            else if ((ch & (FinalByte >> 12)) != 0)
-            {
-                // 2nd byte of a 4 byte sequence
-                pSrc -= 2;
-                bytesUnknown[0] = (unsigned char)(((ch >> 6) & 0x07) | 0xF0);
-                bytesUnknown[1] = (unsigned char)(((ch)& 0x3F) | 0x80);
-                size = 2;
-            }
-            else
-            {
-                // 4th byte of a 4 byte sequence
-                pSrc--;
-                bytesUnknown[0] = (unsigned char)(((ch)& 0x07) | 0xF0);
-                size = 1;
-            }
-        }
-        else
-        {
-            // 2nd byte of 3 byte sequence?
-            if ((ch & (FinalByte >> 6)) != 0)
-            {
-                // So its 2nd byte of a 3 byte sequence
-                pSrc -= 2;
-                bytesUnknown[0] = (unsigned char)(((ch >> 6) & 0x0F) | 0xE0);
-                bytesUnknown[1] = (unsigned char)(((ch)& 0x3F) | 0x80);
-                size = 2;
-            }
-            else
-            {
-                // 1st byte of a 3 byte sequence
-                pSrc--;
-                bytesUnknown[0] = (unsigned char)(((ch)& 0x0F) | 0xE0);
-                size = 1;
-            }
-        }
-
-        return size;
-    }
-
-public:
-
-    UTF8Encoding(bool isThrowException, bool treatAsLE)
-        : encoderReplacementFallback(W("\xFFFD")), decoderReplacementFallback(W("\xFFFD"))
-#if BIGENDIAN
-        , treatAsLE(treatAsLE)
-#endif
-    {
-        if (isThrowException)
-        {
-            encoderFallback = &encoderExceptionFallback;
-            decoderFallback = &decoderExceptionFallback;
-        }
-        else
-        {
-            encoderFallback = &encoderReplacementFallback;
-            decoderFallback = &decoderReplacementFallback;
-        }
-    }
-
-    // These are bitmasks used to maintain the state in the decoder. They occupy the higher bits
-    // while the actual character is being built in the lower bits. They are shifted together
-    // with the actual bits of the character.
-
-    // bits 30 & 31 are used for pending bits fixup
-    const int FinalByte = 1 << 29;
-    const int SupplimentarySeq = 1 << 28;
-    const int ThreeByteSeq = 1 << 27;
-
-    int GetCharCount(unsigned char* bytes, int count)
-    {
-        ContractAssert(bytes != nullptr)
-        ContractAssert(count >= 0)
-
-        // Initialize stuff
-        unsigned char *pSrc = bytes;
-        unsigned char *pEnd = pSrc + count;
-
-        // Start by assuming we have as many as count, charCount always includes the adjustment
-        // for the character being decoded
-        int charCount = count;
-        int ch = 0;
-        DecoderFallbackBuffer *fallback = nullptr;
-
-        while (true)
-        {
-            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-            if (pSrc >= pEnd) {
-                break;
-            }
-
-            // read next byte. The JIT optimization seems to be getting confused when
-            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-            int cha = *pSrc;
-
-            if (ch == 0) {
-                // no pending bits
-                goto ReadChar;
-            }
-
-            pSrc++;
-
-            // we are expecting to see trailing bytes like 10vvvvvv
-            if ((cha & 0xC0) != 0x80) {
-                // This can be a valid starting byte for another UTF8 byte sequence, so let's put
-                // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
-                pSrc--;
-                charCount += (ch >> 30);
-                goto InvalidByteSequence;
-            }
-
-            // fold in the new byte
-            ch = (ch << 6) | (cha & 0x3F);
-
-            if ((ch & FinalByte) == 0) {
-                ContractAssertFreeFallback((ch & (SupplimentarySeq | ThreeByteSeq)) != 0)
-
-                if ((ch & SupplimentarySeq) != 0) {
-                    if ((ch & (FinalByte >> 6)) != 0) {
-                        // this is 3rd byte (of 4 byte supplimentary) - nothing to do
-                        continue;
-                    }
-
-                    // 2nd byte, check for non-shortest form of supplimentary char and the valid
-                    // supplimentary characters in range 0x010000 - 0x10FFFF at the same time
-                    if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
-                        goto InvalidByteSequence;
-                    }
-                }
-                else {
-                    // Must be 2nd byte of a 3-byte sequence
-                    // check for non-shortest form of 3 byte seq
-                    if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
-                        (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
-                    {
-                        goto InvalidByteSequence;
-                    }
-                }
-                continue;
-            }
-
-            // ready to punch
-
-            // adjust for surrogates in non-shortest form
-            if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq) {
-                charCount--;
-            }
-            goto EncodeChar;
-
-        InvalidByteSequence:
-            // this code fragment should be close to the gotos referencing it
-            // Have to do fallback for invalid bytes
-            if (fallback == nullptr)
-            {
-                fallback = decoderFallback->CreateFallbackBuffer();
-                RETURN_ON_ERROR
-                fallback->InternalInitialize(bytes, nullptr);
-            }
-            charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
-
-            ch = 0;
-            continue;
-
-        ReadChar:
-            ch = *pSrc;
-            pSrc++;
-
-        ProcessChar:
-            if (ch > 0x7F) {
-                // If its > 0x7F, its start of a new multi-byte sequence
-
-                // Long sequence, so unreserve our char.
-                charCount--;
-
-                // bit 6 has to be non-zero for start of multibyte chars.
-                if ((ch & 0x40) == 0) {
-                    // Unexpected trail byte
-                    goto InvalidByteSequence;
-                }
-
-                // start a new long code
-                if ((ch & 0x20) != 0) {
-                    if ((ch & 0x10) != 0) {
-                        // 4 byte encoding - supplimentary character (2 surrogates)
-
-                        ch &= 0x0F;
-
-                        // check that bit 4 is zero and the valid supplimentary character
-                        // range 0x000000 - 0x10FFFF at the same time
-                        if (ch > 0x04) {
-                            ch |= 0xf0;
-                            goto InvalidByteSequence;
-                        }
-
-                        // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
-                        // Final byte flag, count fix if we don't make final byte & supplimentary sequence flag.
-                        ch |= (FinalByte >> 3 * 6) |  // Final byte is 3 more bytes from now
-                            (1 << 30) |           // If it dies on next byte we'll need an extra char
-                            (3 << (30 - 2 * 6)) |     // If it dies on last byte we'll need to subtract a char
-                            (SupplimentarySeq) | (SupplimentarySeq >> 6) |
-                            (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
-
-                        // Our character count will be 2 characters for these 4 bytes, so subtract another char
-                        charCount--;
-                    }
-                    else {
-                        // 3 byte encoding
-                        // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
-                        ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
-                            (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
-
-                        // We'll expect 1 character for these 3 bytes, so subtract another char.
-                        charCount--;
-                    }
-                }
-                else {
-                    // 2 byte encoding
-
-                    ch &= 0x1F;
-
-                    // check for non-shortest form
-                    if (ch <= 1) {
-                        ch |= 0xc0;
-                        goto InvalidByteSequence;
-                    }
-
-                    // Add bit flags so we'll be flagged correctly
-                    ch |= (FinalByte >> 6);
-                }
-                continue;
-            }
-
-        EncodeChar:
-
-#ifdef FASTLOOP
-            int availableBytes = PtrDiff(pEnd, pSrc);
-
-            // don't fall into the fast decoding loop if we don't have enough bytes
-            if (availableBytes <= 13) {
-                // try to get over the remainder of the ascii characters fast though
-            unsigned char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                while (pSrc < pLocalEnd) {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    if (ch > 0x7F)
-                        goto ProcessChar;
-                }
-                // we are done
-                ch = 0;
-                break;
-            }
-
-            // To compute the upper bound, assume that all characters are ASCII characters at this point,
-            //  the boundary will be decreased for every non-ASCII character we encounter
-            // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
-            unsigned char *pStop = pSrc + availableBytes - 7;
-
-            while (pSrc < pStop) {
-                ch = *pSrc;
-                pSrc++;
-
-                if (ch > 0x7F) {
-                    goto LongCode;
-                }
-
-                // get pSrc 2-byte aligned
-                if (((size_t)pSrc & 0x1) != 0) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F) {
-                        goto LongCode;
-                    }
-                }
-
-                // get pSrc 4-byte aligned
-                if (((size_t)pSrc & 0x2) != 0) {
-                    ch = *(unsigned short*)pSrc;
-                    if ((ch & 0x8080) != 0) {
-                        goto LongCodeWithMask16;
-                    }
-                    pSrc += 2;
-                }
-
-
-                // Run 8 + 8 characters at a time!
-                while (pSrc < pStop) {
-                    ch = *(int*)pSrc;
-                    int chb = *(int*)(pSrc + 4);
-                    if (((ch | chb) & (int)0x80808080) != 0) {
-                        goto LongCodeWithMask32;
-                    }
-                    pSrc += 8;
-
-                    // This is a really small loop - unroll it
-                    if (pSrc >= pStop)
-                        break;
-
-                    ch = *(int*)pSrc;
-                    chb = *(int*)(pSrc + 4);
-                    if (((ch | chb) & (int)0x80808080) != 0) {
-                        goto LongCodeWithMask32;
-                    }
-                    pSrc += 8;
-                }
-                break;
-
-            LongCodeWithMask32 :
-#if BIGENDIAN
-            // be careful about the sign extension
-            if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
-            else
-#else
-                ch &= 0xFF;
-#endif
-
-            LongCodeWithMask16:
-#if BIGENDIAN
-            if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 8);
-            else
-#else
-                ch &= 0xFF;
-#endif
-
-            pSrc++;
-            if (ch <= 0x7F) {
-                continue;
-            }
-
-            LongCode:
-                int chc = *pSrc;
-                pSrc++;
-
-                if (
-                    // bit 6 has to be zero
-                    (ch & 0x40) == 0 ||
-                    // we are expecting to see trailing bytes like 10vvvvvv
-                    (chc & 0xC0) != 0x80)
-                {
-                    goto BadLongCode;
-                }
-
-                chc &= 0x3F;
-
-                // start a new long code
-                if ((ch & 0x20) != 0) {
-
-                    // fold the first two bytes together
-                    chc |= (ch & 0x0F) << 6;
-
-                    if ((ch & 0x10) != 0) {
-                        // 4 byte encoding - surrogate
-                        ch = *pSrc;
-                        if (
-                            // check that bit 4 is zero, the non-shortest form of surrogate
-                            // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
-                            !InRange(chc >> 4, 0x01, 0x10) ||
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            (ch & 0xC0) != 0x80)
-                        {
-                            goto BadLongCode;
-                        }
-
-                        chc = (chc << 6) | (ch & 0x3F);
-
-                        ch = *(pSrc + 1);
-                        // we are expecting to see trailing bytes like 10vvvvvv
-                        if ((ch & 0xC0) != 0x80) {
-                            goto BadLongCode;
-                        }
-                        pSrc += 2;
-
-                        // extra byte
-                        charCount--;
-                    }
-                    else {
-                        // 3 byte encoding
-                        ch = *pSrc;
-                        if (
-                            // check for non-shortest form of 3 byte seq
-                            (chc & (0x1F << 5)) == 0 ||
-                            // Can't have surrogates here.
-                            (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            (ch & 0xC0) != 0x80)
-                        {
-                            goto BadLongCode;
-                        }
-                        pSrc++;
-
-                        // extra byte
-                        charCount--;
-                    }
-                }
-                else {
-                    // 2 byte encoding
-
-                    // check for non-shortest form
-                    if ((ch & 0x1E) == 0) {
-                        goto BadLongCode;
-                    }
-                }
-
-                // extra byte
-                charCount--;
-            }
-#endif // FASTLOOP
-
-            // no pending bits at this point
-            ch = 0;
-            continue;
-
-        BadLongCode:
-            pSrc -= 2;
-            ch = 0;
-            continue;
-        }
-
-        // May have a problem if we have to flush
-        if (ch != 0)
-        {
-            // We were already adjusting for these, so need to unadjust
-            charCount += (ch >> 30);
-            // Have to do fallback for invalid bytes
-            if (fallback == nullptr)
-            {
-                fallback = decoderFallback->CreateFallbackBuffer();
-                RETURN_ON_ERROR
-                fallback->InternalInitialize(bytes, nullptr);
-            }
-            charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
-        }
-
-        // Shouldn't have anything in fallback buffer for GetCharCount
-        // (don't have to check m_throwOnOverflow for count)
-        ContractAssertFreeFallback(fallback == nullptr || fallback->GetRemaining() == 0)
-
-        free(fallback);
-
-        return charCount;
-
-    }
-
-    int GetChars(unsigned char* bytes, int byteCount, char16_t* chars, int charCount)
-    {
-        ContractAssert(chars != nullptr)
-        ContractAssert(byteCount >= 0)
-        ContractAssert(charCount >= 0)
-        ContractAssert(bytes != nullptr)
-
-        unsigned char *pSrc = bytes;
-        char16_t *pTarget = chars;
-
-        unsigned char *pEnd = pSrc + byteCount;
-        char16_t *pAllocatedBufferEnd = pTarget + charCount;
-
-        int ch = 0;
-
-        DecoderFallbackBuffer *fallback = nullptr;
-
-        while (true)
-        {
-            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-
-            if (pSrc >= pEnd) {
-                break;
-            }
-
-            // read next byte. The JIT optimization seems to be getting confused when
-            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-            int cha = *pSrc;
-
-            if (ch == 0) {
-                // no pending bits
-                goto ReadChar;
-            }
-
-            pSrc++;
-
-            // we are expecting to see trailing bytes like 10vvvvvv
-            if ((cha & 0xC0) != 0x80) {
-                // This can be a valid starting byte for another UTF8 byte sequence, so let's put
-                // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
-                pSrc--;
-                goto InvalidByteSequence;
-            }
-
-            // fold in the new byte
-            ch = (ch << 6) | (cha & 0x3F);
-
-            if ((ch & FinalByte) == 0) {
-                // Not at last byte yet
-                ContractAssertFreeFallback((ch & (SupplimentarySeq | ThreeByteSeq)) != 0)
-
-                if ((ch & SupplimentarySeq) != 0) {
-                    // Its a 4-byte supplimentary sequence
-                    if ((ch & (FinalByte >> 6)) != 0) {
-                        // this is 3rd byte of 4 byte sequence - nothing to do
-                        continue;
-                    }
-
-                    // 2nd byte of 4 bytes
-                    // check for non-shortest form of surrogate and the valid surrogate
-                    // range 0x000000 - 0x10FFFF at the same time
-                    if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
-                        goto InvalidByteSequence;
-                    }
-                }
-                else {
-                    // Must be 2nd byte of a 3-byte sequence
-                    // check for non-shortest form of 3 byte seq
-                    if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
-                        (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
-                    {
-                        goto InvalidByteSequence;
-                    }
-                }
-                continue;
-            }
-
-            // ready to punch
-
-            // surrogate in shortest form?
-            // Might be possible to get rid of this?  Already did non-shortest check for 4-byte sequence when reading 2nd byte?
-            if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq) {
-                // let the range check for the second char throw the exception
-                if (pTarget < pAllocatedBufferEnd) {
-                    *pTarget = (char16_t)(((ch >> 10) & 0x7FF) +
-                        (short)((CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10))));
-                    pTarget++;
-
-                    ch = (ch & 0x3FF) +
-                        (int)(CharUnicodeInfo::LOW_SURROGATE_START);
-                }
-            }
-
-            goto EncodeChar;
-
-        InvalidByteSequence:
-            // this code fragment should be close to the gotos referencing it
-            // Have to do fallback for invalid bytes
-            if (fallback == nullptr)
-            {
-                fallback = decoderFallback->CreateFallbackBuffer();
-                RETURN_ON_ERROR
-                fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
-            }
-
-            // That'll back us up the appropriate # of bytes if we didn't get anywhere
-            if (!FallbackInvalidByteSequence(&pSrc, ch, fallback, &pTarget))
-            {
-                // Ran out of buffer space
-                // Need to throw an exception?
-                ContractAssertFreeFallback(pSrc >= bytes || pTarget == chars)
-                fallback->InternalReset();
-                if (pTarget == chars)
-                {
-                    errno = ERROR_INSUFFICIENT_BUFFER;
-                    if (fallback) free(fallback);
-                    return 0;
-                }
-                ch = 0;
-                break;
-            }
-            ContractAssert(pSrc >= bytes)
-            ch = 0;
-            continue;
-
-        ReadChar:
-            ch = *pSrc;
-            pSrc++;
-
-        ProcessChar:
-            if (ch > 0x7F) {
-                // If its > 0x7F, its start of a new multi-byte sequence
-
-                // bit 6 has to be non-zero
-                if ((ch & 0x40) == 0) {
-                    goto InvalidByteSequence;
-                }
-
-                // start a new long code
-                if ((ch & 0x20) != 0) {
-                    if ((ch & 0x10) != 0) {
-                        // 4 byte encoding - supplimentary character (2 surrogates)
-
-                        ch &= 0x0F;
-
-                        // check that bit 4 is zero and the valid supplimentary character
-                        // range 0x000000 - 0x10FFFF at the same time
-                        if (ch > 0x04) {
-                            ch |= 0xf0;
-                            goto InvalidByteSequence;
-                        }
-
-                        ch |= (FinalByte >> 3 * 6) | (1 << 30) | (3 << (30 - 2 * 6)) |
-                            (SupplimentarySeq) | (SupplimentarySeq >> 6) |
-                            (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
-                    }
-                    else {
-                        // 3 byte encoding
-                        ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
-                            (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
-                    }
-                }
-                else {
-                    // 2 byte encoding
-
-                    ch &= 0x1F;
-
-                    // check for non-shortest form
-                    if (ch <= 1) {
-                        ch |= 0xc0;
-                        goto InvalidByteSequence;
-                    }
-
-                    ch |= (FinalByte >> 6);
-                }
-                continue;
-            }
-
-        EncodeChar:
-            // write the pending character
-            if (pTarget >= pAllocatedBufferEnd)
-            {
-                // Fix chars so we make sure to throw if we didn't output anything
-                ch &= 0x1fffff;
-                if (ch > 0x7f)
-                {
-                    if (ch > 0x7ff)
-                    {
-                        if (ch >= CharUnicodeInfo::LOW_SURROGATE_START &&
-                            ch <= CharUnicodeInfo::LOW_SURROGATE_END)
-                        {
-                            pSrc--;     // It was 4 bytes
-                            pTarget--;  // 1 was stored already, but we can't remember 1/2, so back up
-                        }
-                        else if (ch > 0xffff)
-                        {
-                            pSrc--;     // It was 4 bytes, nothing was stored
-                        }
-                        pSrc--;         // It was at least 3 bytes
-                    }
-                    pSrc--;             // It was at least 2 bytes
-                }
-                pSrc--;
-
-                // Throw that we don't have enough room (pSrc could be < chars if we had started to process
-                // a 4 byte sequence already)
-                ContractAssert(pSrc >= bytes || pTarget == chars)
-                if (pTarget == chars)
-                {
-                    errno = ERROR_INSUFFICIENT_BUFFER;
-                    if (fallback) free(fallback);
-                    return 0;
-                }
-
-                // Don't store ch in decoder, we already backed up to its start
-                ch = 0;
-
-                // Didn't throw, just use this buffer size.
-                break;
-            }
-            *pTarget = (char16_t)ch;
-            pTarget++;
-
-#ifdef FASTLOOP
-            int availableChars = PtrDiff(pAllocatedBufferEnd, pTarget);
-            int availableBytes = PtrDiff(pEnd, pSrc);
-
-            // don't fall into the fast decoding loop if we don't have enough bytes
-            // Test for availableChars is done because pStop would be <= pTarget.
-            if (availableBytes <= 13) {
-                // we may need as many as 1 character per byte
-                if (availableChars < availableBytes) {
-                    // not enough output room.  no pending bits at this point
-                    ch = 0;
-                    continue;
-                }
-
-                // try to get over the remainder of the ascii characters fast though
-                unsigned char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                while (pSrc < pLocalEnd) {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    if (ch > 0x7F)
-                        goto ProcessChar;
-
-                    *pTarget = (char16_t)ch;
-                    pTarget++;
-                }
-                // we are done
-                ch = 0;
-                break;
-            }
-
-            // we may need as many as 1 character per byte, so reduce the byte count if necessary.
-            // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
-            if (availableChars < availableBytes) {
-                availableBytes = availableChars;
-            }
-
-            // To compute the upper bound, assume that all characters are ASCII characters at this point,
-            //  the boundary will be decreased for every non-ASCII character we encounter
-            // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
-            char16_t *pStop = pTarget + availableBytes - 7;
-
-            while (pTarget < pStop) {
-                ch = *pSrc;
-                pSrc++;
-
-                if (ch > 0x7F) {
-                    goto LongCode;
-                }
-                *pTarget = (char16_t)ch;
-                pTarget++;
-
-                // get pSrc to be 2-byte aligned
-                if ((((size_t)pSrc) & 0x1) != 0) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F) {
-                        goto LongCode;
-                    }
-                    *pTarget = (char16_t)ch;
-                    pTarget++;
-                }
-
-                // get pSrc to be 4-byte aligned
-                if ((((size_t)pSrc) & 0x2) != 0) {
-                    ch = *(unsigned short*)pSrc;
-                    if ((ch & 0x8080) != 0) {
-                        goto LongCodeWithMask16;
-                    }
-
-                    // Unfortunately, this is endianness sensitive
-#if BIGENDIAN
-                    if (!treatAsLE)
-                    {
-                        *pTarget = (char16_t)((ch >> 8) & 0x7F);
-                        pSrc += 2;
-                        *(pTarget + 1) = (char16_t)(ch & 0x7F);
-                        pTarget += 2;
-                    }
-                    else
-#else
-                    {
-                        *pTarget = (char16_t)(ch & 0x7F);
-                        pSrc += 2;
-                        *(pTarget + 1) = (char16_t)((ch >> 8) & 0x7F);
-                        pTarget += 2;
-                    }
-#endif
-                }
-
-                // Run 8 characters at a time!
-                while (pTarget < pStop) {
-                    ch = *(int*)pSrc;
-                    int chb = *(int*)(pSrc + 4);
-                    if (((ch | chb) & (int)0x80808080) != 0) {
-                        goto LongCodeWithMask32;
-                    }
-
-                    // Unfortunately, this is endianness sensitive
-#if BIGENDIAN
-                    if (!treatAsLE)
-                    {
-                        *pTarget = (char16_t)((ch >> 24) & 0x7F);
-                        *(pTarget + 1) = (char16_t)((ch >> 16) & 0x7F);
-                        *(pTarget + 2) = (char16_t)((ch >> 8) & 0x7F);
-                        *(pTarget + 3) = (char16_t)(ch & 0x7F);
-                        pSrc += 8;
-                        *(pTarget + 4) = (char16_t)((chb >> 24) & 0x7F);
-                        *(pTarget + 5) = (char16_t)((chb >> 16) & 0x7F);
-                        *(pTarget + 6) = (char16_t)((chb >> 8) & 0x7F);
-                        *(pTarget + 7) = (char16_t)(chb & 0x7F);
-                        pTarget += 8;
-                    }
-                    else
-#else
-                    {
-                        *pTarget = (char16_t)(ch & 0x7F);
-                        *(pTarget + 1) = (char16_t)((ch >> 8) & 0x7F);
-                        *(pTarget + 2) = (char16_t)((ch >> 16) & 0x7F);
-                        *(pTarget + 3) = (char16_t)((ch >> 24) & 0x7F);
-                        pSrc += 8;
-                        *(pTarget + 4) = (char16_t)(chb & 0x7F);
-                        *(pTarget + 5) = (char16_t)((chb >> 8) & 0x7F);
-                        *(pTarget + 6) = (char16_t)((chb >> 16) & 0x7F);
-                        *(pTarget + 7) = (char16_t)((chb >> 24) & 0x7F);
-                        pTarget += 8;
-                    }
-#endif
-                }
-                break;
-
-                LongCodeWithMask32 :
-#if BIGENDIAN
-                // be careful about the sign extension
-                if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
-                else
-#else
-                ch &= 0xFF;
-#endif
-
-                LongCodeWithMask16:
-#if BIGENDIAN
-                if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 8);
-                else
-#else
-                ch &= 0xFF;
-#endif
-
-                pSrc++;
-                if (ch <= 0x7F) {
-                    *pTarget = (char16_t)ch;
-                    pTarget++;
-                    continue;
-                }
-
-            LongCode:
-                int chc = *pSrc;
-                pSrc++;
-
-                if (
-                    // bit 6 has to be zero
-                    (ch & 0x40) == 0 ||
-                    // we are expecting to see trailing bytes like 10vvvvvv
-                    (chc & 0xC0) != 0x80)
-                {
-                    goto BadLongCode;
-                }
-
-                chc &= 0x3F;
-
-                // start a new long code
-                if ((ch & 0x20) != 0) {
-
-                    // fold the first two bytes together
-                    chc |= (ch & 0x0F) << 6;
-
-                    if ((ch & 0x10) != 0) {
-                        // 4 byte encoding - surrogate
-                        ch = *pSrc;
-                        if (
-                            // check that bit 4 is zero, the non-shortest form of surrogate
-                            // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
-                            !InRange(chc >> 4, 0x01, 0x10) ||
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            (ch & 0xC0) != 0x80)
-                        {
-                            goto BadLongCode;
-                        }
-
-                        chc = (chc << 6) | (ch & 0x3F);
-
-                        ch = *(pSrc + 1);
-                        // we are expecting to see trailing bytes like 10vvvvvv
-                        if ((ch & 0xC0) != 0x80) {
-                            goto BadLongCode;
-                        }
-                        pSrc += 2;
-
-                        ch = (chc << 6) | (ch & 0x3F);
-
-                        *pTarget = (char16_t)(((ch >> 10) & 0x7FF) +
-                            (short)(CharUnicodeInfo::HIGH_SURROGATE_START - (0x10000 >> 10)));
-                        pTarget++;
-
-                        ch = (ch & 0x3FF) +
-                            (short)(CharUnicodeInfo::LOW_SURROGATE_START);
-
-                        // extra byte, we're already planning 2 chars for 2 of these bytes,
-                        // but the big loop is testing the target against pStop, so we need
-                        // to subtract 2 more or we risk overrunning the input.  Subtract
-                        // one here and one below.
-                        pStop--;
-                    }
-                    else {
-                        // 3 byte encoding
-                        ch = *pSrc;
-                        if (
-                            // check for non-shortest form of 3 byte seq
-                            (chc & (0x1F << 5)) == 0 ||
-                            // Can't have surrogates here.
-                            (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            (ch & 0xC0) != 0x80)
-                        {
-                            goto BadLongCode;
-                        }
-                        pSrc++;
-
-                        ch = (chc << 6) | (ch & 0x3F);
-
-                        // extra byte, we're only expecting 1 char for each of these 3 bytes,
-                        // but the loop is testing the target (not source) against pStop, so
-                        // we need to subtract 2 more or we risk overrunning the input.
-                        // Subtract 1 here and one more below
-                        pStop--;
-                    }
-                }
-                else {
-                    // 2 byte encoding
-
-                    ch &= 0x1F;
-
-                    // check for non-shortest form
-                    if (ch <= 1) {
-                        goto BadLongCode;
-                    }
-                    ch = (ch << 6) | chc;
-                }
-
-                *pTarget = (char16_t)ch;
-                pTarget++;
-
-                // extra byte, we're only expecting 1 char for each of these 2 bytes,
-                // but the loop is testing the target (not source) against pStop.
-                // subtract an extra count from pStop so that we don't overrun the input.
-                pStop--;
-            }
-#endif // FASTLOOP
-
-            ContractAssert(pTarget <= pAllocatedBufferEnd)
-
-            // no pending bits at this point
-            ch = 0;
-            continue;
-
-        BadLongCode:
-            pSrc -= 2;
-            ch = 0;
-            continue;
-        }
-
-        if (ch != 0)
-        {
-            // Have to do fallback for invalid bytes
-            if (fallback == nullptr)
-            {
-                fallback = decoderFallback->CreateFallbackBuffer();
-                RETURN_ON_ERROR
-                fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
-            }
-
-            // This'll back us up the appropriate # of bytes if we didn't get anywhere
-            if (!FallbackInvalidByteSequence(pSrc, ch, fallback))
-            {
-                ContractAssertFreeFallback(pSrc >= bytes || pTarget == chars)
-
-                // Ran out of buffer space
-                // Need to throw an exception?
-                fallback->InternalReset();
-                if (pTarget == chars)
-                {
-                    errno = ERROR_INSUFFICIENT_BUFFER;
-                    if (fallback) free(fallback);
-                    return 0;
-                }
-            }
-            ContractAssertFreeFallback(pSrc >= bytes)
-            ch = 0;
-        }
-
-        // Shouldn't have anything in fallback buffer for GetChars
-        // (don't have to check m_throwOnOverflow for chars)
-        ContractAssert(fallback == nullptr || fallback->GetRemaining() == 0)
-
-        free(fallback);
-
-        return PtrDiff(pTarget, chars);
-    }
-
-    int GetBytes(char16_t* chars, int charCount, unsigned char* bytes, int byteCount)
-    {
-        ContractAssert(chars != nullptr)
-        ContractAssert(byteCount >= 0)
-        ContractAssert(charCount >= 0)
-        ContractAssert(bytes != nullptr)
-
-        // For fallback we may need a fallback buffer.
-        // We wait to initialize it though in case we don't have any broken input unicode
-        EncoderFallbackBuffer* fallback = nullptr;
-        char16_t *pSrc = chars;
-        unsigned char *pTarget = bytes;
-
-        char16_t *pEnd = pSrc + charCount;
-        unsigned char *pAllocatedBufferEnd = pTarget + byteCount;
-
-        int ch = 0;
-
-        // assume that JIT will enregister pSrc, pTarget and ch
-
-        while (true) {
-            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-
-            if (pSrc >= pEnd) {
-
-                if (ch == 0) {
-                    // Check if there's anything left to get out of the fallback buffer
-                    ch = fallback != nullptr ? fallback->InternalGetNextChar() : 0;
-                    if (ch > 0) {
-                        goto ProcessChar;
-                    }
-                }
-                else {
-                    // Case of leftover surrogates in the fallback buffer
-                    if (fallback != nullptr && fallback->bFallingBack) {
-                        ContractAssertFreeFallback(ch >= 0xD800 && ch <= 0xDBFF); //, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture))
-
-                        int cha = ch;
-
-                        ch = fallback->InternalGetNextChar();
-
-                        if (InRange(ch, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                            ch = ch + (cha << 10) + (0x10000 - CharUnicodeInfo::LOW_SURROGATE_START - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
-                            goto EncodeChar;
-                        }
-                        else if (ch > 0){
-                            goto ProcessChar;
-                        }
-                        else {
-                            break;
-                        }
-                    }
-                }
-
-                // attempt to encode the partial surrogate (will fail or ignore)
-                if (ch > 0)
-                    goto EncodeChar;
-
-                // We're done
-                break;
-            }
-
-            if (ch > 0) {
-                // We have a high surrogate left over from a previous loop.
-                ContractAssertFreeFallback(ch >= 0xD800 && ch <= 0xDBFF);//, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture))
-
-                // use separate helper variables for local contexts so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                int cha = *pSrc;
-
-                // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
-                // if (IsLowSurrogate(cha)) {
-                if (InRange(cha, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                    ch = cha + (ch << 10) +
-                        (0x10000
-                        - CharUnicodeInfo::LOW_SURROGATE_START
-                        - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
-
-                    pSrc++;
-                }
-                // else ch is still high surrogate and encoding will fail
-
-                // attempt to encode the surrogate or partial surrogate
-                goto EncodeChar;
-            }
-
-            // If we've used a fallback, then we have to check for it
-            if (fallback != nullptr)
-            {
-                ch = fallback->InternalGetNextChar();
-                if (ch > 0) goto ProcessChar;
-            }
-
-            // read next char. The JIT optimization seems to be getting confused when
-            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-            ch = *pSrc;
-            pSrc++;
-
-        ProcessChar:
-            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::HIGH_SURROGATE_END)) {
-                continue;
-            }
-            // either good char or partial surrogate
-
-        EncodeChar:
-            // throw exception on partial surrogate if necessary
-            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
-            {
-                // Lone surrogates aren't allowed, we have to do fallback for them
-                // Have to make a fallback buffer if we don't have one
-                if (fallback == nullptr)
-                {
-                    // wait on fallbacks if we can
-                    // For fallback we may need a fallback buffer
-                    fallback = encoderFallback->CreateFallbackBuffer();
-                    RETURN_ON_ERROR
-
-                    // Set our internal fallback interesting things.
-                    fallback->InternalInitialize(chars, pEnd, true);
-                }
-
-                // Do our fallback.  Actually we already know its a mixed up surrogate,
-                // so the ref pSrc isn't gonna do anything.
-                fallback->InternalFallback((char16_t)ch, &pSrc);
-                RETURN_ON_ERROR
-
-                // Ignore it if we don't throw
-                ch = 0;
-                continue;
-            }
-
-            // Count bytes needed
-            int bytesNeeded = 1;
-            if (ch > 0x7F) {
-                if (ch > 0x7FF) {
-                    if (ch > 0xFFFF) {
-                        bytesNeeded++;  // 4 bytes (surrogate pair)
-                    }
-                    bytesNeeded++;      // 3 bytes (800-FFFF)
-                }
-                bytesNeeded++;          // 2 bytes (80-7FF)
-            }
-
-            if (pTarget > pAllocatedBufferEnd - bytesNeeded) {
-                // Left over surrogate from last time will cause pSrc == chars, so we'll throw
-                if (fallback != nullptr && fallback->bFallingBack)
-                {
-                    fallback->MovePrevious();              // Didn't use this fallback char
-                    if (ch > 0xFFFF)
-                        fallback->MovePrevious();          // Was surrogate, didn't use 2nd part either
-                }
-                else
-                {
-                    pSrc--;                                     // Didn't use this char
-                    if (ch > 0xFFFF)
-                        pSrc--;                                 // Was surrogate, didn't use 2nd part either
-                }
-                ContractAssertFreeFallback(pSrc >= chars || pTarget == bytes)
-                if (pTarget == bytes)  // Throw if we must
-                {
-                    errno = ERROR_INSUFFICIENT_BUFFER;
-                    if (fallback) free(fallback);
-                    return 0;
-                }
-                ch = 0;                                         // Nothing left over (we backed up to start of pair if supplimentary)
-                break;
-            }
-
-            if (ch <= 0x7F) {
-                *pTarget = (unsigned char)ch;
-            }
-            else {
-                // use separate helper variables for local contexts so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                int chb;
-                if (ch <= 0x7FF) {
-                    // 2 unsigned char encoding
-                    chb = (unsigned char)(0xC0 | (ch >> 6));
-                }
-                else
-                {
-                    if (ch <= 0xFFFF) {
-                        chb = (unsigned char)(0xE0 | (ch >> 12));
-                    }
-                    else
-                    {
-                        *pTarget = (unsigned char)(0xF0 | (ch >> 18));
-                        pTarget++;
-
-                        chb = 0x80 | ((ch >> 12) & 0x3F);
-                    }
-                    *pTarget = (unsigned char)chb;
-                    pTarget++;
-
-                    chb = 0x80 | ((ch >> 6) & 0x3F);
-                }
-                *pTarget = (unsigned char)chb;
-                pTarget++;
-
-                *pTarget = (unsigned char)0x80 | (ch & 0x3F);
-            }
-            pTarget++;
-
-
-#ifdef FASTLOOP
-            // If still have fallback don't do fast loop
-            if (fallback != nullptr && (ch = fallback->InternalGetNextChar()) != 0)
-                goto ProcessChar;
-
-            int availableChars = PtrDiff(pEnd, pSrc);
-            int availableBytes = PtrDiff(pAllocatedBufferEnd, pTarget);
-
-            // don't fall into the fast decoding loop if we don't have enough characters
-            // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
-            if (availableChars <= 13) {
-                // we are hoping for 1 unsigned char per char
-                if (availableBytes < availableChars) {
-                    // not enough output room.  no pending bits at this point
-                    ch = 0;
-                    continue;
-                }
-
-                // try to get over the remainder of the ascii characters fast though
-                char16_t* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                while (pSrc < pLocalEnd) {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    // Not ASCII, need more than 1 unsigned char per char
-                    if (ch > 0x7F)
-                        goto ProcessChar;
-
-                    *pTarget = (unsigned char)ch;
-                    pTarget++;
-                }
-                // we are done, let ch be 0 to clear encoder
-                ch = 0;
-                break;
-            }
-
-            // we need at least 1 unsigned char per character, but Convert might allow us to convert
-            // only part of the input, so try as much as we can.  Reduce charCount if necessary
-            if (availableBytes < availableChars)
-            {
-                availableChars = availableBytes;
-            }
-
-            // FASTLOOP:
-            // - optimistic range checks
-            // - fallbacks to the slow loop for all special cases, exception throwing, etc.
-
-            // To compute the upper bound, assume that all characters are ASCII characters at this point,
-            //  the boundary will be decreased for every non-ASCII character we encounter
-            // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
-            // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
-            char16_t *pStop = pSrc + availableChars - 5;
-
-            while (pSrc < pStop) {
-                ch = *pSrc;
-                pSrc++;
-
-                if (ch > 0x7F) {
-                    goto LongCode;
-                }
-                *pTarget = (unsigned char)ch;
-                pTarget++;
-
-                // get pSrc aligned
-                if (((size_t)pSrc & 0x2) != 0) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F) {
-                        goto LongCode;
-                    }
-                    *pTarget = (unsigned char)ch;
-                    pTarget++;
-                }
-
-                // Run 4 characters at a time!
-                while (pSrc < pStop) {
-                    ch = *(int*)pSrc;
-                    int chc = *(int*)(pSrc + 2);
-                    if (((ch | chc) & (int)0xFF80FF80) != 0) {
-                        goto LongCodeWithMask;
-                    }
-
-                    // Unfortunately, this is endianness sensitive
-#if BIGENDIAN
-                    if (!treatAsLE)
-                    {
-                        *pTarget = (unsigned char)(ch >> 16);
-                        *(pTarget + 1) = (unsigned char)ch;
-                        pSrc += 4;
-                        *(pTarget + 2) = (unsigned char)(chc >> 16);
-                        *(pTarget + 3) = (unsigned char)chc;
-                        pTarget += 4;
-                    }
-                    else
-#else
-                    {
-                        *pTarget = (unsigned char)ch;
-                        *(pTarget + 1) = (unsigned char)(ch >> 16);
-                        pSrc += 4;
-                        *(pTarget + 2) = (unsigned char)chc;
-                        *(pTarget + 3) = (unsigned char)(chc >> 16);
-                        pTarget += 4;
-                    }
-#endif
-                }
-                continue;
-
-            LongCodeWithMask:
-#if BIGENDIAN
-            // be careful about the sign extension
-            if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
-            else
-#else
-                ch = (char16_t)ch;
-#endif
-
-            pSrc++;
-
-            if (ch > 0x7F) {
-                goto LongCode;
-            }
-            *pTarget = (unsigned char)ch;
-            pTarget++;
-            continue;
-
-            LongCode:
-                // use separate helper variables for slow and fast loop so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                int chd;
-                if (ch <= 0x7FF) {
-                    // 2 unsigned char encoding
-                    chd = 0xC0 | (ch >> 6);
-                }
-                else {
-                    if (!InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                        // 3 unsigned char encoding
-                        chd = 0xE0 | (ch >> 12);
-                    }
-                    else
-                    {
-                        // 4 unsigned char encoding - high surrogate + low surrogate
-                        if (ch > CharUnicodeInfo::HIGH_SURROGATE_END) {
-                            // low without high -> bad, try again in slow loop
-                            pSrc -= 1;
-                            break;
-                        }
-
-                        chd = *pSrc;
-                        pSrc++;
-
-                        // if (!IsLowSurrogate(chd)) {
-                        if (!InRange(chd, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                            // high not followed by low -> bad, try again in slow loop
-                            pSrc -= 2;
-                            break;
-                        }
-
-                        ch = chd + (ch << 10) +
-                            (0x10000
-                            - CharUnicodeInfo::LOW_SURROGATE_START
-                            - (CharUnicodeInfo::HIGH_SURROGATE_START << 10));
-
-                        *pTarget = (unsigned char)(0xF0 | (ch >> 18));
-                        // pStop - this unsigned char is compensated by the second surrogate character
-                        // 2 input chars require 4 output bytes.  2 have been anticipated already
-                        // and 2 more will be accounted for by the 2 pStop-- calls below.
-                        pTarget++;
-
-                        chd = 0x80 | ((ch >> 12) & 0x3F);
-                    }
-                    *pTarget = (unsigned char)chd;
-                    pStop--;                    // 3 unsigned char sequence for 1 char, so need pStop-- and the one below too.
-                    pTarget++;
-
-                    chd = 0x80 | ((ch >> 6) & 0x3F);
-                }
-                *pTarget = (unsigned char)chd;
-                pStop--;                        // 2 unsigned char sequence for 1 char so need pStop--.
-                pTarget++;
-
-                *pTarget = (unsigned char)(0x80 | (ch & 0x3F));
-                // pStop - this unsigned char is already included
-                pTarget++;
-            }
-
-            ContractAssertFreeFallback(pTarget <= pAllocatedBufferEnd)
-
-#endif // FASTLOOP
-
-            // no pending char at this point
-            ch = 0;
-        }
-
-        free(fallback);
-
-        return (int)(pTarget - bytes);
-    }
-
-    int GetByteCount(char16_t *chars, int count)
-    {
-        // For fallback we may need a fallback buffer.
-        // We wait to initialize it though in case we don't have any broken input unicode
-        EncoderFallbackBuffer* fallback = nullptr;
-        char16_t *pSrc = chars;
-        char16_t *pEnd = pSrc + count;
-
-        // Start by assuming we have as many as count
-        int byteCount = count;
-
-        int ch = 0;
-
-        while (true) {
-            // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-            if (pSrc >= pEnd) {
-
-                if (ch == 0) {
-                    // Unroll any fallback that happens at the end
-                    ch = fallback != nullptr ? fallback->InternalGetNextChar() : 0;
-                    if (ch > 0) {
-                        byteCount++;
-                        goto ProcessChar;
-                    }
-                }
-                else {
-                    // Case of surrogates in the fallback.
-                    if (fallback != nullptr && fallback->bFallingBack) {
-                        ContractAssertFreeFallback(ch >= 0xD800 && ch <= 0xDBFF);// , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture))
-
-                        ch = fallback->InternalGetNextChar();
-                        byteCount++;
-
-                        if (InRange(ch, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                            ch = 0xfffd;
-                            byteCount++;
-                            goto EncodeChar;
-                        }
-                        else if (ch > 0){
-                            goto ProcessChar;
-                        }
-                        else {
-                            byteCount--; // ignore last one.
-                            break;
-                        }
-                    }
-                }
-
-                if (ch <= 0) {
-                    break;
-                }
-
-                // attempt to encode the partial surrogate (will fallback or ignore it), it'll also subtract 1.
-                byteCount++;
-                goto EncodeChar;
-            }
-
-            if (ch > 0) {
-                ContractAssertFreeFallback(ch >= 0xD800 && ch <= 0xDBFF); // , not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture))
-
-                // use separate helper variables for local contexts so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                int cha = *pSrc;
-
-                // count the pending surrogate
-                byteCount++;
-
-                // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
-                // if (IsLowSurrogate(cha)) {
-                if (InRange(cha, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                    // Don't need a real # because we're just counting, anything > 0x7ff ('cept surrogate) will do.
-                    ch = 0xfffd;
-                    //                        ch = cha + (ch << 10) +
-                    //                            (0x10000
-                    //                            - CharUnicodeInfo::LOW_SURROGATE_START
-                    //                            - (CharUnicodeInfo::HIGH_SURROGATE_START << 10) );
-
-                    // Use this next char
-                    pSrc++;
-                }
-                // else ch is still high surrogate and encoding will fail (so don't add count)
-
-                // attempt to encode the surrogate or partial surrogate
-                goto EncodeChar;
-            }
-
-            // If we've used a fallback, then we have to check for it
-            if (fallback != nullptr)
-            {
-                ch = fallback->InternalGetNextChar();
-                if (ch > 0)
-                {
-                    // We have an extra byte we weren't expecting.
-                    byteCount++;
-                    goto ProcessChar;
-                }
-            }
-
-            // read next char. The JIT optimization seems to be getting confused when
-            // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-            ch = *pSrc;
-            pSrc++;
-
-        ProcessChar:
-            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::HIGH_SURROGATE_END)) {
-                // we will count this surrogate next time around
-                byteCount--;
-                continue;
-            }
-            // either good char or partial surrogate
-
-        EncodeChar:
-            // throw exception on partial surrogate if necessary
-            if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
-            {
-                // Lone surrogates aren't allowed
-                // Have to make a fallback buffer if we don't have one
-                if (fallback == nullptr)
-                {
-                    // wait on fallbacks if we can
-                    // For fallback we may need a fallback buffer
-                    fallback = encoderFallback->CreateFallbackBuffer();
-                    RETURN_ON_ERROR
-
-                    // Set our internal fallback interesting things.
-                    fallback->InternalInitialize(chars, chars + count, false);
-                }
-
-                // Do our fallback.  Actually we already know its a mixed up surrogate,
-                // so the ref pSrc isn't gonna do anything.
-                fallback->InternalFallback((char16_t)ch, &pSrc);
-                RETURN_ON_ERROR
-
-                // Ignore it if we don't throw (we had preallocated this ch)
-                byteCount--;
-                ch = 0;
-                continue;
-            }
-
-            // Count them
-            if (ch > 0x7F) {
-                if (ch > 0x7FF) {
-                    // the extra surrogate byte was compensated by the second surrogate character
-                    // (2 surrogates make 4 bytes.  We've already counted 2 bytes, 1 per char)
-                    byteCount++;
-                }
-                byteCount++;
-            }
-
-#if WIN64
-            // check for overflow
-            if (byteCount < 0) {
-                break;
-            }
-#endif
-
-#ifdef FASTLOOP
-            // If still have fallback don't do fast loop
-            if (fallback != nullptr && (ch = fallback->InternalGetNextChar()) != 0)
-            {
-                // We're reserving 1 byte for each char by default
-                byteCount++;
-                goto ProcessChar;
-            }
-
-            int availableChars = PtrDiff(pEnd, pSrc);
-
-            // don't fall into the fast decoding loop if we don't have enough characters
-            if (availableChars <= 13) {
-                // try to get over the remainder of the ascii characters fast though
-                char16_t* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                while (pSrc < pLocalEnd) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F)
-                        goto ProcessChar;
-                }
-
-                // we are done
-                break;
-            }
-
-#if WIN64
-            // make sure that we won't get a silent overflow inside the fast loop
-            // (Fall out to slow loop if we have this many characters)
-            availableChars &= 0x0FFFFFFF;
-#endif
-
-            // To compute the upper bound, assume that all characters are ASCII characters at this point,
-            //  the boundary will be decreased for every non-ASCII character we encounter
-            // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
-            char16_t *pStop = pSrc + availableChars - (3 + 4);
-
-            while (pSrc < pStop) {
-                ch = *pSrc;
-                pSrc++;
-
-                if (ch > 0x7F)                                                  // Not ASCII
-                {
-                    if (ch > 0x7FF)                                             // Not 2 Byte
-                    {
-                        if ((ch & 0xF800) == 0xD800)                            // See if its a Surrogate
-                            goto LongCode;
-                        byteCount++;
-                    }
-                    byteCount++;
-                }
-
-                // get pSrc aligned
-                if (((size_t)pSrc & 0x2) != 0) {
-                    ch = *pSrc;
-                    pSrc++;
-                    if (ch > 0x7F)                                              // Not ASCII
-                    {
-                        if (ch > 0x7FF)                                         // Not 2 Byte
-                        {
-                            if ((ch & 0xF800) == 0xD800)                        // See if its a Surrogate
-                                goto LongCode;
-                            byteCount++;
-                        }
-                        byteCount++;
-                    }
-                }
-
-                // Run 2 * 4 characters at a time!
-                while (pSrc < pStop) {
-                    ch = *(int*)pSrc;
-                    int chc = *(int*)(pSrc + 2);
-                    if (((ch | chc) & (int)0xFF80FF80) != 0)         // See if not ASCII
-                    {
-                        if (((ch | chc) & (int)0xF800F800) != 0)     // See if not 2 Byte
-                        {
-                            goto LongCodeWithMask;
-                        }
-
-
-                        if ((ch & (int)0xFF800000) != 0)             // Actually 0x07800780 is all we care about (4 bits)
-                            byteCount++;
-                        if ((ch & (int)0xFF80) != 0)
-                            byteCount++;
-                        if ((chc & (int)0xFF800000) != 0)
-                            byteCount++;
-                        if ((chc & (int)0xFF80) != 0)
-                            byteCount++;
-                    }
-                    pSrc += 4;
-
-                    ch = *(int*)pSrc;
-                    chc = *(int*)(pSrc + 2);
-                    if (((ch | chc) & (int)0xFF80FF80) != 0)         // See if not ASCII
-                    {
-                        if (((ch | chc) & (int)0xF800F800) != 0)     // See if not 2 Byte
-                        {
-                            goto LongCodeWithMask;
-                        }
-
-                        if ((ch & (int)0xFF800000) != 0)
-                            byteCount++;
-                        if ((ch & (int)0xFF80) != 0)
-                            byteCount++;
-                        if ((chc & (int)0xFF800000) != 0)
-                            byteCount++;
-                        if ((chc & (int)0xFF80) != 0)
-                            byteCount++;
-                    }
-                    pSrc += 4;
-                }
-                break;
-
-            LongCodeWithMask:
-#if BIGENDIAN
-            // be careful about the sign extension
-            if (!treatAsLE) ch = (int)(((unsigned int)ch) >> 16);
-            else
-#else
-                ch = (char16_t)ch;
-#endif
-
-            pSrc++;
-
-            if (ch <= 0x7F) {
-                continue;
-            }
-
-            LongCode:
-                // use separate helper variables for slow and fast loop so that the jit optimizations
-                // won't get confused about the variable lifetimes
-                if (ch > 0x7FF) {
-                    if (InRange(ch, CharUnicodeInfo::HIGH_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END)) {
-                        // 4 byte encoding - high surrogate + low surrogate
-
-                        int chd = *pSrc;
-                        if (
-                            ch > CharUnicodeInfo::HIGH_SURROGATE_END ||
-                            !InRange(chd, CharUnicodeInfo::LOW_SURROGATE_START, CharUnicodeInfo::LOW_SURROGATE_END))
-                        {
-                            // Back up and drop out to slow loop to figure out error
-                            pSrc--;
-                            break;
-                        }
-                        pSrc++;
-
-                        // byteCount - this byte is compensated by the second surrogate character
-                    }
-                    byteCount++;
-                }
-                byteCount++;
-
-                // byteCount - the last byte is already included
-            }
-#endif // FASTLOOP
-
-            // no pending char at this point
-            ch = 0;
-        }
-
-#if WIN64
-        // check for overflow
-        ContractAssertFreeFallback(byteCount >= 0)
-#endif
-        ContractAssertFreeFallback(fallback == nullptr || fallback->GetRemaining() == 0)
-
-        free(fallback);
-
-        return byteCount;
-    }
-};
-
-int minipal_utf8_to_utf16_preallocated(
-    const char* lpSrcStr,
-    int cchSrc,
-    char16_t** lpDestStr,
-    int cchDest,
-    unsigned int dwFlags,
-    bool treatAsLE)
-{
-    int ret;
-    errno = 0;
-
-    if (cchSrc < 0)
-        cchSrc = strlen(lpSrcStr) + 1;
-
-    UTF8Encoding enc(dwFlags & MB_ERR_INVALID_CHARS, treatAsLE);
-    ret = enc.GetCharCount((unsigned char*)lpSrcStr, cchSrc);
-    if (cchDest)
-    {
-        if (ret > cchDest)
-        {
-            errno = ERROR_INSUFFICIENT_BUFFER;
-            ret = 0;
-        }
-        enc.GetChars((unsigned char*)lpSrcStr, cchSrc, (char16_t*)*lpDestStr, ret);
-        if (errno) ret = 0;
-    }
-    return ret;
-}
-
-static int utf16_to_utf8_preallocated(
-    const char16_t* lpSrcStr,
-    int cchSrc,
-    char** lpDestStr,
-    int cchDest,
-    bool treatAsLE)
-{
-    int ret;
-    errno = 0;
-
-    if (cchSrc < 0)
-        cchSrc = wcslen(lpSrcStr) + 1;
-
-    UTF8Encoding enc(false, treatAsLE);
-    ret = enc.GetByteCount((char16_t*)lpSrcStr, cchSrc);
-    if (cchDest)
-    {
-        if (ret > cchDest)
-        {
-            errno = ERROR_INSUFFICIENT_BUFFER;
-            ret = 0;
-        }
-        enc.GetBytes((char16_t*)lpSrcStr, cchSrc, (unsigned char*)*lpDestStr, ret);
-        if (errno) ret = 0;
-    }
-    return ret;
-}
-
-int minipal_utf16_to_utf8_preallocated(
-    const char16_t* lpSrcStr,
-    int cchSrc,
-    char** lpDestStr,
-    int cchDest)
-{
-    return utf16_to_utf8_preallocated(lpSrcStr, cchSrc, lpDestStr, cchDest, false);
-}
-
-int minipal_utf8_to_utf16_allocate(
-    const char* lpSrcStr,
-    int cchSrc,
-    char16_t** lpDestStr,
-    unsigned int dwFlags,
-    bool treatAsLE)
-{
-    int cchDest = minipal_utf8_to_utf16_preallocated(lpSrcStr, cchSrc, nullptr, 0, dwFlags, !treatAsLE);
-    if (cchDest > 0)
-    {
-        *lpDestStr = (char16_t*)malloc((cchDest + 1) * sizeof(char16_t));
-        cchDest = minipal_utf8_to_utf16_preallocated(lpSrcStr, cchSrc, lpDestStr, cchDest, dwFlags, !treatAsLE);
-        (*lpDestStr)[cchDest] = '\0';
-    }
-    return cchDest;
-}
-
-int minipal_utf16_to_utf8_allocate(
-    const char16_t* lpSrcStr,
-    int cchSrc,
-    char** lpDestStr,
-    bool treatAsLE)
-{
-    int cchDest = utf16_to_utf8_preallocated(lpSrcStr, cchSrc, nullptr, 0, treatAsLE);
-    if (cchDest > 0)
-    {
-        *lpDestStr = (char*)malloc((cchDest + 1) * sizeof(char));
-        cchDest = utf16_to_utf8_preallocated(lpSrcStr, cchSrc, lpDestStr, cchDest, treatAsLE);
-        (*lpDestStr)[cchDest] = '\0';
-    }
-    return cchDest;
-}
diff --git a/src/native/minipal/utf8.h b/src/native/minipal/utf8.h
index 71b9a805aa11b5..29c5ba8ab308c2 100644
--- a/src/native/minipal/utf8.h
+++ b/src/native/minipal/utf8.h
@@ -8,23 +8,64 @@
 #include <stdlib.h>
 #include <stdbool.h>
 
-#define MB_ERR_INVALID_CHARS 0x00000008
-#define ERROR_NO_UNICODE_TRANSLATION 1113L
-#define ERROR_INSUFFICIENT_BUFFER 122L
-#define ERROR_INVALID_PARAMETER 87L
+#define MINIPAL_MB_NO_REPLACE_INVALID_CHARS 0x00000008
+#define MINIPAL_TREAT_AS_LITTLE_ENDIAN 0x00000016
+#define MINIPAL_ERROR_INSUFFICIENT_BUFFER 122L
 
 #ifdef __cplusplus
 extern "C"
 {
 #endif // __cplusplus
 
-int minipal_utf8_to_utf16_preallocated(const char* lpSrcStr, int cchSrc, char16_t** lpDestStr, int cchDest, unsigned int dwFlags, bool treatAsLE);
+#ifdef TARGET_WINDOWS
+typedef wchar_t CHAR16_T;
+#else
+typedef unsigned short CHAR16_T;
+#endif
 
-int minipal_utf16_to_utf8_preallocated(const char16_t* lpSrcStr, int cchSrc, char** lpDestStr, int cchDest);
+/**
+ * Get length of destination needed for UTF-8 to UTF-16 (UCS-2) conversion
+ *
+ * @param source The source string in UTF-8 format.
+ * @param sourceLength Length of the source string.
+ * @param flags Flags to alter the behavior of converter. Supported flags are MINIPAL_MB_NO_REPLACE_INVALID_CHARS and MINIPAL_TREAT_AS_LITTLE_ENDIAN.
+ * @return Length of UTF-16 buffer required by the conversion.
+ */
+size_t minipal_get_length_utf8_to_utf16(const char* source, size_t sourceLength, unsigned int flags);
 
-int minipal_utf8_to_utf16_allocate(const char* lpSrcStr, int cchSrc, char16_t** lpDestStr, unsigned int dwFlags, bool treatAsLE);
+/**
+ * Get length of destination needed for UTF-16 (UCS-2) to UTF-8 conversion
+ *
+ * @param source The source string in UTF-16 format.
+ * @param sourceLength Length of the source string.
+ * @param flags Flags to alter the behavior of converter. Supported flags are MINIPAL_MB_NO_REPLACE_INVALID_CHARS and MINIPAL_TREAT_AS_LITTLE_ENDIAN.
+ * @return Length of UTF-8 buffer required by the conversion.
+ */
+size_t minipal_get_length_utf16_to_utf8(const CHAR16_T* source, size_t sourceLength, unsigned int flags);
 
-int minipal_utf16_to_utf8_allocate(const char16_t* lpSrcStr, int cchSrc, char** lpDestStr, bool treatAsLE);
+/**
+ * Convert a string from UTF-8 to UTF-16 (UCS-2) with preallocated memory
+ *
+ * @param source The source string in UTF-8 format.
+ * @param sourceLength Length of the source string.
+ * @param destination Pointer to the destination UTF-16 string. It can be NULL to query number of items required by the conversion.
+ * @param destinationLength Length of the destination string.
+ * @param flags Flags to alter the behavior of converter. Supported flags are MINIPAL_MB_NO_REPLACE_INVALID_CHARS and MINIPAL_TREAT_AS_LITTLE_ENDIAN.
+ * @return Number of items written by the conversion.
+ */
+size_t minipal_convert_utf8_to_utf16(const char* source, size_t sourceLength, CHAR16_T* destination, size_t destinationLength, unsigned int flags);
+
+/**
+ * Convert a string from UTF-16 (UCS-2) to UTF-8 with preallocated memory
+ *
+ * @param source The source string in UTF-16 format.
+ * @param sourceLength Length of the source string.
+ * @param destination Pointer to the destination UTF-8 string. It can be NULL to query number of items required by the conversion.
+ * @param destinationLength Length of the destination string.
+ * @param flags Flags to alter the behavior of converter. Supported flags are MINIPAL_MB_NO_REPLACE_INVALID_CHARS and MINIPAL_TREAT_AS_LITTLE_ENDIAN.
+ * @return Number of items written by the conversion.
+ */
+size_t minipal_convert_utf16_to_utf8(const CHAR16_T* source, size_t sourceLength, char* destination, size_t destinationLength, unsigned int flags);
 
 #ifdef __cplusplus
 }

From 50b05498a44c484406fa8114429edd84b49de260 Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Sat, 17 Jun 2023 13:08:36 +0300
Subject: [PATCH 6/9] Delete unused macros

---
 src/coreclr/inc/utilcode.h      | 179 --------------------------------
 src/coreclr/vm/rtlfunctions.cpp |   2 +-
 2 files changed, 1 insertion(+), 180 deletions(-)

diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h
index a332a6ccd66927..bc84e71644c9d8 100644
--- a/src/coreclr/inc/utilcode.h
+++ b/src/coreclr/inc/utilcode.h
@@ -185,15 +185,6 @@ typedef LPSTR   LPUTF8;
 // given and ANSI String, copy it into a wide buffer.
 // be careful about scoping when using this macro!
 //
-// how to use the below two macros:
-//
-//  ...
-//  LPSTR pszA;
-//  pszA = MyGetAnsiStringRoutine();
-//  MAKE_WIDEPTR_FROMANSI(pwsz, pszA);
-//  MyUseWideStringRoutine(pwsz);
-//  ...
-//
 // similarily for MAKE_ANSIPTR_FROMWIDE.  note that the first param does not
 // have to be declared, and no clean up must be done.
 //
@@ -211,25 +202,6 @@ typedef LPSTR   LPUTF8;
 #define MAKE_TRANSLATIONFAILED ThrowWin32(ERROR_NO_UNICODE_TRANSLATION)
 #endif
 
-// This version throws on conversion errors (ie, no best fit character
-// mapping to characters that look similar, and no use of the default char
-// ('?') when printing out unrepresentable characters.  Use this method for
-// most development in the EE, especially anything like metadata or class
-// names.  See the BESTFIT version if you're printing out info to the console.
-#define MAKE_MULTIBYTE_FROMWIDE(ptrname, widestr, codepage) \
-    int __l##ptrname = (int)u16_strlen(widestr);        \
-    if (__l##ptrname > MAKE_MAX_LENGTH)         \
-        MAKE_TOOLONGACTION;                     \
-    __l##ptrname = (int)((__l##ptrname + 1) * 2 * sizeof(char)); \
-    CQuickBytes __CQuickBytes##ptrname; \
-    __CQuickBytes##ptrname.AllocThrows(__l##ptrname); \
-    BOOL __b##ptrname; \
-    DWORD __cBytes##ptrname = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, -1, (LPSTR)__CQuickBytes##ptrname.Ptr(), __l##ptrname, NULL, &__b##ptrname); \
-    if (__b##ptrname || (__cBytes##ptrname == 0 && (widestr[0] != W('\0')))) { \
-        MAKE_TRANSLATIONFAILED; \
-    } \
-    LPSTR ptrname = (LPSTR)__CQuickBytes##ptrname.Ptr()
-
 // This version does best fit character mapping and also allows the use
 // of the default char ('?') for any Unicode character that isn't
 // representable.  This is reasonable for writing to the console, but
@@ -247,40 +219,6 @@ typedef LPSTR   LPUTF8;
     } \
     LPSTR ptrname = (LPSTR)__CQuickBytes##ptrname.Ptr()
 
-// Use for anything critical other than output to console, where weird
-// character mappings are unacceptable.
-#define MAKE_ANSIPTR_FROMWIDE(ptrname, widestr) MAKE_MULTIBYTE_FROMWIDE(ptrname, widestr, CP_ACP)
-
-// Use for output to the console.
-#define MAKE_ANSIPTR_FROMWIDE_BESTFIT(ptrname, widestr) MAKE_MULTIBYTE_FROMWIDE_BESTFIT(ptrname, widestr, CP_ACP)
-
-#define MAKE_WIDEPTR_FROMANSI(ptrname, ansistr) \
-    CQuickBytes __qb##ptrname; \
-    int __l##ptrname; \
-    __l##ptrname = WszMultiByteToWideChar(CP_ACP, 0, ansistr, -1, 0, 0); \
-    if (__l##ptrname > MAKE_MAX_LENGTH) \
-        MAKE_TOOLONGACTION; \
-    LPWSTR ptrname = (LPWSTR) __qb##ptrname.AllocThrows((__l##ptrname+1)*sizeof(WCHAR));  \
-    if (WszMultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, ansistr, -1, ptrname, __l##ptrname) == 0) { \
-        MAKE_TRANSLATIONFAILED; \
-    }
-
-#define MAKE_WIDEPTR_FROMANSI_NOTHROW(ptrname, ansistr) \
-    CQuickBytes __qb##ptrname; \
-    LPWSTR ptrname = 0; \
-    int __l##ptrname; \
-    __l##ptrname = WszMultiByteToWideChar(CP_ACP, 0, ansistr, -1, 0, 0); \
-    if (__l##ptrname <= MAKE_MAX_LENGTH) { \
-        ptrname = (LPWSTR) __qb##ptrname.AllocNoThrow((__l##ptrname+1)*sizeof(WCHAR));  \
-        if (ptrname) { \
-            if (WszMultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, ansistr, -1, ptrname, __l##ptrname) != 0) { \
-                ptrname[__l##ptrname] = 0; \
-            } else { \
-                ptrname = 0; \
-            } \
-        } \
-    }
-
 #define MAKE_UTF8PTR_FROMWIDE(ptrname, widestr) CQuickBytes _##ptrname; _##ptrname.ConvertUnicode_Utf8(widestr); LPSTR ptrname = (LPSTR) _##ptrname.Ptr();
 
 #define MAKE_UTF8PTR_FROMWIDE_NOTHROW(ptrname, widestr) \
@@ -312,22 +250,8 @@ typedef LPSTR   LPUTF8;
         } \
     } \
 
-#define MAKE_WIDEPTR_FROMUTF8N(ptrname, utf8str, n8chrs) \
-    CQuickBytes __qb##ptrname; \
-    int __l##ptrname; \
-    __l##ptrname = WszMultiByteToWideChar(CP_UTF8, 0, utf8str, n8chrs, 0, 0); \
-    if (__l##ptrname > MAKE_MAX_LENGTH) \
-        MAKE_TOOLONGACTION; \
-    LPWSTR ptrname = (LPWSTR) __qb##ptrname .AllocThrows((__l##ptrname+1)*sizeof(WCHAR)); \
-    if (0==WszMultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8str, n8chrs, ptrname, __l##ptrname)) { \
-        MAKE_TRANSLATIONFAILED; \
-    } \
-    ptrname[__l##ptrname] = 0;
-
-
 #define MAKE_WIDEPTR_FROMUTF8(ptrname, utf8str) CQuickBytes _##ptrname;  _##ptrname.ConvertUtf8_Unicode(utf8str); LPCWSTR ptrname = (LPCWSTR) _##ptrname.Ptr();
 
-
 #define MAKE_WIDEPTR_FROMUTF8N_NOTHROW(ptrname, utf8str, n8chrs) \
     CQuickBytes __qb##ptrname; \
     int __l##ptrname; \
@@ -346,42 +270,10 @@ typedef LPSTR   LPUTF8;
 
 #define MAKE_WIDEPTR_FROMUTF8_NOTHROW(ptrname, utf8str)   MAKE_WIDEPTR_FROMUTF8N_NOTHROW(ptrname, utf8str, -1)
 
-// This method takes the number of characters
-#define MAKE_MULTIBYTE_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt, codepage)        \
-    CQuickBytes __qb##ptrname; \
-    int __l##ptrname; \
-    __l##ptrname = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, _nCharacters, NULL, 0, NULL, NULL);           \
-    if (__l##ptrname > MAKE_MAX_LENGTH) \
-        MAKE_TOOLONGACTION; \
-    ptrname = (LPUTF8) __qb##ptrname .AllocThrows(__l##ptrname+1); \
-    BOOL __b##ptrname; \
-    DWORD _pCnt = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, _nCharacters, ptrname, __l##ptrname, NULL, &__b##ptrname);  \
-    if (__b##ptrname || (_pCnt == 0 && _nCharacters > 0)) { \
-        MAKE_TRANSLATIONFAILED; \
-    } \
-    ptrname[__l##ptrname] = 0;
-
-#define MAKE_MULTIBYTE_FROMWIDEN_BESTFIT(ptrname, widestr, _nCharacters, _pCnt, codepage)        \
-    CQuickBytes __qb##ptrname; \
-    int __l##ptrname; \
-    __l##ptrname = WszWideCharToMultiByte(codepage, 0, widestr, _nCharacters, NULL, 0, NULL, NULL);           \
-    if (__l##ptrname > MAKE_MAX_LENGTH) \
-        MAKE_TOOLONGACTION; \
-    ptrname = (LPUTF8) __qb##ptrname .AllocThrows(__l##ptrname+1); \
-    DWORD _pCnt = WszWideCharToMultiByte(codepage, 0, widestr, _nCharacters, ptrname, __l##ptrname, NULL, NULL);  \
-    if (_pCnt == 0 && _nCharacters > 0) { \
-        MAKE_TRANSLATIONFAILED; \
-    } \
-    ptrname[__l##ptrname] = 0;
-
-#define MAKE_ANSIPTR_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt)        \
-       MAKE_MULTIBYTE_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt, CP_ACP)
-
 const SIZE_T MaxSigned32BitDecString = ARRAY_SIZE("-2147483648") - 1;
 const SIZE_T MaxUnsigned32BitDecString = ARRAY_SIZE("4294967295") - 1;
 const SIZE_T MaxIntegerDecHexString = ARRAY_SIZE("-9223372036854775808") - 1;
 
-const SIZE_T Max16BitHexString = ARRAY_SIZE("1234") - 1;
 const SIZE_T Max32BitHexString = ARRAY_SIZE("12345678") - 1;
 const SIZE_T Max64BitHexString = ARRAY_SIZE("1234567812345678") - 1;
 
@@ -410,77 +302,6 @@ inline WCHAR* FormatInteger(WCHAR* str, size_t strCount, const char* fmt, I v)
     return str;
 }
 
-inline
-LPWSTR DuplicateString(
-    LPCWSTR wszString,
-    size_t  cchString)
-{
-    STATIC_CONTRACT_NOTHROW;
-
-    LPWSTR wszDup = NULL;
-    if (wszString != NULL)
-    {
-        wszDup = new (nothrow) WCHAR[cchString + 1];
-        if (wszDup != NULL)
-        {
-            wcscpy_s(wszDup, cchString + 1, wszString);
-        }
-    }
-    return wszDup;
-}
-
-inline
-LPWSTR DuplicateString(
-    LPCWSTR wszString)
-{
-    STATIC_CONTRACT_NOTHROW;
-
-    if (wszString != NULL)
-    {
-        return DuplicateString(wszString, u16_strlen(wszString));
-    }
-    else
-    {
-        return NULL;
-    }
-}
-
-void DECLSPEC_NORETURN ThrowOutOfMemory();
-
-inline
-LPWSTR DuplicateStringThrowing(
-    LPCWSTR wszString,
-    size_t cchString)
-{
-    STATIC_CONTRACT_THROWS;
-
-    if (wszString == NULL)
-        return NULL;
-
-    LPWSTR wszDup = DuplicateString(wszString, cchString);
-    if (wszDup == NULL)
-        ThrowOutOfMemory();
-
-    return wszDup;
-}
-
-inline
-LPWSTR DuplicateStringThrowing(
-    LPCWSTR wszString)
-{
-    STATIC_CONTRACT_THROWS;
-
-    if (wszString == NULL)
-        return NULL;
-
-    LPWSTR wszDup = DuplicateString(wszString);
-    if (wszDup == NULL)
-        ThrowOutOfMemory();
-
-    return wszDup;
-}
-
-
 //*****************************************************************************
 // Placement new is used to new and object at an exact location.  The pointer
 // is simply returned to the caller without actually using the heap.  The
diff --git a/src/coreclr/vm/rtlfunctions.cpp b/src/coreclr/vm/rtlfunctions.cpp
index 23f662b4d600ae..f3f80338f3f8ec 100644
--- a/src/coreclr/vm/rtlfunctions.cpp
+++ b/src/coreclr/vm/rtlfunctions.cpp
@@ -103,7 +103,7 @@ VOID InstallEEFunctionTable (
         }
         else
         {
-            NewArrayHolder<WCHAR> wzTempName(DuplicateStringThrowing(ssTempName.GetUnicode()));
+            NewArrayHolder<WCHAR> wzTempName(ssTempName.GetCopyOfUnicodeString());
 
             // publish result
             if (InterlockedCompareExchangeT(&wszModuleName, (LPWSTR)wzTempName, nullptr) == nullptr)

From 3b1e48b7d6526dbdf996f555f9e6a81da295a2ef Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Sat, 17 Jun 2023 23:30:07 +0300
Subject: [PATCH 7/9] Fix custom alloc in mono

---
 src/mono/mono/eglib/giconv.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/mono/mono/eglib/giconv.c b/src/mono/mono/eglib/giconv.c
index 7863d8cbd35cd6..93ee1157bf4dbe 100644
--- a/src/mono/mono/eglib/giconv.c
+++ b/src/mono/mono/eglib/giconv.c
@@ -36,10 +36,6 @@
 #define FORCE_INLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline))
 #endif
 
-#define UNROLL_DECODE_UTF8 0
-
-static FORCE_INLINE (int) decode_utf8 (char *inbuf, size_t inleft, gunichar *outchar);
-
 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
 #define decode_utf16 decode_utf16le
 #else
@@ -386,9 +382,15 @@ g_utf8_to_utf16le_custom_alloc_impl (const gchar *str, glong len, glong *items_r
 	if (ret <= 0)
 		return NULL;
 
-	gunichar2 *lpDestStr = custom_alloc_func((ret + 1) * sizeof (gunichar2), custom_alloc_data);
+	gunichar2 *lpDestStr = custom_alloc_func((ret + 1) * sizeof(gunichar2), custom_alloc_data);
+	if (G_UNLIKELY (!lpDestStr)) {
+		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_MEMORY, "Allocation failed.");
+		return NULL;
+	}
+
 	flags |= MINIPAL_MB_NO_REPLACE_INVALID_CHARS;
 	ret = (glong)minipal_convert_utf8_to_utf16 (str, len, lpDestStr, ret, flags);
+	lpDestStr[ret] = '\0';
 
 	map_error(err);
 	return lpDestStr;
@@ -510,6 +512,8 @@ g_utf16_to_utf8_impl (const gunichar2 *str, glong len, glong *items_read, glong
 		len = 0;
 		while (str[len])
 			len++;
+
+		len++;
 	}
 
 	glong ret = (glong)minipal_get_length_utf16_to_utf8 (str, len, flags);
@@ -521,7 +525,7 @@ g_utf16_to_utf8_impl (const gunichar2 *str, glong len, glong *items_read, glong
 	if (ret <= 0)
 		return NULL;
 
-	lpDestStr = (gchar *)malloc((ret + 1) * sizeof(gchar));
+	lpDestStr = (gchar *)g_malloc((ret + 1) * sizeof(gchar));
 	ret = (glong)minipal_convert_utf16_to_utf8 (str, len, lpDestStr, ret, flags);
 	lpDestStr[ret] = '\0';
 
@@ -553,6 +557,8 @@ g_utf16_to_utf8_custom_alloc (const gunichar2 *str, glong len, glong *items_read
 		len = 0;
 		while (str[len])
 			len++;
+
+		len++;
 	}
 
 	glong ret = (glong)minipal_get_length_utf16_to_utf8 (str, len, 0);
@@ -565,7 +571,13 @@ g_utf16_to_utf8_custom_alloc (const gunichar2 *str, glong len, glong *items_read
 		return NULL;
 
 	gchar *lpDestStr = custom_alloc_func((ret + 1) * sizeof (gunichar2), custom_alloc_data);
+	if (G_UNLIKELY (!lpDestStr)) {
+		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_MEMORY, "Allocation failed.");
+		return NULL;
+	}
+
 	ret = (glong)minipal_convert_utf16_to_utf8 (str, len, lpDestStr, ret, 0);
+	lpDestStr[ret] = '\0';
 
 	map_error(err);
 	return lpDestStr;

From b7d26cdcd1c2977095189d003eff2e293546a2ec Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Tue, 20 Jun 2023 10:41:35 +0300
Subject: [PATCH 8/9] Error on invalid sequences when caller requested

---
 src/mono/mono/eglib/giconv.c |  7 +++---
 src/native/minipal/utf8.c    | 43 ++++++++++++++----------------------
 src/native/minipal/utf8.h    |  1 +
 3 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/src/mono/mono/eglib/giconv.c b/src/mono/mono/eglib/giconv.c
index 93ee1157bf4dbe..8ae955c303fe25 100644
--- a/src/mono/mono/eglib/giconv.c
+++ b/src/mono/mono/eglib/giconv.c
@@ -321,8 +321,9 @@ static FORCE_INLINE (void)
 map_error(GError **err)
 {
 	if (errno == MINIPAL_ERROR_INSUFFICIENT_BUFFER) {
-		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_MEMORY,
-			     "Allocation failed.");
+		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_MEMORY, "Allocation failed.");
+	} else if (errno == MINIPAL_ERROR_NO_UNICODE_TRANSLATION) {
+		g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "Illegal byte sequence encountered in the input.");
 	}
 }
 
@@ -351,7 +352,7 @@ g_utf8_to_utf16_impl (const gchar *str, glong len, glong *items_read, glong *ite
 
 	lpDestStr = malloc((ret + 1) * sizeof(gunichar2));
 	ret = (glong)minipal_convert_utf8_to_utf16 (str, len, lpDestStr, ret, flags);
-    lpDestStr[ret] = '\0';
+	lpDestStr[ret] = '\0';
 
 	if (items_written)
 		*items_written = errno == 0 ? ret : 0;
diff --git a/src/native/minipal/utf8.c b/src/native/minipal/utf8.c
index bacad116efcd8a..b93b7308c1c2c3 100644
--- a/src/native/minipal/utf8.c
+++ b/src/native/minipal/utf8.c
@@ -152,13 +152,6 @@ static void DecoderReplacementFallbackBuffer_Reset(DecoderBuffer* self)
     self->byteStart = NULL;
 }
 
-// Set the above values
-static void DecoderBuffer_InternalInitialize(DecoderBuffer* self, unsigned char* byteStart, CHAR16_T* charEnd)
-{
-    self->byteStart = byteStart;
-    self->charEnd = charEnd;
-}
-
 typedef struct
 {
     const CHAR16_T strDefault[3];
@@ -442,12 +435,17 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun
         goto EncodeChar;
 
     InvalidByteSequence:
-        // this code fragment should be close to the gotos referencing it
-        // Have to do fallback for invalid bytes
+        if (!self->useFallback)
+        {
+            errno = MINIPAL_ERROR_NO_UNICODE_TRANSLATION;
+            return 0;
+        }
+
         if (!fallbackUsed)
         {
             fallbackUsed = true;
-            if (self->useFallback) DecoderBuffer_InternalInitialize(&self->buffer.decoder, bytes, NULL);
+            self->buffer.decoder.byteStart = bytes;
+            self->buffer.decoder.charEnd = NULL;
         }
         charCount += self->buffer.decoder.strDefaultLength;
 
@@ -728,12 +726,6 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun
     {
         // We were already adjusting for these, so need to unadjust
         charCount += (ch >> 30);
-        // Have to do fallback for invalid bytes
-        if (!fallbackUsed)
-        {
-            fallbackUsed = true;
-            if (self->useFallback) DecoderBuffer_InternalInitialize(&self->buffer.decoder, bytes, NULL);
-        }
         charCount += self->buffer.decoder.strDefaultLength;
     }
 
@@ -848,12 +840,19 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
         goto EncodeChar;
 
     InvalidByteSequence:
+        if (!self->useFallback)
+        {
+            errno = MINIPAL_ERROR_NO_UNICODE_TRANSLATION;
+            return 0;
+        }
+
         // this code fragment should be close to the gotos referencing it
         // Have to do fallback for invalid bytes
         if (!fallbackUsed)
         {
             fallbackUsed = true;
-            if (self->useFallback) DecoderBuffer_InternalInitialize(&self->buffer.decoder, bytes, pAllocatedBufferEnd);
+            self->buffer.decoder.byteStart = bytes;
+            self->buffer.decoder.charEnd = pAllocatedBufferEnd;
         }
 
         // That'll back us up the appropriate # of bytes if we didn't get anywhere
@@ -862,7 +861,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
             // Check if we ran out of buffer space
             assert(pSrc >= bytes || pTarget == chars);
 
-            if (self->useFallback) DecoderReplacementFallbackBuffer_Reset(&self->buffer.decoder);
+            DecoderReplacementFallbackBuffer_Reset(&self->buffer.decoder);
             if (pTarget == chars)
             {
                 errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
@@ -1247,13 +1246,6 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
 
     if (ch != 0)
     {
-        // Have to do fallback for invalid bytes
-        if (!fallbackUsed)
-        {
-            fallbackUsed = true;
-            if (self->useFallback) DecoderBuffer_InternalInitialize(&self->buffer.decoder, bytes, NULL);
-        }
-
         // This'll back us up the appropriate # of bytes if we didn't get anywhere
         if (!self->useFallback)
         {
@@ -1261,7 +1253,6 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
 
             // Ran out of buffer space
             // Need to throw an exception?
-            if (self->useFallback) DecoderReplacementFallbackBuffer_Reset(&self->buffer.decoder);
             if (pTarget == chars)
             {
                 errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
diff --git a/src/native/minipal/utf8.h b/src/native/minipal/utf8.h
index 29c5ba8ab308c2..bd648f137a2bb1 100644
--- a/src/native/minipal/utf8.h
+++ b/src/native/minipal/utf8.h
@@ -11,6 +11,7 @@
 #define MINIPAL_MB_NO_REPLACE_INVALID_CHARS 0x00000008
 #define MINIPAL_TREAT_AS_LITTLE_ENDIAN 0x00000016
 #define MINIPAL_ERROR_INSUFFICIENT_BUFFER 122L
+#define MINIPAL_ERROR_NO_UNICODE_TRANSLATION 1113L
 
 #ifdef __cplusplus
 extern "C"

From 18c04bfb2dbe680b90eb549d46212d70e1c34127 Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Tue, 20 Jun 2023 22:52:17 +0300
Subject: [PATCH 9/9] Remove count from convert APIs

---
 src/coreclr/pal/src/locale/unicode.cpp |   4 -
 src/native/minipal/utf8.c              | 138 ++++++++++++++-----------
 2 files changed, 78 insertions(+), 64 deletions(-)

diff --git a/src/coreclr/pal/src/locale/unicode.cpp b/src/coreclr/pal/src/locale/unicode.cpp
index b9a0291394dc9b..8bfa58608e5942 100644
--- a/src/coreclr/pal/src/locale/unicode.cpp
+++ b/src/coreclr/pal/src/locale/unicode.cpp
@@ -253,8 +253,6 @@ MultiByteToWideChar(
         goto EXIT;
     }
 
-    // Use minipal_convert_utf8_to_utf16 on all systems, since it replaces
-    // invalid characters and Core Foundation doesn't do that.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
         if (cbMultiByte < 0)
@@ -344,8 +342,6 @@ WideCharToMultiByte(
         defaultChar = *lpDefaultChar;
     }
 
-    // Use minipal_convert_utf16_to_utf8 on all systems because we use
-    // UTF8ToUnicode in MultiByteToWideChar() on all systems.
     if (CodePage == CP_UTF8 || CodePage == CP_ACP)
     {
         if (cchWideChar < 0)
diff --git a/src/native/minipal/utf8.c b/src/native/minipal/utf8.c
index b93b7308c1c2c3..a54b805540f897 100644
--- a/src/native/minipal/utf8.c
+++ b/src/native/minipal/utf8.c
@@ -92,7 +92,7 @@ static bool DecoderReplacementFallbackBuffer_Fallback(DecoderBuffer* self)
 // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
 // array, and we might need the index, hence the byte*
 // Don't touch ref chars unless we succeed
-static bool DecoderReplacementFallbackBuffer_InternalFallback_Copy(DecoderBuffer* self, CHAR16_T** chars)
+static bool DecoderReplacementFallbackBuffer_InternalFallback_Copy(DecoderBuffer* self, CHAR16_T** chars, CHAR16_T* pAllocatedBufferEnd)
 {
     assert(self->byteStart != NULL);
 
@@ -132,6 +132,11 @@ static bool DecoderReplacementFallbackBuffer_InternalFallback_Copy(DecoderBuffer
             }
 
             *(charTemp++) = ch;
+            if (charTemp > pAllocatedBufferEnd)
+            {
+                errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+                return false;
+            }
         }
 
         // Need to make sure that bHighSurrogate isn't true
@@ -332,13 +337,13 @@ static bool InRange(int c, int begin, int end)
 // During GetChars we had an invalid byte sequence
 // pSrc is backed up to the start of the bad sequence if we didn't have room to
 // fall it back.  Otherwise pSrc remains where it is.
-static bool FallbackInvalidByteSequence_Copy(UTF8Encoding* self, unsigned char** pSrc, CHAR16_T** pTarget)
+static bool FallbackInvalidByteSequence_Copy(UTF8Encoding* self, unsigned char** pSrc, CHAR16_T** pTarget, CHAR16_T* pAllocatedBufferEnd)
 {
     assert(self->useFallback);
 
     // Get our byte[]
     unsigned char* pStart = *pSrc;
-    bool fallbackResult = DecoderReplacementFallbackBuffer_InternalFallback_Copy(&self->buffer.decoder, pTarget);
+    bool fallbackResult = DecoderReplacementFallbackBuffer_InternalFallback_Copy(&self->buffer.decoder, pTarget, pAllocatedBufferEnd);
 
     // Do the actual fallback
     if (!fallbackResult)
@@ -736,6 +741,14 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun
     return charCount;
 }
 
+#define ENSURE_BUFFER_INC                          \
+    pTarget++;                                     \
+    if (pTarget > pAllocatedBufferEnd)             \
+    {                                              \
+        errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER; \
+        return 0;                                  \
+    }
+
 static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount, CHAR16_T* chars, size_t charCount)
 {
     assert(chars != NULL);
@@ -830,7 +843,8 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
             {
                 *pTarget = (CHAR16_T)(((ch >> 10) & 0x7FF) +
                     (HIGH_SURROGATE_START - (0x10000 >> 10)));
-                pTarget++;
+
+                ENSURE_BUFFER_INC
 
                 ch = (ch & 0x3FF) +
                     (int)(LOW_SURROGATE_START);
@@ -856,17 +870,14 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
         }
 
         // That'll back us up the appropriate # of bytes if we didn't get anywhere
-        if (!FallbackInvalidByteSequence_Copy(self, &pSrc, &pTarget))
+        if (!FallbackInvalidByteSequence_Copy(self, &pSrc, &pTarget, pAllocatedBufferEnd))
         {
+            if (errno == MINIPAL_ERROR_INSUFFICIENT_BUFFER) return 0;
+
             // Check if we ran out of buffer space
-            assert(pSrc >= bytes || pTarget == chars);
+            assert(pSrc >= bytes);
 
             DecoderReplacementFallbackBuffer_Reset(&self->buffer.decoder);
-            if (pTarget == chars)
-            {
-                errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
-                return 0;
-            }
             ch = 0;
             break;
         }
@@ -960,15 +971,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
             }
             pSrc--;
 
-            // Throw that we don't have enough room (pSrc could be < chars if we had started to process
-            // a 4 byte sequence already)
-            assert(pSrc >= bytes || pTarget == chars);
-
-            if (pTarget == chars)
-            {
-                errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
-                return 0;
-            }
+            assert(pSrc >= bytes);
 
             // Don't store ch in decoder, we already backed up to its start
             ch = 0;
@@ -977,7 +980,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
             break;
         }
         *pTarget = (CHAR16_T)ch;
-        pTarget++;
+        ENSURE_BUFFER_INC
 
         int availableChars = pAllocatedBufferEnd - pTarget;
         int availableBytes = pEnd - pSrc;
@@ -1004,7 +1007,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
                 if (ch > 0x7F) goto ProcessChar;
 
                 *pTarget = (CHAR16_T)ch;
-                pTarget++;
+                ENSURE_BUFFER_INC
             }
             // we are done
             ch = 0;
@@ -1028,7 +1031,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
             if (ch > 0x7F) goto LongCode;
 
             *pTarget = (CHAR16_T)ch;
-            pTarget++;
+            ENSURE_BUFFER_INC
 
             // get pSrc to be 2-byte aligned
             if ((((size_t)pSrc) & 0x1) != 0)
@@ -1038,7 +1041,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
                 if (ch > 0x7F) goto LongCode;
 
                 *pTarget = (CHAR16_T)ch;
-                pTarget++;
+                ENSURE_BUFFER_INC
             }
 
             // get pSrc to be 4-byte aligned
@@ -1047,6 +1050,13 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
                 ch = *(unsigned short*)pSrc;
                 if ((ch & 0x8080) != 0) goto LongCodeWithMask16;
 
+
+                if (pTarget + 2 > pAllocatedBufferEnd)
+                {
+                    errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+                    return 0;
+                }
+
                 // Unfortunately, this is endianness sensitive
 #if BIGENDIAN
                 if (!self->treatAsLE)
@@ -1073,6 +1083,12 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
                 int chb = *(int*)(pSrc + 4);
                 if (((ch | chb) & (int)0x80808080) != 0) goto LongCodeWithMask32;
 
+                if (pTarget + 8 > pAllocatedBufferEnd)
+                {
+                    errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+                    return 0;
+                }
+
                 // Unfortunately, this is endianness sensitive
 #if BIGENDIAN
                 if (!self->treatAsLE)
@@ -1124,7 +1140,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
             if (ch <= 0x7F)
             {
                 *pTarget = (CHAR16_T)ch;
-                pTarget++;
+                ENSURE_BUFFER_INC
                 continue;
             }
 
@@ -1176,7 +1192,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
 
                     *pTarget = (CHAR16_T)(((ch >> 10) & 0x7FF) +
                         (HIGH_SURROGATE_START - (0x10000 >> 10)));
-                    pTarget++;
+                    ENSURE_BUFFER_INC
 
                     ch = (ch & 0x3FF) + (LOW_SURROGATE_START);
 
@@ -1224,7 +1240,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
             }
 
             *pTarget = (CHAR16_T)ch;
-            pTarget++;
+            ENSURE_BUFFER_INC
 
             // extra byte, we're only expecting 1 char for each of these 2 bytes,
             // but the loop is testing the target (not source) against pStop.
@@ -1267,6 +1283,12 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
     // (don't have to check m_throwOnOverflow for chars)
     assert(!fallbackUsed || self->buffer.decoder.fallbackCount < 0);
 
+    if (pSrc < pEnd)
+    {
+        errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+        return 0;
+    }
+
     return pTarget - chars;
 }
 
@@ -1467,22 +1489,22 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
                 else
                 {
                     *pTarget = (unsigned char)(0xF0 | (ch >> 18));
-                    pTarget++;
+                    ENSURE_BUFFER_INC
 
                     chb = 0x80 | ((ch >> 12) & 0x3F);
                 }
                 *pTarget = (unsigned char)chb;
-                pTarget++;
+                ENSURE_BUFFER_INC
 
                 chb = 0x80 | ((ch >> 6) & 0x3F);
             }
             *pTarget = (unsigned char)chb;
-            pTarget++;
+            ENSURE_BUFFER_INC
 
             *pTarget = (unsigned char)0x80 | (ch & 0x3F);
         }
 
-        pTarget++;
+        ENSURE_BUFFER_INC
 
         // If still have fallback don't do fast loop
         if (fallbackUsed && (ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder)) != 0)
@@ -1514,7 +1536,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
                 if (ch > 0x7F) goto ProcessChar;
 
                 *pTarget = (unsigned char)ch;
-                pTarget++;
+                ENSURE_BUFFER_INC
             }
             // we are done, let ch be 0 to clear encoder
             ch = 0;
@@ -1546,7 +1568,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
             if (ch > 0x7F) goto LongCode;
 
             *pTarget = (unsigned char)ch;
-            pTarget++;
+            ENSURE_BUFFER_INC
 
             // get pSrc aligned
             if (((size_t)pSrc & 0x2) != 0)
@@ -1556,7 +1578,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
                 if (ch > 0x7F) goto LongCode;
 
                 *pTarget = (unsigned char)ch;
-                pTarget++;
+                ENSURE_BUFFER_INC
             }
 
             // Run 4 characters at a time!
@@ -1567,6 +1589,12 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
 
                 if (((ch | chc) & (int)0xFF80FF80) != 0) goto LongCodeWithMask;
 
+                if (pTarget + 4 > pAllocatedBufferEnd)
+                {
+                    errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+                    return 0;
+                }
+
                 // Unfortunately, this is endianness sensitive
 #if BIGENDIAN
                 if (!self->treatAsLE)
@@ -1603,7 +1631,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
         if (ch > 0x7F) goto LongCode;
 
         *pTarget = (unsigned char)ch;
-        pTarget++;
+        ENSURE_BUFFER_INC
         continue;
 
         LongCode:
@@ -1650,23 +1678,23 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
                     // pStop - this unsigned char is compensated by the second surrogate character
                     // 2 input chars require 4 output bytes.  2 have been anticipated already
                     // and 2 more will be accounted for by the 2 pStop-- calls below.
-                    pTarget++;
+                    ENSURE_BUFFER_INC
 
                     chd = 0x80 | ((ch >> 12) & 0x3F);
                 }
                 *pTarget = (unsigned char)chd;
                 pStop--;                    // 3 unsigned char sequence for 1 char, so need pStop-- and the one below too.
-                pTarget++;
+                ENSURE_BUFFER_INC
 
                 chd = 0x80 | ((ch >> 6) & 0x3F);
             }
             *pTarget = (unsigned char)chd;
             pStop--;                        // 2 unsigned char sequence for 1 char so need pStop--.
-            pTarget++;
+            ENSURE_BUFFER_INC
 
             *pTarget = (unsigned char)(0x80 | (ch & 0x3F));
             // pStop - this unsigned char is already included
-            pTarget++;
+            ENSURE_BUFFER_INC
         }
 
         assert(pTarget <= pAllocatedBufferEnd);
@@ -1675,6 +1703,12 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
         ch = 0;
     }
 
+    if (pSrc < pEnd)
+    {
+        errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
+        return 0;
+    }
+
     return (int)(pTarget - bytes);
 }
 
@@ -2080,16 +2114,8 @@ size_t minipal_convert_utf8_to_utf16(const char* source, size_t sourceLength, CH
 #endif
     };
 
-    if (GetCharCount(&enc, (unsigned char*)source, sourceLength) > destinationLength)
-    {
-        errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
-        ret = 0;
-    }
-    else
-    {
-        ret = GetChars(&enc, (unsigned char*)source, sourceLength, destination, destinationLength);
-        if (errno) ret = 0;
-    }
+    ret = GetChars(&enc, (unsigned char*)source, sourceLength, destination, destinationLength);
+    if (errno) ret = 0;
 
     return ret;
 }
@@ -2112,20 +2138,12 @@ size_t minipal_convert_utf16_to_utf8(const CHAR16_T* source, size_t sourceLength
 #endif
     };
 
-    if (GetByteCount(&enc, (CHAR16_T*)source, sourceLength) > destinationLength)
-    {
-        errno = MINIPAL_ERROR_INSUFFICIENT_BUFFER;
-        ret = 0;
-    }
-    else
-    {
 #if !BIGENDIAN
-        (void)flags; // unused
+    (void)flags; // unused
 #endif
 
-        ret = GetBytes(&enc, (CHAR16_T*)source, sourceLength, (unsigned char*)destination, destinationLength);
-        if (errno) ret = 0;
-    }
+    ret = GetBytes(&enc, (CHAR16_T*)source, sourceLength, (unsigned char*)destination, destinationLength);
+    if (errno) ret = 0;
 
     return ret;
 }