Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Share UTF8 converters between coreclr and mono #85558

Merged
merged 9 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 0 additions & 179 deletions src/coreclr/inc/utilcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,6 @@ typedef LPSTR LPUTF8;
// Given an ANSI string, copy it into a wide buffer.
// be careful about scoping when using this macro!
//
// how to use the below two macros:
//
// ...
// LPSTR pszA;
// pszA = MyGetAnsiStringRoutine();
// MAKE_WIDEPTR_FROMANSI(pwsz, pszA);
// MyUseWideStringRoutine(pwsz);
// ...
//
// Similarly for MAKE_ANSIPTR_FROMWIDE. Note that the first param does not
// have to be declared, and no cleanup is required.
//
Expand All @@ -211,25 +202,6 @@ typedef LPSTR LPUTF8;
#define MAKE_TRANSLATIONFAILED ThrowWin32(ERROR_NO_UNICODE_TRANSLATION)
#endif

// MAKE_MULTIBYTE_FROMWIDE(ptrname, widestr, codepage)
//
// Declares `LPSTR ptrname` pointing at the conversion of the NUL-terminated
// wide string `widestr` into `codepage`.
//
// This version fails on conversion errors: it passes WC_NO_BEST_FIT_CHARS
// (no best-fit mapping to characters that merely look similar) and treats
// any use of the default char ('?') for an unrepresentable character as an
// error. Use this macro for most development in the EE, especially anything
// like metadata or class names. See the BESTFIT version if you're printing
// out info to the console.
//
// Behavior:
//   - MAKE_TOOLONGACTION is invoked when the source length exceeds
//     MAKE_MAX_LENGTH.
//   - The destination buffer is (length + 1) * 2 * sizeof(char) bytes and is
//     owned by the CQuickBytes local `__CQuickBytes##ptrname` that the macro
//     declares, so `ptrname` should only be used inside the enclosing scope.
//   - MAKE_TRANSLATIONFAILED is invoked when the converter reports that a
//     default character was used, or when it returns 0 for a non-empty input.
//
// The macro expands to several declarations and statements; it cannot be
// used as the body of an unbraced `if`/`else`, nor twice with the same
// `ptrname` in one scope.
//
// The "default char used" flag is initialized to FALSE so that it is never
// read while indeterminate if the converter fails without writing it.
// NOTE(review): the Win32 documentation states lpUsedDefaultChar must be NULL
// for CP_UTF8 -- confirm callers only pass code pages that accept the flag.
#define MAKE_MULTIBYTE_FROMWIDE(ptrname, widestr, codepage) \
    int __l##ptrname = (int)u16_strlen(widestr); \
    if (__l##ptrname > MAKE_MAX_LENGTH) \
        MAKE_TOOLONGACTION; \
    __l##ptrname = (int)((__l##ptrname + 1) * 2 * sizeof(char)); \
    CQuickBytes __CQuickBytes##ptrname; \
    __CQuickBytes##ptrname.AllocThrows(__l##ptrname); \
    BOOL __b##ptrname = FALSE; \
    DWORD __cBytes##ptrname = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, -1, (LPSTR)__CQuickBytes##ptrname.Ptr(), __l##ptrname, NULL, &__b##ptrname); \
    if (__b##ptrname || (__cBytes##ptrname == 0 && (widestr[0] != W('\0')))) { \
        MAKE_TRANSLATIONFAILED; \
    } \
    LPSTR ptrname = (LPSTR)__CQuickBytes##ptrname.Ptr()

// This version does best fit character mapping and also allows the use
// of the default char ('?') for any Unicode character that isn't
// representable. This is reasonable for writing to the console, but
Expand All @@ -247,40 +219,6 @@ typedef LPSTR LPUTF8;
} \
LPSTR ptrname = (LPSTR)__CQuickBytes##ptrname.Ptr()

// Use for anything critical other than output to console, where weird
// character mappings are unacceptable. Expands to MAKE_MULTIBYTE_FROMWIDE
// with the CP_ACP code page, so `ptrname` is declared by the expansion.
#define MAKE_ANSIPTR_FROMWIDE(ptrname, widestr) MAKE_MULTIBYTE_FROMWIDE(ptrname, widestr, CP_ACP)

// Use for output to the console. Expands to MAKE_MULTIBYTE_FROMWIDE_BESTFIT
// with the CP_ACP code page.
#define MAKE_ANSIPTR_FROMWIDE_BESTFIT(ptrname, widestr) MAKE_MULTIBYTE_FROMWIDE_BESTFIT(ptrname, widestr, CP_ACP)

// MAKE_WIDEPTR_FROMANSI(ptrname, ansistr)
//
// Declares `LPWSTR ptrname` holding the wide-character conversion of the
// CP_ACP string `ansistr` (passed with length -1).
//
// Steps:
//   1. Calls WszMultiByteToWideChar with a null output buffer and size 0 and
//      stores the result as the length (presumably the required number of
//      wide characters -- standard MultiByteToWideChar sizing convention).
//   2. Invokes MAKE_TOOLONGACTION if that length exceeds MAKE_MAX_LENGTH.
//   3. Allocates (length + 1) WCHARs from the CQuickBytes local
//      `__qb##ptrname` via AllocThrows.
//   4. Converts again with MB_ERR_INVALID_CHARS and invokes
//      MAKE_TRANSLATIONFAILED if the call returns 0.
//
// The macro expands to multiple declarations and statements and introduces
// the locals `__qb##ptrname` and `__l##ptrname` into the caller's scope, so it
// cannot be the body of an unbraced `if`/`else` or be used twice with the same
// `ptrname` in one scope.
#define MAKE_WIDEPTR_FROMANSI(ptrname, ansistr) \
CQuickBytes __qb##ptrname; \
int __l##ptrname; \
__l##ptrname = WszMultiByteToWideChar(CP_ACP, 0, ansistr, -1, 0, 0); \
if (__l##ptrname > MAKE_MAX_LENGTH) \
MAKE_TOOLONGACTION; \
LPWSTR ptrname = (LPWSTR) __qb##ptrname.AllocThrows((__l##ptrname+1)*sizeof(WCHAR)); \
if (WszMultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, ansistr, -1, ptrname, __l##ptrname) == 0) { \
MAKE_TRANSLATIONFAILED; \
}

// MAKE_WIDEPTR_FROMANSI_NOTHROW(ptrname, ansistr)
//
// Variant of MAKE_WIDEPTR_FROMANSI that reports failure through the result
// pointer instead of invoking MAKE_TOOLONGACTION / MAKE_TRANSLATIONFAILED.
//
// Declares `LPWSTR ptrname`, initially 0. After the expansion `ptrname` is
// still 0 when any of the following happened:
//   - the computed length exceeds MAKE_MAX_LENGTH,
//   - AllocNoThrow returned a null pointer,
//   - the MB_ERR_INVALID_CHARS conversion returned 0.
// On success `ptrname` points into the CQuickBytes local `__qb##ptrname` and
// a terminating 0 is written at index `__l##ptrname`.
//
// Callers must check `ptrname` for 0 before using it.
#define MAKE_WIDEPTR_FROMANSI_NOTHROW(ptrname, ansistr) \
CQuickBytes __qb##ptrname; \
LPWSTR ptrname = 0; \
int __l##ptrname; \
__l##ptrname = WszMultiByteToWideChar(CP_ACP, 0, ansistr, -1, 0, 0); \
if (__l##ptrname <= MAKE_MAX_LENGTH) { \
ptrname = (LPWSTR) __qb##ptrname.AllocNoThrow((__l##ptrname+1)*sizeof(WCHAR)); \
if (ptrname) { \
if (WszMultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, ansistr, -1, ptrname, __l##ptrname) != 0) { \
ptrname[__l##ptrname] = 0; \
} else { \
ptrname = 0; \
} \
} \
}

// MAKE_UTF8PTR_FROMWIDE(ptrname, widestr)
//
// Declares a CQuickBytes local named `_##ptrname`, calls its
// ConvertUnicode_Utf8(widestr) member, and declares `LPSTR ptrname` pointing
// at that buffer. The pointer refers to storage held by the CQuickBytes local,
// so it should only be used inside the enclosing scope.
// NOTE(review): error behavior is whatever ConvertUnicode_Utf8 does -- it is
// not visible here; confirm before relying on it.
#define MAKE_UTF8PTR_FROMWIDE(ptrname, widestr) CQuickBytes _##ptrname; _##ptrname.ConvertUnicode_Utf8(widestr); LPSTR ptrname = (LPSTR) _##ptrname.Ptr();

#define MAKE_UTF8PTR_FROMWIDE_NOTHROW(ptrname, widestr) \
Expand Down Expand Up @@ -312,22 +250,8 @@ typedef LPSTR LPUTF8;
} \
} \

// MAKE_WIDEPTR_FROMUTF8N(ptrname, utf8str, n8chrs)
//
// Declares `LPWSTR ptrname` holding the wide-character conversion of the
// first `n8chrs` bytes of the UTF-8 string `utf8str`.
//
// Steps:
//   1. Sizing call to WszMultiByteToWideChar(CP_UTF8, ...) with a null output
//      buffer; the result is stored in `__l##ptrname`.
//   2. MAKE_TOOLONGACTION is invoked if that length exceeds MAKE_MAX_LENGTH.
//   3. (length + 1) WCHARs are allocated from the CQuickBytes local
//      `__qb##ptrname` via AllocThrows.
//   4. The conversion is repeated with MB_ERR_INVALID_CHARS;
//      MAKE_TRANSLATIONFAILED is invoked if it returns 0.
//   5. A terminating 0 is written at index `__l##ptrname`, so the result is
//      terminated even when the input range is not.
//
// Expands to multiple declarations and statements; do not use as the body of
// an unbraced `if`/`else`.
#define MAKE_WIDEPTR_FROMUTF8N(ptrname, utf8str, n8chrs) \
CQuickBytes __qb##ptrname; \
int __l##ptrname; \
__l##ptrname = WszMultiByteToWideChar(CP_UTF8, 0, utf8str, n8chrs, 0, 0); \
if (__l##ptrname > MAKE_MAX_LENGTH) \
MAKE_TOOLONGACTION; \
LPWSTR ptrname = (LPWSTR) __qb##ptrname .AllocThrows((__l##ptrname+1)*sizeof(WCHAR)); \
if (0==WszMultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8str, n8chrs, ptrname, __l##ptrname)) { \
MAKE_TRANSLATIONFAILED; \
} \
ptrname[__l##ptrname] = 0;


// MAKE_WIDEPTR_FROMUTF8(ptrname, utf8str)
//
// Declares a CQuickBytes local named `_##ptrname`, calls its
// ConvertUtf8_Unicode(utf8str) member, and declares `LPCWSTR ptrname` pointing
// at that buffer. Note the result is a pointer to const, unlike the
// MAKE_WIDEPTR_FROMUTF8N form which declares an LPWSTR.
// NOTE(review): error behavior is whatever ConvertUtf8_Unicode does -- it is
// not visible here; confirm before relying on it.
#define MAKE_WIDEPTR_FROMUTF8(ptrname, utf8str) CQuickBytes _##ptrname; _##ptrname.ConvertUtf8_Unicode(utf8str); LPCWSTR ptrname = (LPCWSTR) _##ptrname.Ptr();


#define MAKE_WIDEPTR_FROMUTF8N_NOTHROW(ptrname, utf8str, n8chrs) \
CQuickBytes __qb##ptrname; \
int __l##ptrname; \
Expand All @@ -346,42 +270,10 @@ typedef LPSTR LPUTF8;

// Convenience form of MAKE_WIDEPTR_FROMUTF8N_NOTHROW that passes -1 as the
// byte count (presumably meaning "utf8str is NUL-terminated", following the
// MultiByteToWideChar convention -- confirm against the converter in use).
#define MAKE_WIDEPTR_FROMUTF8_NOTHROW(ptrname, utf8str) MAKE_WIDEPTR_FROMUTF8N_NOTHROW(ptrname, utf8str, -1)

// MAKE_MULTIBYTE_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt, codepage)
//
// Counted variant of the wide-to-multibyte conversion: converts
// `_nCharacters` wide characters of `widestr` into `codepage` using
// WC_NO_BEST_FIT_CHARS.
//
// Unlike most MAKE_* macros, `ptrname` is ASSIGNED here, not declared: the
// caller must already have a variable of that name (the value is cast to
// LPUTF8). `_pCnt`, on the other hand, is declared by the macro as a DWORD
// receiving the converter's return value.
//
// Behavior:
//   - A sizing call determines the required byte count;
//     MAKE_TOOLONGACTION is invoked if it exceeds MAKE_MAX_LENGTH.
//   - (size + 1) bytes are allocated from the CQuickBytes local
//     `__qb##ptrname` via AllocThrows.
//   - MAKE_TRANSLATIONFAILED is invoked when the converter reports that a
//     default character was used, or returns 0 for a non-empty input.
//   - A terminating 0 is written at index `__l##ptrname`.
//
// The "default char used" flag is initialized to FALSE so that it is never
// read while indeterminate if the converter fails without writing it.
// NOTE(review): the Win32 documentation states lpUsedDefaultChar must be NULL
// for CP_UTF8 -- confirm callers only pass code pages that accept the flag.
#define MAKE_MULTIBYTE_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt, codepage) \
    CQuickBytes __qb##ptrname; \
    int __l##ptrname; \
    __l##ptrname = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, _nCharacters, NULL, 0, NULL, NULL); \
    if (__l##ptrname > MAKE_MAX_LENGTH) \
        MAKE_TOOLONGACTION; \
    ptrname = (LPUTF8) __qb##ptrname .AllocThrows(__l##ptrname+1); \
    BOOL __b##ptrname = FALSE; \
    DWORD _pCnt = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, _nCharacters, ptrname, __l##ptrname, NULL, &__b##ptrname); \
    if (__b##ptrname || (_pCnt == 0 && _nCharacters > 0)) { \
        MAKE_TRANSLATIONFAILED; \
    } \
    ptrname[__l##ptrname] = 0;

// MAKE_MULTIBYTE_FROMWIDEN_BESTFIT(ptrname, widestr, _nCharacters, _pCnt, codepage)
//
// Counted wide-to-multibyte conversion that passes 0 for the conversion flags
// (i.e. without WC_NO_BEST_FIT_CHARS) and does not request the "default char
// used" indicator, so only a zero return for a non-empty input is treated as
// a failure.
//
// `ptrname` is ASSIGNED, not declared: the caller must already have a variable
// of that name. `_pCnt` is declared by the macro as a DWORD receiving the
// converter's return value.
//
// MAKE_TOOLONGACTION is invoked if the sized length exceeds MAKE_MAX_LENGTH;
// MAKE_TRANSLATIONFAILED is invoked if `_pCnt` is 0 while `_nCharacters` > 0.
// A terminating 0 is written at index `__l##ptrname`.
#define MAKE_MULTIBYTE_FROMWIDEN_BESTFIT(ptrname, widestr, _nCharacters, _pCnt, codepage) \
CQuickBytes __qb##ptrname; \
int __l##ptrname; \
__l##ptrname = WszWideCharToMultiByte(codepage, 0, widestr, _nCharacters, NULL, 0, NULL, NULL); \
if (__l##ptrname > MAKE_MAX_LENGTH) \
MAKE_TOOLONGACTION; \
ptrname = (LPUTF8) __qb##ptrname .AllocThrows(__l##ptrname+1); \
DWORD _pCnt = WszWideCharToMultiByte(codepage, 0, widestr, _nCharacters, ptrname, __l##ptrname, NULL, NULL); \
if (_pCnt == 0 && _nCharacters > 0) { \
MAKE_TRANSLATIONFAILED; \
} \
ptrname[__l##ptrname] = 0;

// Counted wide-to-ANSI conversion: expands to MAKE_MULTIBYTE_FROMWIDEN with
// the CP_ACP code page. As with that macro, `ptrname` must already be declared
// by the caller, while `_pCnt` is declared by the expansion.
#define MAKE_ANSIPTR_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt) \
MAKE_MULTIBYTE_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt, CP_ACP)

// Character counts of the longest decimal renderings of integers. Each value
// is ARRAY_SIZE of a sample literal minus 1 (presumably ARRAY_SIZE counts the
// literal's terminating NUL, which the "- 1" then excludes).
const SIZE_T MaxSigned32BitDecString = ARRAY_SIZE("-2147483648") - 1;
const SIZE_T MaxUnsigned32BitDecString = ARRAY_SIZE("4294967295") - 1;
// Derived from the most negative 64-bit decimal literal.
const SIZE_T MaxIntegerDecHexString = ARRAY_SIZE("-9223372036854775808") - 1;

// Character counts of 16-, 32- and 64-bit values rendered as hexadecimal
// digits (4, 8 and 16 digits respectively, going by the sample literals).
const SIZE_T Max16BitHexString = ARRAY_SIZE("1234") - 1;
const SIZE_T Max32BitHexString = ARRAY_SIZE("12345678") - 1;
const SIZE_T Max64BitHexString = ARRAY_SIZE("1234567812345678") - 1;

Expand Down Expand Up @@ -410,77 +302,6 @@ inline WCHAR* FormatInteger(WCHAR* str, size_t strCount, const char* fmt, I v)
return str;
}

// Returns a newly allocated copy of wszString, or NULL when wszString is NULL
// or the non-throwing allocation fails. The buffer holds cchString + 1 WCHARs
// and is created with new[], so the caller releases it with delete[].
inline
LPWSTR DuplicateString(
    LPCWSTR wszString,
    size_t cchString)
{
    STATIC_CONTRACT_NOTHROW;

    // Nothing to copy.
    if (wszString == NULL)
        return NULL;

    LPWSTR wszCopy = new (nothrow) WCHAR[cchString + 1];
    if (wszCopy == NULL)
        return NULL;

    wcscpy_s(wszCopy, cchString + 1, wszString);
    return wszCopy;
}

// Length-computing overload: measures wszString with u16_strlen and forwards
// to the counted DuplicateString overload. Returns NULL for a NULL input.
inline
LPWSTR DuplicateString(
    LPCWSTR wszString)
{
    STATIC_CONTRACT_NOTHROW;

    if (wszString == NULL)
    {
        return NULL;
    }

    return DuplicateString(wszString, u16_strlen(wszString));
}

void DECLSPEC_NORETURN ThrowOutOfMemory();

// Counted duplicate that reports allocation failure by calling
// ThrowOutOfMemory() instead of returning NULL. A NULL input still yields a
// NULL result without throwing.
inline
LPWSTR DuplicateStringThrowing(
    LPCWSTR wszString,
    size_t cchString)
{
    STATIC_CONTRACT_THROWS;

    LPWSTR wszResult = NULL;
    if (wszString != NULL)
    {
        wszResult = DuplicateString(wszString, cchString);
        if (wszResult == NULL)
        {
            // The only way the non-throwing helper fails for a non-NULL input.
            ThrowOutOfMemory();
        }
    }

    return wszResult;
}

// Length-computing duplicate that reports allocation failure by calling
// ThrowOutOfMemory() instead of returning NULL. A NULL input still yields a
// NULL result without throwing.
inline
LPWSTR DuplicateStringThrowing(
    LPCWSTR wszString)
{
    STATIC_CONTRACT_THROWS;

    LPWSTR wszResult = NULL;
    if (wszString != NULL)
    {
        wszResult = DuplicateString(wszString);
        if (wszResult == NULL)
        {
            // The only way the non-throwing helper fails for a non-NULL input.
            ThrowOutOfMemory();
        }
    }

    return wszResult;
}


//*****************************************************************************
// Placement new is used to construct an object at an exact location. The pointer
// is simply returned to the caller without actually using the heap. The
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/pal/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ set(SOURCES
loader/module.cpp
locale/unicode.cpp
locale/unicodedata.cpp
locale/utf8.cpp
${CLR_SRC_NATIVE_DIR}/minipal/utf8.c
map/common.cpp
map/map.cpp
map/virtual.cpp
Expand Down
52 changes: 0 additions & 52 deletions src/coreclr/pal/src/include/pal/utf8.h

This file was deleted.

31 changes: 20 additions & 11 deletions src/coreclr/pal/src/locale/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Revision History:
#include "pal/palinternal.h"
#include "pal/dbgmsg.h"
#include "pal/file.h"
#include "pal/utf8.h"
#include <minipal/utf8.h>
#include "pal/cruntime.h"
#include "pal/stackstring.hpp"
#include "pal/unicodedata.h"
Expand Down Expand Up @@ -253,16 +253,20 @@ MultiByteToWideChar(
goto EXIT;
}

// Use UTF8ToUnicode on all systems, since it replaces
// invalid characters and Core Foundation doesn't do that.
if (CodePage == CP_UTF8 || CodePage == CP_ACP)
{
if (cbMultiByte <= -1)
if (cbMultiByte < 0)
cbMultiByte = strlen(lpMultiByteStr) + 1;

if (!lpWideCharStr || cchWideChar == 0)
retval = minipal_get_length_utf8_to_utf16(lpMultiByteStr, cbMultiByte, dwFlags);

if (lpWideCharStr)
{
cbMultiByte = strlen(lpMultiByteStr) + 1;
if (cchWideChar == 0) cchWideChar = retval;
retval = minipal_convert_utf8_to_utf16(lpMultiByteStr, cbMultiByte, (CHAR16_T*)lpWideCharStr, cchWideChar, dwFlags);
}

retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags);
goto EXIT;
}

Expand Down Expand Up @@ -338,15 +342,20 @@ WideCharToMultiByte(
defaultChar = *lpDefaultChar;
}

// Use UnicodeToUTF8 on all systems because we use
// UTF8ToUnicode in MultiByteToWideChar() on all systems.
if (CodePage == CP_UTF8 || CodePage == CP_ACP)
{
if (cchWideChar == -1)
{
if (cchWideChar < 0)
cchWideChar = PAL_wcslen(lpWideCharStr) + 1;

if (!lpMultiByteStr || cbMultiByte == 0)
retval = minipal_get_length_utf16_to_utf8((CHAR16_T*)lpWideCharStr, cchWideChar, dwFlags);

if (lpMultiByteStr)
{
if (cbMultiByte == 0) cbMultiByte = retval;
retval = minipal_convert_utf16_to_utf8((CHAR16_T*)lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte, dwFlags);
}
retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte);

goto EXIT;
}

Expand Down
Loading