Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix garbling when copying multibyte text via OSC 52 #7870

Merged
3 commits merged into from
Oct 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/spell-check/dictionary/apis.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ serializer
SIZENS
spsc
STDCPP
strchr
syscall
tmp
tx
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/spell-check/patterns/patterns.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ TestUtils::VerifyExpectedString\(tb, L"[^"]+"
\b([A-Za-z])\1{3,}\b
Base64::s_(?:En|De)code\(L"[^"]+"
VERIFY_ARE_EQUAL\(L"[^"]+"
L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
std::memory_order_[\w]+
29 changes: 15 additions & 14 deletions src/terminal/parser/base64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

using namespace Microsoft::Console::VirtualTerminal;

static const wchar_t base64Chars[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static const wchar_t padChar = L'=';
static const char base64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static const char padChar = '=';

#pragma warning(disable : 26446 26447 26482 26485 26493 26494)

Expand Down Expand Up @@ -75,15 +75,16 @@ std::wstring Base64::s_Encode(const std::wstring_view src) noexcept
// - true if decoding successfully, otherwise false.
bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
{
std::string mbStr;
int state = 0;
wchar_t tmp;
char tmp;

const auto len = src.size() / 4 * 3;
if (len == 0)
{
return false;
}
dst.reserve(len);
mbStr.reserve(len);

auto iter = src.cbegin();
while (iter < src.cend())
Expand All @@ -99,7 +100,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
break;
}

auto pos = wcschr(base64Chars, *iter);
auto pos = strchr(base64Chars, *iter);
if (!pos) // A non-base64 character found.
{
return false;
Expand All @@ -108,24 +109,24 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
switch (state)
{
case 0:
tmp = (wchar_t)(pos - base64Chars) << 2;
tmp = (char)(pos - base64Chars) << 2;
state = 1;
break;
case 1:
tmp |= (pos - base64Chars) >> 4;
dst.push_back(tmp);
tmp = (wchar_t)((pos - base64Chars) & 0x0f) << 4;
tmp |= (char)(pos - base64Chars) >> 4;
mbStr += tmp;
tmp = (char)((pos - base64Chars) & 0x0f) << 4;
state = 2;
break;
case 2:
tmp |= (pos - base64Chars) >> 2;
dst.push_back(tmp);
tmp = (wchar_t)((pos - base64Chars) & 0x03) << 6;
tmp |= (char)(pos - base64Chars) >> 2;
mbStr += tmp;
tmp = (char)((pos - base64Chars) & 0x03) << 6;
state = 3;
break;
case 3:
tmp |= pos - base64Chars;
dst.push_back(tmp);
mbStr += tmp;
state = 0;
break;
default:
Expand Down Expand Up @@ -176,7 +177,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
return false;
}

return true;
return SUCCEEDED(til::u8u16(mbStr, dst));
}

// Routine Description:
Expand Down
13 changes: 13 additions & 0 deletions src/terminal/parser/ut_parser/Base64Test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,18 @@ class Microsoft::Console::VirtualTerminal::Base64Test

success = Base64::s_Decode(L"Zm9vYg=", result);
VERIFY_ARE_EQUAL(false, success);

// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
result = L"";
success = Base64::s_Decode(L"44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt", result);
VERIFY_ARE_EQUAL(true, success);
VERIFY_ARE_EQUAL(L"にほんご汉语한국", result);

// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
result = L"";
success = Base64::s_Decode(L"8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=", result);
VERIFY_ARE_EQUAL(true, success);
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", result);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm mildly worried about putting the emoji in the file as literals like this - I know in the past we've had issues where someone's text editor saves a file with emoji as the wrong encoding which then end up breaking the tests later. I could be mistaken though, I'll let @DHowett or @miniksa refresh my memory if this is a bad idea or not.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Naw, we've done this before. We'll survive!

}
};
15 changes: 15 additions & 0 deletions src/terminal/parser/ut_parser/OutputEngineTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2627,6 +2627,21 @@ class StateMachineExternalTest final

pDispatch->ClearState();

// Passing an empty `Pc` param and a base64-encoded multibyte text `Pd` works.
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
mach.ProcessString(L"\x1b]52;;44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt\x07");
VERIFY_ARE_EQUAL(L"にほんご汉语한국", pDispatch->_copyContent);

pDispatch->ClearState();

// Passing an empty `Pc` param and a base64-encoded multibyte text w/ emoji sequences `Pd` works.
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
mach.ProcessString(L"\x1b]52;;8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=\x07");
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", pDispatch->_copyContent);

pDispatch->ClearState();

// Passing a non-empty `Pc` param (`s0` is ignored) and a valid `Pd` param works.
mach.ProcessString(L"\x1b]52;s0;Zm9v\x07");
VERIFY_ARE_EQUAL(L"foo", pDispatch->_copyContent);
Expand Down