Skip to content

Commit

Permalink
Don't write 0 to the end of strings in embind (emscripten-core#10844)
Browse files Browse the repository at this point in the history
This is unsafe because it's one past the end of the string, and in
threaded contexts it can cause data races
  • Loading branch information
jgravelle-google authored May 14, 2020
1 parent dcb4112 commit cfd03f3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 33 deletions.
33 changes: 6 additions & 27 deletions src/embind/embind.js
Original file line number Diff line number Diff line change
Expand Up @@ -635,20 +635,13 @@ var LibraryEmbind = {

var str;
if (stdStringIsUTF8) {
//ensure null termination at one-past-end byte if not present yet
var endChar = HEAPU8[value + 4 + length];
var endCharSwap = 0;
if (endChar != 0) {
endCharSwap = endChar;
HEAPU8[value + 4 + length] = 0;
}

var decodeStartPtr = value + 4;
// Looping here to support possible embedded '0' bytes
for (var i = 0; i <= length; ++i) {
var currentBytePtr = value + 4 + i;
if (HEAPU8[currentBytePtr] == 0) {
var stringSegment = UTF8ToString(decodeStartPtr);
if (HEAPU8[currentBytePtr] == 0 || i == length) {
var maxRead = currentBytePtr - decodeStartPtr;
var stringSegment = UTF8ToString(decodeStartPtr, maxRead);
if (str === undefined) {
str = stringSegment;
} else {
Expand All @@ -658,10 +651,6 @@ var LibraryEmbind = {
decodeStartPtr = currentBytePtr + 1;
}
}

if (endCharSwap != 0) {
HEAPU8[value + 4 + length] = endCharSwap;
}
} else {
var a = new Array(length);
for (var i = 0; i < length; ++i) {
Expand Down Expand Up @@ -754,20 +743,14 @@ var LibraryEmbind = {
var length = HEAPU32[value >> 2];
var HEAP = getHeap();
var str;
// Ensure null termination at one-past-end byte if not present yet
var endChar = HEAP[(value + 4 + length * charSize) >> shift];
var endCharSwap = 0;
if (endChar != 0) {
endCharSwap = endChar;
HEAP[(value + 4 + length * charSize) >> shift] = 0;
}

var decodeStartPtr = value + 4;
// Looping here to support possible embedded '0' bytes
for (var i = 0; i <= length; ++i) {
var currentBytePtr = value + 4 + i * charSize;
if (HEAP[currentBytePtr >> shift] == 0) {
var stringSegment = decodeString(decodeStartPtr);
if (HEAP[currentBytePtr >> shift] == 0 || i == length) {
var maxReadBytes = currentBytePtr - decodeStartPtr;
var stringSegment = decodeString(decodeStartPtr, maxReadBytes);
if (str === undefined) {
str = stringSegment;
} else {
Expand All @@ -778,10 +761,6 @@ var LibraryEmbind = {
}
}

if (endCharSwap != 0) {
HEAP[(value + 4 + length * charSize) >> shift] = endCharSwap;
}

_free(value);

return str;
Expand Down
18 changes: 12 additions & 6 deletions src/runtime_strings_extra.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ var UTF16Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-16l
#endif // TEXTDECODER
#endif // TEXTDECODER == 2

function UTF16ToString(ptr) {
function UTF16ToString(ptr, maxBytesToRead) {
#if ASSERTIONS
assert(ptr % 2 == 0, 'Pointer passed to UTF16ToString must be aligned to two bytes!');
#endif
Expand All @@ -48,7 +48,10 @@ function UTF16ToString(ptr) {
// TextDecoder needs to know the byte length in advance, it doesn't stop on null terminator by itself.
// Also, use the length info to avoid running tiny strings through TextDecoder, since .subarray() allocates garbage.
var idx = endPtr >> 1;
while (HEAP16[idx]) ++idx;
var maxIdx = idx + maxBytesToRead / 2;
// If maxBytesToRead is not passed explicitly, it will be undefined, and this
// will always evaluate to true. This saves on code size.
while (!(idx >= maxIdx) && HEAPU16[idx]) ++idx;
endPtr = idx << 1;

#if TEXTDECODER != 2
Expand All @@ -64,7 +67,7 @@ function UTF16ToString(ptr) {
var str = '';
while (1) {
var codeUnit = {{{ makeGetValue('ptr', 'i*2', 'i16') }}};
if (codeUnit == 0) return str;
if (codeUnit == 0 || i == maxBytesToRead / 2) return str;
++i;
// fromCharCode constructs a character from a UTF-16 code unit, so we can pass the UTF16 string right through.
str += String.fromCharCode(codeUnit);
Expand Down Expand Up @@ -117,16 +120,18 @@ function lengthBytesUTF16(str) {
return str.length*2;
}

function UTF32ToString(ptr) {
function UTF32ToString(ptr, maxBytesToRead) {
#if ASSERTIONS
assert(ptr % 4 == 0, 'Pointer passed to UTF32ToString must be aligned to four bytes!');
#endif
var i = 0;

var str = '';
while (1) {
// If maxBytesToRead is not passed explicitly, it will be undefined, and this
// will always evaluate to true. This saves on code size.
while (!(i >= maxBytesToRead / 4)) {
var utf32 = {{{ makeGetValue('ptr', 'i*4', 'i32') }}};
if (utf32 == 0) return str;
if (utf32 == 0) break;
++i;
// Gotcha: fromCharCode constructs a character from a UTF-16 encoded code (pair), not from a Unicode code point! So encode the code point to UTF-16 for constructing.
// See http://unicode.org/faq/utf_bom.html#utf16-3
Expand All @@ -137,6 +142,7 @@ function UTF32ToString(ptr) {
str += String.fromCharCode(utf32);
}
}
return str;
}

// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr',
Expand Down

0 comments on commit cfd03f3

Please sign in to comment.