From de2287e224c3d26e7fd1a540f20d767e03dfe86e Mon Sep 17 00:00:00 2001 From: Fedor Indutny Date: Wed, 24 Jan 2018 14:21:15 -0500 Subject: [PATCH] buffer: port `byteLengthUtf8` to JavaScript Prior to this change the majority of the time spent when calling `Buffer.byteLength` was spent crossing the JS->C++ boundary. This change moves the function to JavaScript, making it much faster. --- lib/buffer.js | 37 ++++++++++++++++++++++++++++++++++++- src/node_buffer.cc | 8 -------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/lib/buffer.js b/lib/buffer.js index 4b800039fe6f80..56acacf14c1278 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -22,7 +22,6 @@ 'use strict'; const { - byteLengthUtf8, copy: _copy, compare: _compare, compareOffset, @@ -320,6 +319,42 @@ function allocate(size) { } +// Ported from deps/v8/src/unicode-inl.h +function isTrailSurrogate(code) { + return (code & 0xfc00) === 0xdc00; +} + + +function isLeadSurrogate(code) { + // No previous character + if (code === -1) return false; + return (code & 0xfc00) === 0xd800; +} + + +function byteLengthUtf8(string) { + var len = 0; + var previous = -1; + for (var i = 0; i < string.length; i++) { + // NOTE: 0 <= code <= 0xffff + var code = string.charCodeAt(i); + if (code <= 0x7f) { + len++; + } else if (code <= 0x7ff) { + len += 2; + } else { + // kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates + if (isTrailSurrogate(code) && isLeadSurrogate(previous)) + len += 1; + else + len += 3; + } + previous = code; + } + return len; +} + + function fromString(string, encoding) { var length; if (typeof encoding !== 'string' || encoding.length === 0) { diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 72776304c8eb70..964a9288a686f9 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -785,13 +785,6 @@ void WriteDoubleBE(const FunctionCallbackInfo& args) { } -void ByteLengthUtf8(const FunctionCallbackInfo &args) { - CHECK(args[0]->IsString()); - - // Fast case: avoid StringBytes on 
UTF8 string. Jump to v8. - args.GetReturnValue().Set(args[0].As()->Utf8Length()); -} - // Normalize val to be an integer in the range of [1, -1] since // implementations of memcmp() can vary by platform. static int normalizeCompareVal(int val, size_t a_length, size_t b_length) { @@ -1214,7 +1207,6 @@ void Initialize(Local target, env->SetMethod(target, "setupBufferJS", SetupBufferJS); env->SetMethod(target, "createFromString", CreateFromString); - env->SetMethod(target, "byteLengthUtf8", ByteLengthUtf8); env->SetMethod(target, "copy", Copy); env->SetMethod(target, "compare", Compare); env->SetMethod(target, "compareOffset", CompareOffset);