From 335aed279d70f7643f6b6105b97095ca7c536c55 Mon Sep 17 00:00:00 2001 From: Eugene Lazutkin Date: Tue, 4 Jun 2024 20:21:10 -0500 Subject: [PATCH] Added an internal flag to enable caching. --- binding.gyp | 7 +++---- lib/accessors.cc | 30 +++++++++++++++++++++++++++++- lib/addon.cc | 15 ++------------- lib/exec.cc | 2 +- lib/match.cc | 2 +- lib/new.cc | 7 ++++++- lib/replace.cc | 2 +- lib/search.cc | 2 +- lib/split.cc | 2 +- lib/test.cc | 2 +- lib/to_string.cc | 4 ++++ lib/wrapped_re2.h | 21 +++++++++++++++++++++ re2.js | 2 +- 13 files changed, 72 insertions(+), 26 deletions(-) diff --git a/binding.gyp b/binding.gyp index bfdb3a2..db156eb 100644 --- a/binding.gyp +++ b/binding.gyp @@ -3,8 +3,10 @@ { "target_name": "re2", "sources": [ - "lib/str-val.cc", "lib/addon.cc", + "lib/accessors.cc", + "lib/str-val.cc", + "lib/util.cc", "lib/new.cc", "lib/exec.cc", "lib/test.cc", @@ -13,9 +15,6 @@ "lib/search.cc", "lib/split.cc", "lib/to_string.cc", - "lib/accessors.cc", - "lib/util.cc", - "lib/str-val.cc", "vendor/re2/re2/bitmap256.cc", "vendor/re2/re2/bitstate.cc", "vendor/re2/re2/compile.cc", diff --git a/lib/accessors.cc b/lib/accessors.cc index f586c7b..77c4984 100644 --- a/lib/accessors.cc +++ b/lib/accessors.cc @@ -28,6 +28,30 @@ NAN_GETTER(WrappedRE2::GetInternalSource) info.GetReturnValue().Set(Nan::New(re2->regexp.pattern()).ToLocalChecked()); } +NAN_GETTER(WrappedRE2::GetEnabledCache) +{ + if (!WrappedRE2::HasInstance(info.This())) + { + info.GetReturnValue().SetUndefined(); + return; + } + + auto re2 = Nan::ObjectWrap::Unwrap(info.This()); + info.GetReturnValue().Set(re2->enabledCache); +} + +NAN_GETTER(WrappedRE2::GetIsCached) +{ + if (!WrappedRE2::HasInstance(info.This())) + { + info.GetReturnValue().SetUndefined(); + return; + } + + auto re2 = Nan::ObjectWrap::Unwrap(info.This()); + info.GetReturnValue().Set(!!re2->lastStringValue); +} + NAN_GETTER(WrappedRE2::GetFlags) { if (!WrappedRE2::HasInstance(info.This())) @@ -39,9 +63,13 @@ NAN_GETTER(WrappedRE2::GetFlags) auto re2 = Nan::ObjectWrap::Unwrap(info.This()); std::string flags; + if (re2->enabledCache) + { + flags += "\b"; + } if (re2->hasIndices) { - flags = "d"; + flags += "d"; } if (re2->global) { diff --git a/lib/addon.cc b/lib/addon.cc index b572410..7b22159 100644 --- a/lib/addon.cc +++ b/lib/addon.cc @@ -74,6 +74,8 @@ v8::Local WrappedRE2::Init() Nan::SetAccessor(instanceTemplate, Nan::New("hasIndices").ToLocalChecked(), GetHasIndices); Nan::SetAccessor(instanceTemplate, Nan::New("lastIndex").ToLocalChecked(), GetLastIndex, SetLastIndex); Nan::SetAccessor(instanceTemplate, Nan::New("internalSource").ToLocalChecked(), GetInternalSource); + Nan::SetAccessor(instanceTemplate, Nan::New("enabledCache").ToLocalChecked(), GetEnabledCache); + Nan::SetAccessor(instanceTemplate, Nan::New("isCached").ToLocalChecked(), GetIsCached); auto ctr = Nan::GetFunction(tpl).ToLocalChecked(); @@ -104,19 +106,6 @@ void WrappedRE2::dropLastString() } } -inline size_t countBytes(const char *data, size_t from, size_t n) -{ - for (; n > 0; --n) - { - size_t s = getUtf8CharSize(data[from]); - from += s; - if (s == 4 && n >= 2) - --n; // this utf8 character will take two utf16 characters - // the decrement above is protected to avoid an overflow of an unsigned integer - } - return from; -} - void WrappedRE2::weakLastStringCallback(const Nan::WeakCallbackInfo &data) { WrappedRE2* re2 = data.GetParameter(); diff --git a/lib/exec.cc b/lib/exec.cc index e96b1bd..d3ddcf7 100644 --- a/lib/exec.cc +++ b/lib/exec.cc @@ -15,7 +15,7 @@ NAN_METHOD(WrappedRE2::Exec) return; } - re2->prepareLastString(info[0]); + PrepareLastString prepare(re2, info[0]); StrValBase &str = *re2->lastStringValue; if (str.isBad) return; // throws an exception diff --git a/lib/match.cc b/lib/match.cc index cbcec74..c6e36a6 100644 --- a/lib/match.cc +++ b/lib/match.cc @@ -15,7 +15,7 @@ NAN_METHOD(WrappedRE2::Match) return; } - re2->prepareLastString(info[0], re2->global); + PrepareLastString prepare(re2, info[0], re2->global); StrValBase &str = *re2->lastStringValue; if (str.isBad) return; // throws an exception diff --git a/lib/new.cc b/lib/new.cc index e82cc05..b0d2d8e 100644 --- a/lib/new.cc +++ b/lib/new.cc @@ -233,6 +233,7 @@ NAN_METHOD(WrappedRE2::New) bool unicode = false; bool sticky = false; bool hasIndices = false; + bool enabledCache = false; auto context = Nan::GetCurrentContext(); bool needFlags = true; @@ -256,6 +257,9 @@ NAN_METHOD(WrappedRE2::New) { switch (data[i]) { + case '\b': + enabledCache = true; + break; case 'g': global = true; break; @@ -339,6 +343,7 @@ NAN_METHOD(WrappedRE2::New) if (needFlags) { + enabledCache = re2->enabledCache; global = re2->global; ignoreCase = re2->ignoreCase; multiline = re2->multiline; @@ -401,7 +406,7 @@ NAN_METHOD(WrappedRE2::New) options.set_dot_nl(dotAll); options.set_log_errors(false); // inappropriate when embedding - std::unique_ptr re2(new WrappedRE2(re2::StringPiece(data, size), options, source, global, ignoreCase, multiline, dotAll, sticky, hasIndices)); + std::unique_ptr re2(new WrappedRE2(re2::StringPiece(data, size), options, source, enabledCache, global, ignoreCase, multiline, dotAll, sticky, hasIndices)); if (!re2->regexp.ok()) { return Nan::ThrowSyntaxError(re2->regexp.error().c_str()); diff --git a/lib/replace.cc b/lib/replace.cc index fc13841..30bd565 100644 --- a/lib/replace.cc +++ b/lib/replace.cc @@ -492,7 +492,7 @@ NAN_METHOD(WrappedRE2::Replace) return; } - re2->prepareLastString(info[0]); + PrepareLastString prepare(re2, info[0]); StrValBase &replacee = *re2->lastStringValue; if (replacee.isBad) return; // throws an exception diff --git a/lib/search.cc b/lib/search.cc index 7ca1582..a92121a 100644 --- a/lib/search.cc +++ b/lib/search.cc @@ -13,7 +13,7 @@ NAN_METHOD(WrappedRE2::Search) return; } - re2->prepareLastString(info[0], true); + PrepareLastString prepare(re2, info[0], true); StrValBase &str = *re2->lastStringValue; if (str.isBad) return; // throws an exception diff --git a/lib/split.cc b/lib/split.cc index 0fce48c..591c6d1 100644 --- a/lib/split.cc +++ b/lib/split.cc @@ -20,7 +20,7 @@ NAN_METHOD(WrappedRE2::Split) return; } - re2->prepareLastString(info[0], true); + PrepareLastString prepare(re2, info[0], true); StrValBase &str = *re2->lastStringValue; if (str.isBad) return; // throws an exception diff --git a/lib/test.cc b/lib/test.cc index bbced0c..103fa61 100644 --- a/lib/test.cc +++ b/lib/test.cc @@ -15,7 +15,7 @@ NAN_METHOD(WrappedRE2::Test) return; } - re2->prepareLastString(info[0]); + PrepareLastString prepare(re2, info[0]); StrValBase &str = *re2->lastStringValue; if (str.isBad) return; // throws an exception diff --git a/lib/to_string.cc b/lib/to_string.cc index b246ca5..6b899b9 100644 --- a/lib/to_string.cc +++ b/lib/to_string.cc @@ -20,6 +20,10 @@ NAN_METHOD(WrappedRE2::ToString) buffer += re2->source; buffer += "/"; + if (re2->enabledCache) + { + buffer += "\b"; + } if (re2->global) { buffer += "g"; diff --git a/lib/wrapped_re2.h b/lib/wrapped_re2.h index c51db84..33ed15a 100644 --- a/lib/wrapped_re2.h +++ b/lib/wrapped_re2.h @@ -16,6 +16,7 @@ class WrappedRE2 : public Nan::ObjectWrap const re2::StringPiece &pattern, const re2::RE2::Options &options, const std::string &src, + const bool &c, const bool &g, const bool &i, const bool &m, @@ -23,6 +24,7 @@ class WrappedRE2 : public Nan::ObjectWrap const bool &y, const bool &d) : regexp(pattern, options), source(src), + enabledCache(c), global(g), ignoreCase(i), multiline(m), @@ -47,6 +49,8 @@ class WrappedRE2 : public Nan::ObjectWrap static NAN_GETTER(GetLastIndex); static NAN_SETTER(SetLastIndex); static NAN_GETTER(GetInternalSource); + static NAN_GETTER(GetEnabledCache); + static NAN_GETTER(GetIsCached); // RegExp methods static NAN_METHOD(Exec); @@ -89,6 +93,7 @@ class WrappedRE2 : public Nan::ObjectWrap re2::RE2 regexp; std::string source; + bool enabledCache; bool global; bool ignoreCase; bool multiline; @@ -97,6 +102,8 @@ class WrappedRE2 : public Nan::ObjectWrap bool hasIndices; size_t lastIndex; + friend class PrepareLastString; + private: Nan::Persistent lastString; // weak pointer StrValBase *lastStringValue; @@ -107,6 +114,20 @@ class WrappedRE2 : public Nan::ObjectWrap void prepareLastString(const v8::Local &arg, bool ignoreLastIndex = false); }; +struct PrepareLastString +{ + PrepareLastString(WrappedRE2 *re2, const v8::Local &arg, bool ignoreLastIndex = false) : re2(re2) { + re2->prepareLastString(arg, ignoreLastIndex); + } + + ~PrepareLastString() { + if (!re2->enabledCache || !(re2->global || re2->sticky)) + re2->dropLastString(); + } + + WrappedRE2 *re2; +}; + // utilities inline size_t getUtf8Length(const uint16_t *from, const uint16_t *to) diff --git a/re2.js b/re2.js index 3f32be9..6786d42 100644 --- a/re2.js +++ b/re2.js @@ -24,7 +24,7 @@ if (typeof Symbol != 'undefined') { if (!this.global) { throw TypeError('String.prototype.matchAll called with a non-global RE2 argument'); } - const re = new RE2(this); + const re = new RE2(this, this.flags + '\b'); re.lastIndex = this.lastIndex; for (;;) { const result = re.exec(str);