diff --git a/.editorconfig b/.editorconfig index 9eda3f95b66ab0..52b84b424fb9a0 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,7 +9,7 @@ insert_final_newline = true [vcbuild.bat] end_of_line = crlf -[{lib,src,test}/**.js] +[{lib,test,tools}/**.js] indent_style = space indent_size = 2 @@ -29,7 +29,7 @@ indent_size = 2 indent_style = tab indent_size = 8 -[{deps,tools}/**] +[{deps}/**] indent_style = ignore indent_size = ignore end_of_line = ignore diff --git a/.eslintrc.yaml b/.eslintrc.yaml index 99489d2e7d8589..201af11f017733 100644 --- a/.eslintrc.yaml +++ b/.eslintrc.yaml @@ -214,4 +214,3 @@ globals: LTTNG_HTTP_SERVER_RESPONSE: false LTTNG_NET_SERVER_CONNECTION: false LTTNG_NET_STREAM_END: false - internalBinding: false diff --git a/CHANGELOG.md b/CHANGELOG.md index c2635573cd9ebc..49349ede4add89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,8 @@ release. -9.10.1
+9.11.0
+9.10.1
9.10.0
9.9.0
9.8.0
diff --git a/COLLABORATOR_GUIDE.md b/COLLABORATOR_GUIDE.md index 6702fb1c7f624b..cc213f502a8862 100644 --- a/COLLABORATOR_GUIDE.md +++ b/COLLABORATOR_GUIDE.md @@ -408,10 +408,8 @@ recommended but not required. ### Deprecations -_Deprecation_ refers to the identification of Public APIs that should no longer -be used and that may be removed or modified in backward-incompatible ways in -a future major release of Node.js. Deprecation may be used with internal APIs if -there is expected impact on the user community. +**Deprecation** refers to the identification of Public APIs that should no +longer be used. Node.js uses three Deprecation levels: @@ -420,9 +418,9 @@ Node.js uses three Deprecation levels: notice indicating the deprecated status is added to the API documentation but no functional changes are implemented in the code. There will be no runtime deprecation warnings emitted for such deprecations by default. - Documentation-only deprecations may trigger a runtime warning when launched - with [`--pending-deprecation`][] flag (or its alternative, - `NODE_PENDING_DEPRECATION=1` environment variable). + Documentation-only deprecations may trigger a runtime warning when Node.js + is started with the [`--pending-deprecation`][] flag or the + `NODE_PENDING_DEPRECATION=1` environment variable is set. * *Runtime Deprecation* refers to the use of process warnings emitted at runtime the first time that a deprecated API is used. A command-line @@ -432,7 +430,9 @@ Node.js uses three Deprecation levels: deprecated status. * *End-of-life* refers to APIs that have gone through Runtime Deprecation and - are ready to be removed from Node.js entirely. + are no longer subject to the semantic versioning rules used by the project. + Backward-incompatible changes including complete removal of such APIs may + occur at any time. Documentation-Only Deprecations may be handled as semver-minor or semver-major changes. 
Such deprecations have no impact on the successful operation of running @@ -457,7 +457,9 @@ Deprecations may land in a Node.js minor release but must not be upgraded to a Runtime Deprecation until the next major release.) No API can be moved to End-of-life without first having gone through a -Runtime Deprecation cycle. +Runtime Deprecation cycle. However, there is no requirement that deprecated +code must progress ultimately to *End-of-Life*. Documentation-only and runtime +deprecations may remain indefinitely. A best effort will be made to communicate pending deprecations and associated mitigations with the ecosystem as soon as possible (preferably before the pull diff --git a/LICENSE b/LICENSE index 4d4fd710946b24..116b9d1cdbb3b6 100644 --- a/LICENSE +++ b/LICENSE @@ -99,8 +99,8 @@ The externally maintained libraries used by Node.js are: """ COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) - Copyright © 1991-2017 Unicode, Inc. All rights reserved. - Distributed under the Terms of Use in http://www.unicode.org/copyright.html + Copyright © 1991-2018 Unicode, Inc. All rights reserved. + Distributed under the Terms of Use in http://www.unicode.org/copyright.html. Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode data files and any associated documentation @@ -482,6 +482,35 @@ The externally maintained libraries used by Node.js are: # by ICANN or the IETF Trust on the database or the code. Any person # making a contribution to the database or code waives all rights to # future claims in that contribution or in the TZ Database. + + 6. Google double-conversion + + Copyright 2006-2011, the V8 project authors. All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ - libuv, located at deps/uv, is licensed as follows: diff --git a/Makefile b/Makefile index e9afdc87e72758..bde51631ff5871 100644 --- a/Makefile +++ b/Makefile @@ -236,36 +236,40 @@ v8: tools/make-v8.sh $(MAKE) -C deps/v8 $(V8_ARCH).$(BUILDTYPE_LOWER) $(V8_BUILD_OPTIONS) +.PHONY: jstest +jstest: build-addons build-addons-napi ## Runs addon tests and JS tests + $(PYTHON) tools/test.py --mode=release -J \ + $(CI_JS_SUITES) \ + $(CI_NATIVE_SUITES) + .PHONY: test # This does not run tests of third-party libraries inside deps. test: all ## Runs default tests, linters, and builds docs. 
+ # Build the addons before running the tests so the test results + # can be displayed together $(MAKE) -s build-addons $(MAKE) -s build-addons-napi - $(MAKE) -s doc-only - $(MAKE) -s lint + $(MAKE) -s test-doc $(MAKE) -s cctest - $(PYTHON) tools/test.py --mode=release -J \ - $(CI_JS_SUITES) \ - $(CI_NATIVE_SUITES) \ - $(CI_DOC) + $(MAKE) -s jstest .PHONY: test-only test-only: all ## For a quick test, does not run linter or build docs. + # Build the addons before running the tests so the test results + # can be displayed together $(MAKE) build-addons $(MAKE) build-addons-napi $(MAKE) cctest - $(PYTHON) tools/test.py --mode=release -J \ - $(CI_JS_SUITES) \ - $(CI_NATIVE_SUITES) + $(MAKE) jstest # Used by `make coverage-test` test-cov: all + # Build the addons before running the tests so the test results + # can be displayed together $(MAKE) build-addons $(MAKE) build-addons-napi # $(MAKE) cctest - $(PYTHON) tools/test.py --mode=release -J \ - $(CI_JS_SUITES) \ - $(CI_NATIVE_SUITES) + $(MAKE) jstest $(MAKE) lint test-parallel: all diff --git a/README.md b/README.md index d249fd7147d85c..e8c1f0169319d9 100644 --- a/README.md +++ b/README.md @@ -252,6 +252,8 @@ For more information about the governance of the Node.js project, see **James M Snell** <jasnell@gmail.com> (he/him) * [joyeecheung](https://github.com/joyeecheung) - **Joyee Cheung** <joyeec9h3@gmail.com> (she/her) +* [mafintosh](https://github.com/mafintosh) +**Mathias Buus** <mathiasbuus@gmail.com> (he/him) * [mcollina](https://github.com/mcollina) - **Matteo Collina** <matteo.collina@gmail.com> (he/him) * [mhdawson](https://github.com/mhdawson) - diff --git a/configure b/configure index d999cfc47d87fd..b32a7961189e6c 100755 --- a/configure +++ b/configure @@ -439,6 +439,11 @@ intl_optgroup.add_option('--download-path', parser.add_option_group(intl_optgroup) +parser.add_option('--debug-lib', + action='store_true', + dest='node_debug_lib', + help='build lib with DCHECK macros') + 
http2_optgroup.add_option('--debug-http2', action='store_true', dest='debug_http2', @@ -949,6 +954,8 @@ def configure_node(o): if options.enable_static: o['variables']['node_target_type'] = 'static_library' + o['variables']['node_debug_lib'] = b(options.node_debug_lib) + if options.debug_http2: o['variables']['debug_http2'] = 1 else: @@ -1128,8 +1135,8 @@ def glob_to_var(dir_base, dir_sub, patch_dir): def configure_intl(o): icus = [ { - 'url': 'https://ssl.icu-project.org/files/icu4c/60.2/icu4c-60_2-src.zip', - 'md5': '115908818fd0324530b2acb1b029738d', + 'url': 'https://ssl.icu-project.org/files/icu4c/61.1/icu4c-61_1-src.zip', + 'md5': '780d8524c8a860ed8d8f6fe75cb7ce3f', }, ] def icu_download(path): diff --git a/deps/icu-small/LICENSE b/deps/icu-small/LICENSE index c84076cd072b80..25b6eb9d3415e6 100644 --- a/deps/icu-small/LICENSE +++ b/deps/icu-small/LICENSE @@ -1,7 +1,7 @@ COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) -Copyright © 1991-2017 Unicode, Inc. All rights reserved. -Distributed under the Terms of Use in http://www.unicode.org/copyright.html +Copyright © 1991-2018 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in http://www.unicode.org/copyright.html. Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode data files and any associated documentation @@ -383,3 +383,32 @@ Database section 7. # by ICANN or the IETF Trust on the database or the code. Any person # making a contribution to the database or code waives all rights to # future claims in that contribution or in the TZ Database. + +6. Google double-conversion + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/deps/icu-small/README-SMALL-ICU.txt b/deps/icu-small/README-SMALL-ICU.txt index c6dc0b30515162..b3919ec52b21ae 100644 --- a/deps/icu-small/README-SMALL-ICU.txt +++ b/deps/icu-small/README-SMALL-ICU.txt @@ -1,8 +1,8 @@ Small ICU sources - auto generated by shrink-icu-src.py This directory contains the ICU subset used by --with-intl=small-icu (the default) -It is a strict subset of ICU 60 source files with the following exception(s): -* deps/icu-small/source/data/in/icudt60l.dat : Reduced-size data file +It is a strict subset of ICU 61 source files with the following exception(s): +* deps/icu-small/source/data/in/icudt61l.dat : Reduced-size data file To rebuild this directory, see ../../tools/icu/README.md diff --git a/deps/icu-small/source/common/bmpset.cpp b/deps/icu-small/source/common/bmpset.cpp index f84bfd7f5bfcf1..35bc80dce359eb 100644 --- a/deps/icu-small/source/common/bmpset.cpp +++ b/deps/icu-small/source/common/bmpset.cpp @@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) { ++lead; } if(lead= 0 && breakType < UPRV_LENGTHOF(fHandled) - && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); +UnhandledEngine::handles(UChar32 c) const { + return fHandled && fHandled->contains(c); } int32_t UnhandledEngine::findBreaks( UText *text, int32_t /* startPos */, int32_t endPos, - int32_t breakType, UVector32 &/*foundBreaks*/ ) const { - if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) { - UChar32 c = utext_current32(text); - while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { - utext_next32(text); // TODO: recast loop to work with post-increment operations. - c = utext_current32(text); - } + UChar32 c = utext_current32(text); + while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { + utext_next32(text); // TODO: recast loop to work with post-increment operations. 
+ c = utext_current32(text); } return 0; } void -UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { - if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) { - if (fHandled[breakType] == 0) { - fHandled[breakType] = new UnicodeSet(); - if (fHandled[breakType] == 0) { - return; - } - } - if (!fHandled[breakType]->contains(c)) { - UErrorCode status = U_ZERO_ERROR; - // Apply the entire script of the character. - int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); - fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); +UnhandledEngine::handleCharacter(UChar32 c) { + if (fHandled == nullptr) { + fHandled = new UnicodeSet(); + if (fHandled == nullptr) { + return; } } + if (!fHandled->contains(c)) { + UErrorCode status = U_ZERO_ERROR; + // Apply the entire script of the character. + int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); + fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status); + } } /* @@ -138,7 +127,7 @@ U_NAMESPACE_BEGIN static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER; const LanguageBreakEngine * -ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { +ICULanguageBreakFactory::getEngineFor(UChar32 c) { const LanguageBreakEngine *lbe = NULL; UErrorCode status = U_ZERO_ERROR; @@ -156,14 +145,14 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { int32_t i = fEngines->size(); while (--i >= 0) { lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); - if (lbe != NULL && lbe->handles(c, breakType)) { + if (lbe != NULL && lbe->handles(c)) { return lbe; } } } // We didn't find an engine. Create one. 
- lbe = loadEngineFor(c, breakType); + lbe = loadEngineFor(c); if (lbe != NULL) { fEngines->push((void *)lbe, status); } @@ -171,11 +160,11 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { } const LanguageBreakEngine * -ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { +ICULanguageBreakFactory::loadEngineFor(UChar32 c) { UErrorCode status = U_ZERO_ERROR; UScriptCode code = uscript_getScript(c, &status); if (U_SUCCESS(status)) { - DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType); + DictionaryMatcher *m = loadDictionaryMatcherFor(code); if (m != NULL) { const LanguageBreakEngine *engine = NULL; switch(code) { @@ -236,7 +225,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { } DictionaryMatcher * -ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) { +ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) { UErrorCode status = U_ZERO_ERROR; // open root from brkitr tree. UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); diff --git a/deps/icu-small/source/common/brkeng.h b/deps/icu-small/source/common/brkeng.h index 5c61d2ed5d5d70..e40fce13f64b8a 100644 --- a/deps/icu-small/source/common/brkeng.h +++ b/deps/icu-small/source/common/brkeng.h @@ -54,11 +54,10 @@ class LanguageBreakEngine : public UMemory { * a particular kind of break.

* * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c, int32_t breakType) const = 0; + virtual UBool handles(UChar32 c) const = 0; /** *

Find any breaks within a run in the supplied text.

@@ -68,14 +67,12 @@ class LanguageBreakEngine : public UMemory { * is capable of handling. * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks A Vector of int32_t to receive the breaks. * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const = 0; }; @@ -125,11 +122,9 @@ class LanguageBreakFactory : public UMemory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0; + virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0; }; @@ -152,11 +147,11 @@ class UnhandledEngine : public LanguageBreakEngine { private: /** - * The sets of characters handled, for each break type + * The sets of characters handled. * @internal */ - UnicodeSet *fHandled[4]; + UnicodeSet *fHandled; public: @@ -176,11 +171,10 @@ class UnhandledEngine : public LanguageBreakEngine { * a particular kind of break.

* * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c, int32_t breakType) const; + virtual UBool handles(UChar32 c) const; /** *

Find any breaks within a run in the supplied text.

@@ -190,23 +184,20 @@ class UnhandledEngine : public LanguageBreakEngine { * is capable of handling. * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks An allocated C array of the breaks found, if any * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const; /** *

Tell the engine to handle a particular character and break type.

* * @param c A character which the engine should handle - * @param breakType The type of text break for which the engine should handle c */ - virtual void handleCharacter(UChar32 c, int32_t breakType); + virtual void handleCharacter(UChar32 c); }; @@ -250,11 +241,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType); + virtual const LanguageBreakEngine *getEngineFor(UChar32 c); protected: /** @@ -263,21 +252,17 @@ class ICULanguageBreakFactory : public LanguageBreakFactory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType); + virtual const LanguageBreakEngine *loadEngineFor(UChar32 c); /** *

Create a DictionaryMatcher for the specified script and break type.

* @param script An ISO 15924 script code that identifies the dictionary to be * created. - * @param breakType The kind of text break for which a dictionary is - * sought. * @return A DictionaryMatcher with the desired characteristics, or NULL. */ - virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType); + virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script); }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/brkiter.cpp b/deps/icu-small/source/common/brkiter.cpp index a509ff10c946ec..23e0cc3c153392 100644 --- a/deps/icu-small/source/common/brkiter.cpp +++ b/deps/icu-small/source/common/brkiter.cpp @@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN // ------------------------------------- BreakIterator* -BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) +BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status) { char fnbuff[256]; char ext[4]={'\0'}; @@ -121,7 +121,6 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, U_LOCALE_BASED(locBased, *(BreakIterator*)result); locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale.data()); - result->setBreakType(kind); } ures_close(b); @@ -413,10 +412,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) BreakIterator *result = NULL; switch (kind) { case UBRK_CHARACTER: - result = BreakIterator::buildInstance(loc, "grapheme", kind, status); + result = BreakIterator::buildInstance(loc, "grapheme", status); break; case UBRK_WORD: - result = BreakIterator::buildInstance(loc, "word", kind, status); + result = BreakIterator::buildInstance(loc, "word", status); break; case UBRK_LINE: uprv_strcpy(lbType, "line"); @@ -429,10 +428,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) uprv_strcat(lbType, lbKeyValue); } } - result = BreakIterator::buildInstance(loc, lbType, kind, 
status); + result = BreakIterator::buildInstance(loc, lbType, status); break; case UBRK_SENTENCE: - result = BreakIterator::buildInstance(loc, "sentence", kind, status); + result = BreakIterator::buildInstance(loc, "sentence", status); #if !UCONFIG_NO_FILTERED_BREAK_ITERATION { char ssKeyValue[kKeyValueLenMax] = {0}; @@ -449,7 +448,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) #endif break; case UBRK_TITLE: - result = BreakIterator::buildInstance(loc, "title", kind, status); + result = BreakIterator::buildInstance(loc, "title", status); break; default: status = U_ILLEGAL_ARGUMENT_ERROR; diff --git a/deps/icu-small/source/common/bytesinkutil.cpp b/deps/icu-small/source/common/bytesinkutil.cpp index bf1a2d45f8ae5a..6af7ddfd597638 100644 --- a/deps/icu-small/source/common/bytesinkutil.cpp +++ b/deps/icu-small/source/common/bytesinkutil.cpp @@ -92,20 +92,16 @@ ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) { sink.Append(s8, 2); } -UBool -ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length, - ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return FALSE; } - if (length > 0) { - if (edits != nullptr) { - edits->addUnchanged(length); - } - if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { - sink.Append(reinterpret_cast(s), length); - } +void +ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits) { + U_ASSERT(length > 0); + if (edits != nullptr) { + edits->addUnchanged(length); + } + if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { + sink.Append(reinterpret_cast(s), length); } - return TRUE; } UBool @@ -117,7 +113,11 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit, errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return FALSE; } - return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode); + int32_t length = (int32_t)(limit - s); + if (length > 0) { + 
appendNonEmptyUnchanged(s, length, sink, options, edits); + } + return TRUE; } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/bytesinkutil.h b/deps/icu-small/source/common/bytesinkutil.h index 004b49c4ce62ea..8287ffea4ca713 100644 --- a/deps/icu-small/source/common/bytesinkutil.h +++ b/deps/icu-small/source/common/bytesinkutil.h @@ -43,11 +43,19 @@ class U_COMMON_API ByteSinkUtil { static UBool appendUnchanged(const uint8_t *s, int32_t length, ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode); + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); } + return TRUE; + } static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit, ByteSink &sink, uint32_t options, Edits *edits, UErrorCode &errorCode); + +private: + static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits); }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/cmemory.cpp b/deps/icu-small/source/common/cmemory.cpp index 300279c2430d10..0b7e432c4dee46 100644 --- a/deps/icu-small/source/common/cmemory.cpp +++ b/deps/icu-small/source/common/cmemory.cpp @@ -41,30 +41,6 @@ static int n=0; static long b=0; #endif -#if U_DEBUG - -static char gValidMemorySink = 0; - -U_CAPI void uprv_checkValidMemory(const void *p, size_t n) { - /* - * Access the memory to ensure that it's all valid. - * Load and save a computed value to try to ensure that the compiler - * does not throw away the whole loop. - * A thread analyzer might complain about un-mutexed access to gValidMemorySink - * which is true but harmless because no one ever uses the value in gValidMemorySink. 
- */ - const char *s = (const char *)p; - char c = gValidMemorySink; - size_t i; - U_ASSERT(p != NULL); - for(i = 0; i < n; ++i) { - c ^= s[i]; - } - gValidMemorySink = c; -} - -#endif /* U_DEBUG */ - U_CAPI void * U_EXPORT2 uprv_malloc(size_t s) { #if U_DEBUG && defined(UPRV_MALLOC_COUNT) diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h index 83a0129651e468..a44f9a190293bd 100644 --- a/deps/icu-small/source/common/cmemory.h +++ b/deps/icu-small/source/common/cmemory.h @@ -36,31 +36,10 @@ #include #endif -#if U_DEBUG - -/* - * The C++ standard requires that the source pointer for memcpy() & memmove() - * is valid, not NULL, and not at the end of an allocated memory block. - * In debug mode, we read one byte from the source point to verify that it's - * a valid, readable pointer. - */ - -U_CAPI void uprv_checkValidMemory(const void *p, size_t n); - -#define uprv_memcpy(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)) -#define uprv_memmove(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)) - -#else #define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) #define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size) -#endif /* U_DEBUG */ - /** * \def UPRV_LENGTHOF * Convenience macro to determine the length of a fixed array at compile-time. 
diff --git a/deps/icu-small/source/common/cstring.h b/deps/icu-small/source/common/cstring.h index 2232efcda5c0ef..ed0b1a7c8b0be8 100644 --- a/deps/icu-small/source/common/cstring.h +++ b/deps/icu-small/source/common/cstring.h @@ -40,28 +40,10 @@ #define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c) #define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c) #define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c) - -#if U_DEBUG - -#define uprv_strncpy(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)) -#define uprv_strncmp(s1, s2, n) ( \ - uprv_checkValidMemory(s1, 1), \ - uprv_checkValidMemory(s2, 1), \ - U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)) -#define uprv_strncat(dst, src, n) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)) - -#else - #define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size) #define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n) #define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n) -#endif /* U_DEBUG */ - /** * Is c an ASCII-repertoire letter a-z or A-Z? 
* Note: The implementation is specific to whether ICU is compiled for diff --git a/deps/icu-small/source/common/dictbe.cpp b/deps/icu-small/source/common/dictbe.cpp index 02fc8a4726cf21..419d062ef25d44 100644 --- a/deps/icu-small/source/common/dictbe.cpp +++ b/deps/icu-small/source/common/dictbe.cpp @@ -29,24 +29,21 @@ U_NAMESPACE_BEGIN ****************************************************************** */ -DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) { - fTypes = breakTypes; +DictionaryBreakEngine::DictionaryBreakEngine() { } DictionaryBreakEngine::~DictionaryBreakEngine() { } UBool -DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const { - return (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes) - && fSet.contains(c)); +DictionaryBreakEngine::handles(UChar32 c) const { + return fSet.contains(c); } int32_t DictionaryBreakEngine::findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const { (void)startPos; // TODO: remove this param? int32_t result = 0; @@ -66,10 +63,8 @@ DictionaryBreakEngine::findBreaks( UText *text, } rangeStart = start; rangeEnd = current; - if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) { - result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); - utext_setNativeIndex(text, current); - } + result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); + utext_setNativeIndex(text, current); return result; } @@ -194,7 +189,7 @@ static const int32_t THAI_MIN_WORD = 2; static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2; ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1<Default constructor.

- * - */ - DictionaryBreakEngine(); - public: /** - *

Constructor setting the break types handled.

- * - * @param breakTypes A bitmap of types handled by the engine. + *

Constructor

*/ - DictionaryBreakEngine( uint32_t breakTypes ); + DictionaryBreakEngine(); /** *

Virtual destructor.

@@ -74,11 +59,10 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * a particular kind of break.

* * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles( UChar32 c, int32_t breakType ) const; + virtual UBool handles(UChar32 c) const; /** *

Find any breaks within a run in the supplied text.

@@ -88,14 +72,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * that starts from the first character in the range. * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks vector of int32_t to receive the break positions * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const; protected: @@ -107,13 +89,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine { */ virtual void setCharacters( const UnicodeSet &set ); - /** - *

Set the break types handled by this engine.

- * - * @param breakTypes A bitmap of types handled by the engine. - */ -// virtual void setBreakTypes( uint32_t breakTypes ); - /** *

Divide up a range of known dictionary characters handled by this break engine.

* diff --git a/deps/icu-small/source/common/filteredbrk.cpp b/deps/icu-small/source/common/filteredbrk.cpp index 6a38b1bf3baf40..162b38de5d6fa2 100644 --- a/deps/icu-small/source/common/filteredbrk.cpp +++ b/deps/icu-small/source/common/filteredbrk.cpp @@ -693,6 +693,11 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st return (U_SUCCESS(status))? ret.orphan(): NULL; } +FilteredBreakIteratorBuilder * +FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) { + return createEmptyInstance(status); +} + FilteredBreakIteratorBuilder * FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) { if(U_FAILURE(status)) return NULL; diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp index 54b289e24d1e0a..69f92d94c602c3 100644 --- a/deps/icu-small/source/common/rbbi.cpp +++ b/deps/icu-small/source/common/rbbi.cpp @@ -64,7 +64,9 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator) * Constructs a RuleBasedBreakIterator that uses the already-created * tables object that is passed in as a parameter. 
*/ -RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) { +RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) + : fSCharIter(UnicodeString()) +{ init(status); fData = new RBBIDataWrapper(data, status); // status checked in constructor if (U_FAILURE(status)) {return;} @@ -80,7 +82,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode // RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, - UErrorCode &status) { + UErrorCode &status) + : fSCharIter(UnicodeString()) +{ init(status); if (U_FAILURE(status)) { return; @@ -110,6 +114,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, // //------------------------------------------------------------------------------- RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status) + : fSCharIter(UnicodeString()) { init(status); fData = new RBBIDataWrapper(udm, status); // status checked in constructor @@ -130,6 +135,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &sta RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, UParseError &parseError, UErrorCode &status) + : fSCharIter(UnicodeString()) { init(status); if (U_FAILURE(status)) {return;} @@ -152,7 +158,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, // Used when creating a RuleBasedBreakIterator from a set // of rules. 
//------------------------------------------------------------------------------- -RuleBasedBreakIterator::RuleBasedBreakIterator() { +RuleBasedBreakIterator::RuleBasedBreakIterator() + : fSCharIter(UnicodeString()) +{ UErrorCode status = U_ZERO_ERROR; init(status); } @@ -165,7 +173,8 @@ RuleBasedBreakIterator::RuleBasedBreakIterator() { // //------------------------------------------------------------------------------- RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other) -: BreakIterator(other) +: BreakIterator(other), + fSCharIter(UnicodeString()) { UErrorCode status = U_ZERO_ERROR; this->init(status); @@ -177,17 +186,13 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& oth * Destructor */ RuleBasedBreakIterator::~RuleBasedBreakIterator() { - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // fCharIter was adopted from the outside. delete fCharIter; } fCharIter = NULL; - delete fSCharIter; - fSCharIter = NULL; - delete fDCharIter; - fDCharIter = NULL; - utext_close(fText); + utext_close(&fText); if (fData != NULL) { fData->removeReference(); @@ -217,26 +222,29 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { } BreakIterator::operator=(that); - fBreakType = that.fBreakType; if (fLanguageBreakEngines != NULL) { delete fLanguageBreakEngines; fLanguageBreakEngines = NULL; // Just rebuild for now } // TODO: clone fLanguageBreakEngines from "that" UErrorCode status = U_ZERO_ERROR; - fText = utext_clone(fText, that.fText, FALSE, TRUE, &status); + utext_clone(&fText, &that.fText, FALSE, TRUE, &status); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { delete fCharIter; } - fCharIter = NULL; + fCharIter = &fSCharIter; - if (that.fCharIter != NULL ) { + if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) { // This is a little bit tricky - it will intially appear that // this->fCharIter is 
adopted, even if that->fCharIter was // not adopted. That's ok. fCharIter = that.fCharIter->clone(); } + fSCharIter = that.fSCharIter; + if (fCharIter == NULL) { + fCharIter = &fSCharIter; + } if (fData != NULL) { fData->removeReference(); @@ -269,33 +277,30 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { // //----------------------------------------------------------------------------- void RuleBasedBreakIterator::init(UErrorCode &status) { - fText = NULL; fCharIter = NULL; - fSCharIter = NULL; - fDCharIter = NULL; fData = NULL; fPosition = 0; fRuleStatusIndex = 0; fDone = false; fDictionaryCharCount = 0; - fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable - // dictionary behavior for Break Iterators that are - // built from rules. Even better would be the ability to - // declare the type in the rules. - fLanguageBreakEngines = NULL; fUnhandledBreakEngine = NULL; fBreakCache = NULL; fDictionaryCache = NULL; - if (U_FAILURE(status)) { + // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER. 
+ // fText = UTEXT_INITIALIZER; + static const UText initializedUText = UTEXT_INITIALIZER; + uprv_memcpy(&fText, &initializedUText, sizeof(UText)); + + if (U_FAILURE(status)) { return; } - fText = utext_openUChars(NULL, NULL, 0, &status); + utext_openUChars(&fText, NULL, 0, &status); fDictionaryCache = new DictionaryCache(this, status); fBreakCache = new BreakCache(this, status); - if (U_SUCCESS(status) && (fText == NULL || fDictionaryCache == NULL || fBreakCache == NULL)) { + if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) { status = U_MEMORY_ALLOCATION_ERROR; } @@ -344,7 +349,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const { const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that; - if (!utext_equals(fText, that2.fText)) { + if (!utext_equals(&fText, &that2.fText)) { // The two break iterators are operating on different text, // or have a different iteration position. // Note that fText's position is always the same as the break iterator's position. @@ -385,7 +390,7 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { } fBreakCache->reset(); fDictionaryCache->reset(); - fText = utext_clone(fText, ut, FALSE, TRUE, &status); + utext_clone(&fText, ut, FALSE, TRUE, &status); // Set up a dummy CharacterIterator to be returned if anyone // calls getText(). With input from UText, there is no reasonable @@ -393,27 +398,20 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { // Return one over an empty string instead - this is the closest // we can come to signaling a failure. 
// (GetText() is obsolete, this failure is sort of OK) - if (fDCharIter == NULL) { - static const UChar c = 0; - fDCharIter = new UCharCharacterIterator(&c, 0); - if (fDCharIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - } + fSCharIter.setText(UnicodeString()); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // existing fCharIter was adopted from the outside. Delete it now. delete fCharIter; } - fCharIter = fDCharIter; + fCharIter = &fSCharIter; this->first(); } UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const { - UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status); + UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status); return result; } @@ -439,7 +437,7 @@ void RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { // If we are holding a CharacterIterator adopted from a // previous call to this function, delete it now. - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { delete fCharIter; } @@ -450,9 +448,9 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { if (newText==NULL || newText->startIndex() != 0) { // startIndex !=0 wants to be an error, but there's no way to report it. // Make the iterator text be an empty string. - fText = utext_openUChars(fText, NULL, 0, &status); + utext_openUChars(&fText, NULL, 0, &status); } else { - fText = utext_openCharacterIterator(fText, newText, &status); + utext_openCharacterIterator(&fText, newText, &status); } this->first(); } @@ -467,23 +465,19 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) { UErrorCode status = U_ZERO_ERROR; fBreakCache->reset(); fDictionaryCache->reset(); - fText = utext_openConstUnicodeString(fText, &newText, &status); + utext_openConstUnicodeString(&fText, &newText, &status); // Set up a character iterator on the string. // Needed in case someone calls getText(). 
// Can not, unfortunately, do this lazily on the (probably never) // call to getText(), because getText is const. - if (fSCharIter == NULL) { - fSCharIter = new StringCharacterIterator(newText); - } else { - fSCharIter->setText(newText); - } + fSCharIter.setText(newText); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // old fCharIter was adopted from the outside. Delete it. delete fCharIter; } - fCharIter = fSCharIter; + fCharIter = &fSCharIter; this->first(); } @@ -503,14 +497,14 @@ RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, U status = U_ILLEGAL_ARGUMENT_ERROR; return *this; } - int64_t pos = utext_getNativeIndex(fText); + int64_t pos = utext_getNativeIndex(&fText); // Shallow read-only clone of the new UText into the existing input UText - fText = utext_clone(fText, input, FALSE, TRUE, &status); + utext_clone(&fText, input, FALSE, TRUE, &status); if (U_FAILURE(status)) { return *this; } - utext_setNativeIndex(fText, pos); - if (utext_getNativeIndex(fText) != pos) { + utext_setNativeIndex(&fText, pos); + if (utext_getNativeIndex(&fText) != pos) { // Sanity check. The new input utext is supposed to have the exact same // contents as the old. If we can't set to the same position, it doesn't. // The contents underlying the old utext might be invalid at this point, @@ -540,7 +534,7 @@ int32_t RuleBasedBreakIterator::first(void) { * @return The text's past-the-end offset. */ int32_t RuleBasedBreakIterator::last(void) { - int32_t endPos = (int32_t)utext_nativeLength(fText); + int32_t endPos = (int32_t)utext_nativeLength(&fText); UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position. (void)endShouldBeBoundary; U_ASSERT(endShouldBeBoundary); @@ -611,8 +605,8 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) { // Move requested offset to a code point start. It might be on a trail surrogate, // or on a trail byte if the input is UTF-8. 
Or it may be beyond the end of the text. - utext_setNativeIndex(fText, startPos); - startPos = (int32_t)utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, startPos); + startPos = (int32_t)utext_getNativeIndex(&fText); UErrorCode status = U_ZERO_ERROR; fBreakCache->following(startPos, status); @@ -626,15 +620,15 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) { * @return The position of the last boundary before the starting position. */ int32_t RuleBasedBreakIterator::preceding(int32_t offset) { - if (fText == NULL || offset > utext_nativeLength(fText)) { + if (offset > utext_nativeLength(&fText)) { return last(); } // Move requested offset to a code point start. It might be on a trail surrogate, // or on a trail byte if the input is UTF-8. - utext_setNativeIndex(fText, offset); - int32_t adjustedOffset = utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, offset); + int32_t adjustedOffset = utext_getNativeIndex(&fText); UErrorCode status = U_ZERO_ERROR; fBreakCache->preceding(adjustedOffset, status); @@ -660,8 +654,8 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { // Note that isBoundary() is always be false for offsets that are not on code point boundaries. // But we still need the side effect of leaving iteration at the following boundary. - utext_setNativeIndex(fText, offset); - int32_t adjustedOffset = utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, offset); + int32_t adjustedOffset = utext_getNativeIndex(&fText); bool result = false; UErrorCode status = U_ZERO_ERROR; @@ -669,7 +663,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { result = (fBreakCache->current() == offset); } - if (result && adjustedOffset < offset && utext_char32At(fText, offset) == U_SENTINEL) { + if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) { // Original offset is beyond the end of the text. 
Return FALSE, it's not a boundary, // but the iteration position remains set to the end of the text, which is a boundary. return FALSE; @@ -789,9 +783,9 @@ int32_t RuleBasedBreakIterator::handleNext() { // if we're already at the end of the text, return DONE. initialPosition = fPosition; - UTEXT_SETNATIVEINDEX(fText, initialPosition); + UTEXT_SETNATIVEINDEX(&fText, initialPosition); result = initialPosition; - c = UTEXT_NEXT32(fText); + c = UTEXT_NEXT32(&fText); if (c==U_SENTINEL) { fDone = TRUE; return UBRK_DONE; @@ -854,7 +848,7 @@ int32_t RuleBasedBreakIterator::handleNext() { #ifdef RBBI_DEBUG if (gTrace) { - RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText)); + RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { @@ -867,9 +861,7 @@ int32_t RuleBasedBreakIterator::handleNext() { // State Transition - move machine to its next state // - // Note: fNextState is defined as uint16_t[2], but we are casting - // a generated RBBI table to RBBIStateTableRow and some tables - // actually have more than 2 categories. + // fNextState is a variable-length array. U_ASSERT(categoryfHeader->fCatCount); state = row->fNextState[category]; /*Not accessing beyond memory*/ row = (RBBIStateTableRow *) @@ -880,7 +872,7 @@ int32_t RuleBasedBreakIterator::handleNext() { if (row->fAccepting == -1) { // Match found, common case. if (mode != RBBI_START) { - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values. } @@ -898,7 +890,7 @@ int32_t RuleBasedBreakIterator::handleNext() { int16_t rule = row->fLookAhead; if (rule != 0) { // At the position of a '/' in a look-ahead match. Record it. 
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText); + int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText); lookAheadMatches.setPosition(rule, pos); } @@ -914,7 +906,7 @@ int32_t RuleBasedBreakIterator::handleNext() { // the input position. The next iteration will be processing the // first real input character. if (mode == RBBI_RUN) { - c = UTEXT_NEXT32(fText); + c = UTEXT_NEXT32(&fText); } else { if (mode == RBBI_START) { mode = RBBI_RUN; @@ -928,9 +920,9 @@ int32_t RuleBasedBreakIterator::handleNext() { // (This really indicates a defect in the break rules. They should always match // at least one character.) if (result == initialPosition) { - utext_setNativeIndex(fText, initialPosition); - utext_next32(fText); - result = (int32_t)utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, initialPosition); + utext_next32(&fText); + result = (int32_t)utext_getNativeIndex(&fText); fRuleStatusIndex = 0; } @@ -965,7 +957,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { int32_t initialPosition = 0; const RBBIStateTable *stateTable = fData->fSafeRevTable; - UTEXT_SETNATIVEINDEX(fText, fromPosition); + UTEXT_SETNATIVEINDEX(&fText, fromPosition); #ifdef RBBI_DEBUG if (gTrace) { RBBIDebugPuts("Handle Previous pos char state category"); @@ -973,14 +965,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { #endif // if we're already at the start of the text, return DONE. - if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) { + if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) { return BreakIterator::DONE; } // Set up the starting char. 
- initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText); + initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(&fText); result = initialPosition; - c = UTEXT_PREVIOUS32(fText); + c = UTEXT_PREVIOUS32(&fText); // Set the initial state for the state machine state = START_STATE; @@ -1028,7 +1020,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { #ifdef RBBI_DEBUG if (gTrace) { - RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText)); + RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { @@ -1041,9 +1033,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // State Transition - move machine to its next state // - // Note: fNextState is defined as uint16_t[2], but we are casting - // a generated RBBI table to RBBIStateTableRow and some tables - // actually have more than 2 categories. + // fNextState is a variable-length array. U_ASSERT(categoryfHeader->fCatCount); state = row->fNextState[category]; /*Not accessing beyond memory*/ row = (RBBIStateTableRow *) @@ -1051,7 +1041,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { if (row->fAccepting == -1) { // Match found, common case. - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } int16_t completedRule = row->fAccepting; @@ -1059,14 +1049,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // Lookahead match is completed. int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule); if (lookaheadResult >= 0) { - UTEXT_SETNATIVEINDEX(fText, lookaheadResult); + UTEXT_SETNATIVEINDEX(&fText, lookaheadResult); return lookaheadResult; } } int16_t rule = row->fLookAhead; if (rule != 0) { // At the position of a '/' in a look-ahead match. Record it. 
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText); + int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText); lookAheadMatches.setPosition(rule, pos); } @@ -1082,7 +1072,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // the input position. The next iteration will be processing the // first real input character. if (mode == RBBI_RUN) { - c = UTEXT_PREVIOUS32(fText); + c = UTEXT_PREVIOUS32(&fText); } else { if (mode == RBBI_START) { mode = RBBI_RUN; @@ -1096,9 +1086,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // (This really indicates a defect in the break rules. They should always match // at least one character.) if (result == initialPosition) { - UTEXT_SETNATIVEINDEX(fText, initialPosition); - UTEXT_PREVIOUS32(fText); - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + UTEXT_SETNATIVEINDEX(&fText, initialPosition); + UTEXT_PREVIOUS32(&fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } #ifdef RBBI_DEBUG @@ -1247,7 +1237,7 @@ static void U_CALLCONV initLanguageFactories() { static const LanguageBreakEngine* -getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType) +getLanguageBreakEngineFromFactory(UChar32 c) { umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories); if (gLanguageBreakFactories == NULL) { @@ -1258,7 +1248,7 @@ getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType) const LanguageBreakEngine *lbe = NULL; while (--i >= 0) { LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i)); - lbe = factory->getEngineFor(c, breakType); + lbe = factory->getEngineFor(c); if (lbe != NULL) { break; } @@ -1290,14 +1280,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { int32_t i = fLanguageBreakEngines->size(); while (--i >= 0) { lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i)); - if (lbe->handles(c, fBreakType)) { + if (lbe->handles(c)) { return lbe; } } // No existing dictionary took the 
character. See if a factory wants to // give us a new LanguageBreakEngine for this character. - lbe = getLanguageBreakEngineFromFactory(c, fBreakType); + lbe = getLanguageBreakEngineFromFactory(c); // If we got one, use it and push it on our stack. if (lbe != NULL) { @@ -1313,6 +1303,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { fUnhandledBreakEngine = new UnhandledEngine(status); if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) { status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } // Put it last so that scripts for which we have an engine get tried // first. @@ -1327,25 +1318,19 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { // Tell the reject engine about the character; at its discretion, it may // add more than just the one character. - fUnhandledBreakEngine->handleCharacter(c, fBreakType); + fUnhandledBreakEngine->handleCharacter(c); return fUnhandledBreakEngine; } - - -/*int32_t RuleBasedBreakIterator::getBreakType() const { - return fBreakType; -}*/ - -void RuleBasedBreakIterator::setBreakType(int32_t type) { - fBreakType = type; -} - void RuleBasedBreakIterator::dumpCache() { fBreakCache->dumpCache(); } +void RuleBasedBreakIterator::dumpTables() { + fData->printData(); +} + /** * Returns the description used to create this iterator */ diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp index 9d716bb34274c4..ba9329d4771321 100644 --- a/deps/icu-small/source/common/rbbi_cache.cpp +++ b/deps/icu-small/source/common/rbbi_cache.cpp @@ -26,14 +26,11 @@ U_NAMESPACE_BEGIN */ RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) : - fBI(bi), fBreaks(NULL), fPositionInCache(-1), + fBI(bi), fBreaks(status), fPositionInCache(-1), fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) { - fBreaks = new UVector32(status); } RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() { - delete fBreaks; - 
fBreaks = NULL; } void RuleBasedBreakIterator::DictionaryCache::reset() { @@ -42,7 +39,7 @@ void RuleBasedBreakIterator::DictionaryCache::reset() { fLimit = 0; fFirstRuleStatusIndex = 0; fOtherRuleStatusIndex = 0; - fBreaks->removeAllElements(); + fBreaks.removeAllElements(); } UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) { @@ -54,13 +51,13 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_ // Sequential iteration, move from previous boundary to the following int32_t r = 0; - if (fPositionInCache >= 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) { + if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { ++fPositionInCache; - if (fPositionInCache >= fBreaks->size()) { + if (fPositionInCache >= fBreaks.size()) { fPositionInCache = -1; return FALSE; } - r = fBreaks->elementAti(fPositionInCache); + r = fBreaks.elementAti(fPositionInCache); U_ASSERT(r > fromPos); *result = r; *statusIndex = fOtherRuleStatusIndex; @@ -69,8 +66,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_ // Random indexing. Linear search for the boundary following the given position. 
- for (fPositionInCache = 0; fPositionInCache < fBreaks->size(); ++fPositionInCache) { - r= fBreaks->elementAti(fPositionInCache); + for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) { + r= fBreaks.elementAti(fPositionInCache); if (r > fromPos) { *result = r; *statusIndex = fOtherRuleStatusIndex; @@ -90,16 +87,16 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_ } if (fromPos == fLimit) { - fPositionInCache = fBreaks->size() - 1; + fPositionInCache = fBreaks.size() - 1; if (fPositionInCache >= 0) { - U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos); + U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos); } } int32_t r; - if (fPositionInCache > 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) { + if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { --fPositionInCache; - r = fBreaks->elementAti(fPositionInCache); + r = fBreaks.elementAti(fPositionInCache); U_ASSERT(r < fromPos); *result = r; *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; @@ -111,8 +108,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_ return FALSE; } - for (fPositionInCache = fBreaks->size()-1; fPositionInCache >= 0; --fPositionInCache) { - r = fBreaks->elementAti(fPositionInCache); + for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) { + r = fBreaks.elementAti(fPositionInCache); if (r < fromPos) { *result = r; *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; @@ -141,7 +138,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo int32_t current; UErrorCode status = U_ZERO_ERROR; int32_t foundBreakCount = 0; - UText *text = fBI->fText; + UText *text = &fBI->fText; // Loop through the text, looking for ranges of dictionary characters. 
// For each span, find the appropriate break engine, and ask it to find @@ -168,7 +165,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo // Ask the language object if there are any breaks. It will add them to the cache and // leave the text pointer on the other side of its range, ready to search for the next one. if (lbe != NULL) { - foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks); + foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks); } // Reload the loop variables for the next go-round @@ -182,21 +179,21 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo // printf("foundBreakCount = %d\n", foundBreakCount); if (foundBreakCount > 0) { - U_ASSERT(foundBreakCount == fBreaks->size()); - if (startPos < fBreaks->elementAti(0)) { + U_ASSERT(foundBreakCount == fBreaks.size()); + if (startPos < fBreaks.elementAti(0)) { // The dictionary did not place a boundary at the start of the segment of text. // Add one now. This should not commonly happen, but it would be easy for interactions // of the rules for dictionary segments and the break engine implementations to // inadvertently cause it. Cover it here, just in case. - fBreaks->insertElementAt(startPos, 0, status); + fBreaks.insertElementAt(startPos, 0, status); } - if (endPos > fBreaks->peeki()) { - fBreaks->push(endPos, status); + if (endPos > fBreaks.peeki()) { + fBreaks.push(endPos, status); } fPositionInCache = 0; // Note: Dictionary matching may extend beyond the original limit. - fStart = fBreaks->elementAti(0); - fLimit = fBreaks->peeki(); + fStart = fBreaks.elementAti(0); + fLimit = fBreaks.peeki(); } else { // there were no language-based breaks, even though the segment contained // dictionary characters. 
Subsequent attempts to fetch boundaries from the dictionary cache diff --git a/deps/icu-small/source/common/rbbi_cache.h b/deps/icu-small/source/common/rbbi_cache.h index 8dc7320db90a9e..fd6deb4333a97e 100644 --- a/deps/icu-small/source/common/rbbi_cache.h +++ b/deps/icu-small/source/common/rbbi_cache.h @@ -56,7 +56,7 @@ class RuleBasedBreakIterator::DictionaryCache: public UMemory { RuleBasedBreakIterator *fBI; - UVector32 *fBreaks; // A vector containing the boundaries. + UVector32 fBreaks; // A vector containing the boundaries. int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following() // or preceding(). Optimizes sequential access. int32_t fStart; // Text position of first boundary in cache. diff --git a/deps/icu-small/source/common/rbbidata.cpp b/deps/icu-small/source/common/rbbidata.cpp index d66eca82f80b39..18912a6a7b3dcf 100644 --- a/deps/icu-small/source/common/rbbidata.cpp +++ b/deps/icu-small/source/common/rbbidata.cpp @@ -267,8 +267,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab #endif -#ifdef RBBI_DEBUG void RBBIDataWrapper::printData() { +#ifdef RBBI_DEBUG RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader); RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1], fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]); @@ -285,8 +285,8 @@ void RBBIDataWrapper::printData() { RBBIDebugPrintf("%c", fRuleSource[c]); } RBBIDebugPrintf("\n\n"); -} #endif +} U_NAMESPACE_END diff --git a/deps/icu-small/source/common/rbbidata.h b/deps/icu-small/source/common/rbbidata.h index 75427863d9fa2a..8b21acca304d5f 100644 --- a/deps/icu-small/source/common/rbbidata.h +++ b/deps/icu-small/source/common/rbbidata.h @@ -116,9 +116,10 @@ struct RBBIStateTableRow { /* StatusTable of the set of matching */ /* tags (rule status values) */ int16_t fReserved; - uint16_t fNextState[2]; /* Next State, indexed by char category. 
*/ - /* This array does not have two elements */ - /* Array Size is actually fData->fHeader->fCatCount */ + uint16_t fNextState[1]; /* Next State, indexed by char category. */ + /* Variable-length array declared with length 1 */ + /* to disable bounds checkers. */ + /* Array Size is actually fData->fHeader->fCatCount*/ /* CAUTION: see RBBITableBuilder::getTableSize() */ /* before changing anything here. */ }; @@ -129,7 +130,9 @@ struct RBBIStateTable { uint32_t fRowLen; /* Length of a state table row, in bytes. */ uint32_t fFlags; /* Option Flags for this state table */ uint32_t fReserved; /* reserved */ - char fTableData[4]; /* First RBBIStateTableRow begins here. */ + char fTableData[1]; /* First RBBIStateTableRow begins here. */ + /* Variable-length array declared with length 1 */ + /* to disable bounds checkers. */ /* (making it char[] simplifies ugly address */ /* arithmetic for indexing variable length rows.) */ }; @@ -162,13 +165,8 @@ class RBBIDataWrapper : public UMemory { UBool operator ==(const RBBIDataWrapper &other) const; int32_t hashCode(); const UnicodeString &getRuleSourceString() const; -#ifdef RBBI_DEBUG void printData(); void printTable(const char *heading, const RBBIStateTable *table); -#else - #define printData() - #define printTable(heading, table) -#endif /* */ /* Pointers to items within the data */ diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp index c67f6f8166c0a0..9fc8f8e814200a 100644 --- a/deps/icu-small/source/common/rbbirb.cpp +++ b/deps/icu-small/source/common/rbbirb.cpp @@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, UParseError *parseErr, UErrorCode &status) - : fRules(rules) + : fRules(rules), fStrippedRules(rules) { fStatus = &status; // status is checked below fParseError = parseErr; @@ -147,8 +147,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { return NULL; } - // Remove comments and whitespace from the rules to make it 
smaller. - UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules)); + // Remove whitespace from the rules to make it smaller. + // The rule parser has already removed comments. + fStrippedRules = fScanner->stripRules(fStrippedRules); // Calculate the size of each section in the data. // Sizes here are padded up to a multiple of 8 for better memory alignment. @@ -162,7 +163,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize()); int32_t trieSize = align8(fSetBuilder->getTrieSize()); int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); - int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar)); + int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar)); (void)safeFwdTableSize; @@ -225,7 +226,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { data->fStatusTable = data->fTrie + trieSize; data->fStatusTableLen= statusTableSize; data->fRuleSource = data->fStatusTable + statusTableSize; - data->fRuleSourceLen = strippedRules.length() * sizeof(UChar); + data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar); uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); @@ -245,7 +246,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { ruleStatusTable[i] = fRuleStatusVals->elementAti(i); } - strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); + fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); return data; } @@ -281,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, // // UnicodeSet processing. // Munge the Unicode Sets to create a set of character categories. - // Generate the mapping tables (TRIE) from input 32-bit characters to + // Generate the mapping tables (TRIE) from input code points to // the character categories. 
// - builder.fSetBuilder->build(); + builder.fSetBuilder->buildRanges(); // @@ -316,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, } #endif + builder.optimizeTables(); + builder.fSetBuilder->buildTrie(); + + + // // Package up the compiled data into a memory image // in the run-time format. @@ -347,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, return This; } +void RBBIRuleBuilder::optimizeTables() { + int32_t leftClass; + int32_t rightClass; + + leftClass = 3; + rightClass = 0; + while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) { + fSetBuilder->mergeCategories(leftClass, rightClass); + fForwardTables->removeColumn(rightClass); + fReverseTables->removeColumn(rightClass); + fSafeFwdTables->removeColumn(rightClass); + fSafeRevTables->removeColumn(rightClass); + } + + fForwardTables->removeDuplicateStates(); + fReverseTables->removeDuplicateStates(); + fSafeFwdTables->removeDuplicateStates(); + fSafeRevTables->removeDuplicateStates(); + + + +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/icu-small/source/common/rbbirb.h b/deps/icu-small/source/common/rbbirb.h index 6fbdbff7449a9f..511f394b458e93 100644 --- a/deps/icu-small/source/common/rbbirb.h +++ b/deps/icu-small/source/common/rbbirb.h @@ -126,10 +126,19 @@ class RBBIRuleBuilder : public UMemory { ); virtual ~RBBIRuleBuilder(); + + /** + * Fold together redundant character classes (table columns) and + * redundant states (table rows). Done after initial table generation, + * before serializing the result. + */ + void optimizeTables(); + char *fDebugEnv; // controls debug trace output UErrorCode *fStatus; // Error reporting. Keeping status UParseError *fParseError; // here avoids passing it everywhere. const UnicodeString &fRules; // The rule string that we are compiling + UnicodeString fStrippedRules; // The rule string, with comments stripped. 
RBBIRuleScanner *fScanner; // The scanner. RBBINode *fForwardTree; // The parse trees, generated by the scanner, diff --git a/deps/icu-small/source/common/rbbiscan.cpp b/deps/icu-small/source/common/rbbiscan.cpp index 1653a0c7bc7fe2..e3472ed599e15e 100644 --- a/deps/icu-small/source/common/rbbiscan.cpp +++ b/deps/icu-small/source/common/rbbiscan.cpp @@ -822,27 +822,24 @@ static const UChar chRParen = 0x29; //------------------------------------------------------------------------------ // -// stripRules Return a rules string without unnecessary -// characters. +// stripRules Return a rules string without extra spaces. +// (Comments are removed separately, during rule parsing.) // //------------------------------------------------------------------------------ UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { UnicodeString strippedRules; - int rulesLength = rules.length(); - for (int idx = 0; idx < rulesLength; ) { - UChar ch = rules[idx++]; - if (ch == chPound) { - while (idx < rulesLength - && ch != chCR && ch != chLF && ch != chNEL) - { - ch = rules[idx++]; - } - } - if (!u_isISOControl(ch)) { - strippedRules.append(ch); + int32_t rulesLength = rules.length(); + bool skippingSpaces = false; + + for (int32_t idx=0; idxfStrippedRules.setCharAt(i, u' '); + } } if (c.fChar == (UChar32)-1) { return; diff --git a/deps/icu-small/source/common/rbbisetb.cpp b/deps/icu-small/source/common/rbbisetb.cpp index c172da00df7964..4e7389b4af039b 100644 --- a/deps/icu-small/source/common/rbbisetb.cpp +++ b/deps/icu-small/source/common/rbbisetb.cpp @@ -91,7 +91,7 @@ RBBISetBuilder::~RBBISetBuilder() // from the Unicode Sets. 
// //------------------------------------------------------------------------ -void RBBISetBuilder::build() { +void RBBISetBuilder::buildRanges() { RBBINode *usetNode; RangeDescriptor *rlRange; @@ -245,11 +245,16 @@ void RBBISetBuilder::build() { if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();} if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();} +} + + +// +// Build the Trie table for mapping UChar32 values to the corresponding +// range group number. +// +void RBBISetBuilder::buildTrie() { + RangeDescriptor *rlRange; - // - // Build the Trie table for mapping UChar32 values to the corresponding - // range group number - // fTrie = utrie2_open(0, // Initial value for all code points. 0, // Error value for out-of-range input. fStatus); @@ -265,6 +270,22 @@ void RBBISetBuilder::build() { } +void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) { + U_ASSERT(left >= 1); + U_ASSERT(right > left); + for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) { + int32_t rangeNum = rd->fNum & ~DICT_BIT; + int32_t rangeDict = rd->fNum & DICT_BIT; + if (rangeNum == right) { + rd->fNum = left | rangeDict; + } else if (rangeNum > right) { + rd->fNum--; + } + } + --fGroupCount; +} + + //----------------------------------------------------------------------------------- // // getTrieSize() Return the size that will be required to serialize the Trie. 
@@ -446,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() { lastPrintedGroupNum = groupNum; RBBIDebugPrintf("%2i ", groupNum); - if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" ");} + if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" ");} for (i=0; ifIncludesSets->size(); i++) { RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); @@ -639,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) { void RangeDescriptor::setDictionaryFlag() { int i; - for (i=0; ifIncludesSets->size(); i++) { - RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); - UnicodeString setName; - RBBINode *setRef = usetNode->fParent; - if (setRef != NULL) { + static const char16_t *dictionary = u"dictionary"; + for (i=0; isize(); i++) { + RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); + RBBINode *setRef = usetNode->fParent; + if (setRef != nullptr) { RBBINode *varRef = setRef->fParent; - if (varRef != NULL && varRef->fType == RBBINode::varRef) { - setName = varRef->fText; + if (varRef && varRef->fType == RBBINode::varRef) { + const UnicodeString *setName = &varRef->fText; + if (setName->compare(dictionary, -1) == 0) { + fNum |= RBBISetBuilder::DICT_BIT; + break; + } } } - if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals. 
- this->fNum |= 0x4000; - break; - } } } diff --git a/deps/icu-small/source/common/rbbisetb.h b/deps/icu-small/source/common/rbbisetb.h index 7cedb45b33550f..a7a91b3b375b75 100644 --- a/deps/icu-small/source/common/rbbisetb.h +++ b/deps/icu-small/source/common/rbbisetb.h @@ -82,7 +82,8 @@ class RBBISetBuilder : public UMemory { RBBISetBuilder(RBBIRuleBuilder *rb); ~RBBISetBuilder(); - void build(); + void buildRanges(); + void buildTrie(); void addValToSets(UVector *sets, uint32_t val); void addValToSet (RBBINode *usetNode, uint32_t val); int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the @@ -93,6 +94,13 @@ class RBBISetBuilder : public UMemory { UChar32 getFirstChar(int32_t val) const; UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo // character were encountered. + /** merge two character categories that have been identified as having equivalent behavior. + * The ranges belonging to the right category (table column) will be added to the left. + */ + void mergeCategories(int32_t left, int32_t right); + + static constexpr int32_t DICT_BIT = 0x4000; + #ifdef RBBI_DEBUG void printSets(); void printRanges(); diff --git a/deps/icu-small/source/common/rbbitblb.cpp b/deps/icu-small/source/common/rbbitblb.cpp index b3e6ca51d159a5..61661a544249e2 100644 --- a/deps/icu-small/source/common/rbbitblb.cpp +++ b/deps/icu-small/source/common/rbbitblb.cpp @@ -22,6 +22,7 @@ #include "rbbidata.h" #include "cstring.h" #include "uassert.h" +#include "uvectr32.h" #include "cmemory.h" U_NAMESPACE_BEGIN @@ -761,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() { // if sd->fAccepting already had a value other than 0 or -1, leave it be. // If the end marker node is from a look-ahead rule, set - // the fLookAhead field or this state also. + // the fLookAhead field for this state also. if (endMarker->fLookAheadEnd) { // TODO: don't change value if already set? 
// TODO: allow for more than one active look-ahead rule in engine. @@ -1077,7 +1078,128 @@ void RBBITableBuilder::printPosSets(RBBINode *n) { } #endif +// +// findDuplCharClassFrom() +// +bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) { + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + + uint16_t table_base; + uint16_t table_dupl; + for (; baseCategory < numCols-1; ++baseCategory) { + for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) { + for (int32_t state=0; stateelementAt(state); + table_base = (uint16_t)sd->fDtran->elementAti(baseCategory); + table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory); + if (table_base != table_dupl) { + break; + } + } + if (table_base == table_dupl) { + return true; + } + } + } + return false; +} + + +// +// removeColumn() +// +void RBBITableBuilder::removeColumn(int32_t column) { + int32_t numStates = fDStates->size(); + for (int32_t state=0; stateelementAt(state); + U_ASSERT(column < sd->fDtran->size()); + sd->fDtran->removeElementAt(column); + } +} + +/* + * findDuplicateState + */ +bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) { + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + + for (; firstStateelementAt(firstState); + for (duplState=firstState+1; duplStateelementAt(duplState); + if (firstSD->fAccepting != duplSD->fAccepting || + firstSD->fLookAhead != duplSD->fLookAhead || + firstSD->fTagsIdx != duplSD->fTagsIdx) { + continue; + } + bool rowsMatch = true; + for (int32_t col=0; col < numCols; ++col) { + int32_t firstVal = firstSD->fDtran->elementAti(col); + int32_t duplVal = duplSD->fDtran->elementAti(col); + if (!((firstVal == duplVal) || + ((firstVal == firstState || firstVal == duplState) && + (duplVal == firstState || duplVal == duplState)))) { + rowsMatch = false; + break; + } + } + if (rowsMatch) { + return 
true; + } + } + } + return false; +} + +void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) { + U_ASSERT(keepState < duplState); + U_ASSERT(duplState < fDStates->size()); + RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState); + fDStates->removeElementAt(duplState); + delete duplSD; + + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + for (int32_t state=0; stateelementAt(state); + for (int32_t col=0; colfDtran->elementAti(col); + int32_t newVal = existingVal; + if (existingVal == duplState) { + newVal = keepState; + } else if (existingVal > duplState) { + newVal = existingVal - 1; + } + sd->fDtran->setElementAt(newVal, col); + } + if (sd->fAccepting == duplState) { + sd->fAccepting = keepState; + } else if (sd->fAccepting > duplState) { + sd->fAccepting--; + } + if (sd->fLookAhead == duplState) { + sd->fLookAhead = keepState; + } else if (sd->fLookAhead > duplState) { + sd->fLookAhead--; + } + } +} + + +/* + * RemoveDuplicateStates + */ +void RBBITableBuilder::removeDuplicateStates() { + int32_t firstState = 3; + int32_t duplicateState = 0; + while (findDuplicateState(firstState, duplicateState)) { + // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState); + removeState(firstState, duplicateState); + } +} //----------------------------------------------------------------------------- // @@ -1095,21 +1217,17 @@ int32_t RBBITableBuilder::getTableSize() const { return 0; } - size = sizeof(RBBIStateTable) - 4; // The header, with no rows to the table. + size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table. numRows = fDStates->size(); numCols = fRB->fSetBuilder->getNumCharCategories(); - // Note The declaration of RBBIStateTableRow is for a table of two columns. - // Therefore we subtract two from numCols when determining - // how much storage to add to a row for the total columns. 
- rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2); + rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols; size += numRows * rowSize; return size; } - //----------------------------------------------------------------------------- // // exportTable() export the state transition table in the format required @@ -1126,14 +1244,14 @@ void RBBITableBuilder::exportTable(void *where) { return; } - if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff || + int32_t catCount = fRB->fSetBuilder->getNumCharCategories(); + if (catCount > 0x7fff || fDStates->size() > 0x7fff) { *fStatus = U_BRK_INTERNAL_ERROR; return; } - table->fRowLen = sizeof(RBBIStateTableRow) + - sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2); + table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount; table->fNumStates = fDStates->size(); table->fFlags = 0; if (fRB->fLookAheadHardBreak) { @@ -1152,7 +1270,7 @@ void RBBITableBuilder::exportTable(void *where) { row->fAccepting = (int16_t)sd->fAccepting; row->fLookAhead = (int16_t)sd->fLookAhead; row->fTagIdx = (int16_t)sd->fTagsIdx; - for (col=0; colfSetBuilder->getNumCharCategories(); col++) { + for (col=0; colfNextState[col] = (uint16_t)sd->fDtran->elementAti(col); } } @@ -1259,7 +1377,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu fPositions = NULL; fDtran = NULL; - fDtran = new UVector(lastInputSymbol+1, *fStatus); + fDtran = new UVector32(lastInputSymbol+1, *fStatus); if (U_FAILURE(*fStatus)) { return; } @@ -1267,7 +1385,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu *fStatus = U_MEMORY_ALLOCATION_ERROR; return; } - fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized. + fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized. // It is indexed by input symbols, and will // hold the next state number for each // symbol. 
diff --git a/deps/icu-small/source/common/rbbitblb.h b/deps/icu-small/source/common/rbbitblb.h index 10415018785209..09b57b5cf0f4c3 100644 --- a/deps/icu-small/source/common/rbbitblb.h +++ b/deps/icu-small/source/common/rbbitblb.h @@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN class RBBIRuleScanner; class RBBIRuleBuilder; +class UVector32; // // class RBBITableBuilder is part of the RBBI rule compiler. @@ -42,9 +43,24 @@ class RBBITableBuilder : public UMemory { void build(); int32_t getTableSize() const; // Return the runtime size in bytes of // the built state table - void exportTable(void *where); // fill in the runtime state table. - // Sufficient memory must exist at - // the specified location. + + /** Fill in the runtime state table. Sufficient memory must exist at the specified location. + */ + void exportTable(void *where); + + /** Find duplicate (redundant) character classes, beginning after the specifed + * pair, within this state table. This is an iterator-like function, used to + * identify char classes (state table columns) that can be eliminated. + */ + bool findDuplCharClassFrom(int &baseClass, int &duplClass); + + /** Remove a column from the state table. Used when two character categories + * have been found equivalent, and merged together, to eliminate the uneeded table column. + */ + void removeColumn(int32_t column); + + /** Check for, and remove dupicate states (table rows). */ + void removeDuplicateStates(); private: @@ -60,8 +76,29 @@ class RBBITableBuilder : public UMemory { void flagTaggedStates(); void mergeRuleStatusVals(); + /** + * Merge redundant state table columns, eliminating character classes with identical behavior. + * Done after the state tables are generated, just before converting to their run-time format. + */ + int32_t mergeColumns(); + void addRuleRootNodes(UVector *dest, RBBINode *node); + /** Find the next duplicate state. An iterator function. 
+ * @param firstState (in/out) begin looking at this state, return the first of the + * pair of duplicates. + * @param duplicateState returns the duplicate state of fistState + * @return true if a duplicate pair of states was found. + */ + bool findDuplicateState(int32_t &firstState, int32_t &duplicateState); + + /** Remove a duplicate state/ + * @param keepState First of the duplicate pair. Keep it. + * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state + * to refer to keepState instead. + */ + void removeState(int32_t keepState, int32_t duplState); + // Set functions for UVector. // TODO: make a USet subclass of UVector @@ -112,7 +149,7 @@ class RBBIStateDescriptor : public UMemory { // with this state. Unordered (it's a set). // UVector contents are RBBINode * - UVector *fDtran; // Transitions out of this state. + UVector32 *fDtran; // Transitions out of this state. // indexed by input character // contents is int index of dest state // in RBBITableBuilder.fDStates diff --git a/deps/icu-small/source/common/sharedobject.cpp b/deps/icu-small/source/common/sharedobject.cpp index 37aa458e00f98f..6eeca8605f0002 100644 --- a/deps/icu-small/source/common/sharedobject.cpp +++ b/deps/icu-small/source/common/sharedobject.cpp @@ -8,7 +8,10 @@ * sharedobject.cpp */ #include "sharedobject.h" +#include "mutex.h" #include "uassert.h" +#include "umutex.h" +#include "unifiedcache.h" U_NAMESPACE_BEGIN @@ -17,69 +20,41 @@ SharedObject::~SharedObject() {} UnifiedCacheBase::~UnifiedCacheBase() {} void -SharedObject::addRef(UBool fromWithinCache) const { - umtx_atomic_inc(&totalRefCount); - - // Although items in use may not be correct immediately, it - // will be correct eventually. - if (umtx_atomic_inc(&hardRefCount) == 1 && cachePtr != NULL) { - // If this object is cached, and the hardRefCount goes from 0 to 1, - // then the increment must happen from within the cache while the - // cache global mutex is locked. 
In this way, we can be rest assured - // that data races can't happen if the cache performs some task if - // the hardRefCount is zero while the global cache mutex is locked. - (void)fromWithinCache; // Suppress unused variable warning in non-debug builds. - U_ASSERT(fromWithinCache); - cachePtr->incrementItemsInUse(); - } +SharedObject::addRef() const { + umtx_atomic_inc(&hardRefCount); } +// removeRef Decrement the reference count and delete if it is zero. +// Note that SharedObjects with a non-null cachePtr are owned by the +// unified cache, and the cache will be responsible for the actual deletion. +// The deletion could be as soon as immediately following the +// update to the reference count, if another thread is running +// a cache eviction cycle concurrently. +// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects. +// THE OBJECT MAY ALREADY BE GONE. void -SharedObject::removeRef(UBool fromWithinCache) const { - UBool decrementItemsInUse = (umtx_atomic_dec(&hardRefCount) == 0); - UBool allReferencesGone = (umtx_atomic_dec(&totalRefCount) == 0); - - // Although items in use may not be correct immediately, it - // will be correct eventually. 
- if (decrementItemsInUse && cachePtr != NULL) { - if (fromWithinCache) { - cachePtr->decrementItemsInUse(); +SharedObject::removeRef() const { + const UnifiedCacheBase *cache = this->cachePtr; + int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount); + U_ASSERT(updatedRefCount >= 0); + if (updatedRefCount == 0) { + if (cache) { + cache->handleUnreferencedObject(); } else { - cachePtr->decrementItemsInUseWithLockingAndEviction(); + delete this; } } - if (allReferencesGone) { - delete this; - } } -void -SharedObject::addSoftRef() const { - umtx_atomic_inc(&totalRefCount); - ++softRefCount; -} - -void -SharedObject::removeSoftRef() const { - --softRefCount; - if (umtx_atomic_dec(&totalRefCount) == 0) { - delete this; - } -} int32_t SharedObject::getRefCount() const { - return umtx_loadAcquire(totalRefCount); -} - -int32_t -SharedObject::getHardRefCount() const { return umtx_loadAcquire(hardRefCount); } void SharedObject::deleteIfZeroRefCount() const { - if(getRefCount() == 0) { + if (this->cachePtr == nullptr && getRefCount() == 0) { delete this; } } diff --git a/deps/icu-small/source/common/sharedobject.h b/deps/icu-small/source/common/sharedobject.h index 783b55948a8248..54655d0d716720 100644 --- a/deps/icu-small/source/common/sharedobject.h +++ b/deps/icu-small/source/common/sharedobject.h @@ -17,6 +17,8 @@ U_NAMESPACE_BEGIN +class SharedObject; + /** * Base class for unified cache exposing enough methods to SharedObject * instances to allow their addRef() and removeRef() methods to @@ -28,22 +30,12 @@ class U_COMMON_API UnifiedCacheBase : public UObject { UnifiedCacheBase() { } /** - * Called by addRefWhileHoldingCacheLock() when the hard reference count - * of its instance goes from 0 to 1. + * Notify the cache implementation that an object was seen transitioning to + * zero hard references. The cache may use this to keep track the number of + * unreferenced SharedObjects, and to trigger evictions. 
*/ - virtual void incrementItemsInUse() const = 0; + virtual void handleUnreferencedObject() const = 0; - /** - * Called by removeRef() when the hard reference count of its instance - * drops from 1 to 0. - */ - virtual void decrementItemsInUseWithLockingAndEviction() const = 0; - - /** - * Called by removeRefWhileHoldingCacheLock() when the hard reference - * count of its instance drops from 1 to 0. - */ - virtual void decrementItemsInUse() const = 0; virtual ~UnifiedCacheBase(); private: UnifiedCacheBase(const UnifiedCacheBase &); @@ -63,7 +55,6 @@ class U_COMMON_API SharedObject : public UObject { public: /** Initializes totalRefCount, softRefCount to 0. */ SharedObject() : - totalRefCount(0), softRefCount(0), hardRefCount(0), cachePtr(NULL) {} @@ -71,7 +62,6 @@ class U_COMMON_API SharedObject : public UObject { /** Initializes totalRefCount, softRefCount to 0. */ SharedObject(const SharedObject &other) : UObject(other), - totalRefCount(0), softRefCount(0), hardRefCount(0), cachePtr(NULL) {} @@ -79,93 +69,45 @@ class U_COMMON_API SharedObject : public UObject { virtual ~SharedObject(); /** - * Increments the number of references to this object. Thread-safe. + * Increments the number of hard references to this object. Thread-safe. + * Not for use from within the Unified Cache implementation. */ - void addRef() const { addRef(FALSE); } + void addRef() const; /** - * Increments the number of references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void addRefWhileHoldingCacheLock() const { addRef(TRUE); } - - /** - * Increments the number of soft references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void addSoftRef() const; - - /** - * Decrements the number of references to this object. Thread-safe. 
- */ - void removeRef() const { removeRef(FALSE); } - - /** - * Decrements the number of references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void removeRefWhileHoldingCacheLock() const { removeRef(TRUE); } - - /** - * Decrements the number of soft references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. + * Decrements the number of hard references to this object, and + * arrange for possible cache-eviction and/or deletion if ref + * count goes to zero. Thread-safe. + * + * Not for use from within the UnifiedCache implementation. */ - void removeSoftRef() const; + void removeRef() const; /** - * Returns the reference counter including soft references. + * Returns the number of hard references for this object. * Uses a memory barrier. */ int32_t getRefCount() const; - /** - * Returns the count of soft references only. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - int32_t getSoftRefCount() const { return softRefCount; } - - /** - * Returns the count of hard references only. Uses a memory barrier. - * Used for testing the cache. Regular clients won't need this. - */ - int32_t getHardRefCount() const; - /** * If noHardReferences() == TRUE then this object has no hard references. * Must be called only from within the internals of UnifiedCache. */ - inline UBool noHardReferences() const { return getHardRefCount() == 0; } + inline UBool noHardReferences() const { return getRefCount() == 0; } /** * If hasHardReferences() == TRUE then this object has hard references. * Must be called only from within the internals of UnifiedCache. */ - inline UBool hasHardReferences() const { return getHardRefCount() != 0; } - - /** - * If noSoftReferences() == TRUE then this object has no soft references. 
- * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - UBool noSoftReferences() const { return (softRefCount == 0); } + inline UBool hasHardReferences() const { return getRefCount() != 0; } /** - * Deletes this object if it has no references or soft references. + * Deletes this object if it has no references. + * Available for non-cached SharedObjects only. Ownership of cached objects + * is with the UnifiedCache, which is soley responsible for eviction and deletion. */ void deleteIfZeroRefCount() const; - /** - * @internal For UnifedCache use only to register this object with itself. - * Must be called before this object is exposed to multiple threads. - */ - void registerWithCache(const UnifiedCacheBase *ptr) const { - cachePtr = ptr; - } /** * Returns a writable version of ptr. @@ -219,15 +161,21 @@ class U_COMMON_API SharedObject : public UObject { } private: - mutable u_atomic_int32_t totalRefCount; - - // Any thread modifying softRefCount must hold the global cache mutex + /** + * The number of references from the UnifiedCache, which is + * the number of times that the sharedObject is stored as a hash table value. + * For use by UnifiedCache implementation code only. + * All access is synchronized by UnifiedCache's gCacheMutex + */ mutable int32_t softRefCount; + friend class UnifiedCache; + /** + * Reference count, excluding references from within the UnifiedCache implementation. 
+ */ mutable u_atomic_int32_t hardRefCount; + mutable const UnifiedCacheBase *cachePtr; - void addRef(UBool withCacheLock) const; - void removeRef(UBool withCacheLock) const; }; diff --git a/deps/icu-small/source/common/sprpimpl.h b/deps/icu-small/source/common/sprpimpl.h index aff40ad0dab671..26de904b1fde5a 100644 --- a/deps/icu-small/source/common/sprpimpl.h +++ b/deps/icu-small/source/common/sprpimpl.h @@ -90,7 +90,6 @@ struct UStringPrepProfile{ UTrie sprepTrie; const uint16_t* mappingData; UDataMemory* sprepData; - const UBiDiProps *bdp; /* used only if checkBiDi is set */ int32_t refCount; UBool isDataLoaded; UBool doNFKC; diff --git a/deps/icu-small/source/common/ubidi.cpp b/deps/icu-small/source/common/ubidi.cpp index 8e2fc36e5f1753..531ed64cff6ec8 100644 --- a/deps/icu-small/source/common/ubidi.cpp +++ b/deps/icu-small/source/common/ubidi.cpp @@ -152,9 +152,6 @@ ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ uprv_memset(pBiDi, 0, sizeof(UBiDi)); - /* get BiDi properties */ - pBiDi->bdp=ubidi_getSingleton(); - /* allocate memory for arrays as requested */ if(maxLength>0) { if( !getInitialDirPropsMemory(pBiDi, maxLength) || @@ -925,7 +922,7 @@ bracketProcessChar(BracketData *bd, int32_t position) { else match=0; if(match!=c && /* has a matching char */ - ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */ + ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */ /* special case: process synonyms create an opening entry for each synonym */ if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ @@ -3033,7 +3030,7 @@ ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) if( pBiDi->fnClassCallback == NULL || (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) { - dir = ubidi_getClass(pBiDi->bdp, c); + dir = ubidi_getClass(c); } if(dir >= U_CHAR_DIRECTION_COUNT) { dir = 
(UCharDirection)ON; diff --git a/deps/icu-small/source/common/ubidi_props.cpp b/deps/icu-small/source/common/ubidi_props.cpp index dcfb52c897d360..4141c21938a740 100644 --- a/deps/icu-small/source/common/ubidi_props.cpp +++ b/deps/icu-small/source/common/ubidi_props.cpp @@ -44,13 +44,6 @@ struct UBiDiProps { #define INCLUDED_FROM_UBIDI_PROPS_C #include "ubidi_props_data.h" -/* UBiDiProps singleton ----------------------------------------------------- */ - -U_CFUNC const UBiDiProps * -ubidi_getSingleton() { - return &ubidi_props_singleton; -} - /* set of property starts for UnicodeSet ------------------------------------ */ static UBool U_CALLCONV @@ -64,7 +57,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32 } U_CFUNC void -ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) { +ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { int32_t i, length; UChar32 c, start, limit; @@ -76,19 +69,19 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode * } /* add the start code point of each same-value range of the trie */ - utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa); + utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); /* add the code points from the bidi mirroring table */ - length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; + length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; for(i=0; imirrors[i]); + c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]); sa->addRange(sa->set, c, c+1); } /* add the code points from the Joining_Group array where the value changes */ - start=bdp->indexes[UBIDI_IX_JG_START]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; - jgArray=bdp->jgArray; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; + jgArray=ubidi_props_singleton.jgArray; for(;;) { prev=0; while(startadd(sa->set, limit); } - 
if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) { + if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) { /* switch to the second Joining_Group range */ - start=bdp->indexes[UBIDI_IX_JG_START2]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; - jgArray=bdp->jgArray2; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; + jgArray=ubidi_props_singleton.jgArray2; } else { break; } @@ -121,14 +114,8 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode * /* property access functions ------------------------------------------------ */ U_CFUNC int32_t -ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { - int32_t max; - - if(bdp==NULL) { - return -1; - } - - max=bdp->indexes[UBIDI_MAX_VALUES_INDEX]; +ubidi_getMaxValue(UProperty which) { + int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX]; switch(which) { case UCHAR_BIDI_CLASS: return (max&UBIDI_CLASS_MASK); @@ -144,19 +131,19 @@ ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { } U_CAPI UCharDirection -ubidi_getClass(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getClass(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UCharDirection)UBIDI_GET_CLASS(props); } U_CFUNC UBool -ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isMirrored(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); } static UChar32 -getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { +getMirror(UChar32 c, uint16_t props) { int32_t delta=UBIDI_GET_MIRROR_DELTA(props); if(delta!=UBIDI_ESC_MIRROR_DELTA) { return c+delta; @@ -167,8 +154,8 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { int32_t i, length; UChar32 c2; - mirrors=bdp->mirrors; - length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; + 
mirrors=ubidi_props_singleton.mirrors; + length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; /* linear search */ for(i=0; itrie, c); - return getMirror(bdp, c, props); +ubidi_getMirror(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return getMirror(c, props); } U_CFUNC UBool -ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isBidiControl(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT); } U_CFUNC UBool -ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isJoinControl(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT); } U_CFUNC UJoiningType -ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getJoiningType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT); } U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) { +ubidi_getJoiningGroup(UChar32 c) { UChar32 start, limit; - start=bdp->indexes[UBIDI_IX_JG_START]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; if(start<=c && cjgArray[c-start]; + return (UJoiningGroup)ubidi_props_singleton.jgArray[c-start]; } - start=bdp->indexes[UBIDI_IX_JG_START2]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; if(start<=c && cjgArray2[c-start]; + return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start]; } return U_JG_NO_JOINING_GROUP; } U_CFUNC UBidiPairedBracketType -ubidi_getPairedBracketType(const UBiDiProps 
*bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getPairedBracketType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT); } U_CFUNC UChar32 -ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getPairedBracket(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); if((props&UBIDI_BPT_MASK)==0) { return c; } else { - return getMirror(bdp, c, props); + return getMirror(c, props); } } @@ -248,20 +235,20 @@ ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) { U_CFUNC UCharDirection u_charDirection(UChar32 c) { - return ubidi_getClass(&ubidi_props_singleton, c); + return ubidi_getClass(c); } U_CFUNC UBool u_isMirrored(UChar32 c) { - return ubidi_isMirrored(&ubidi_props_singleton, c); + return ubidi_isMirrored(c); } U_CFUNC UChar32 u_charMirror(UChar32 c) { - return ubidi_getMirror(&ubidi_props_singleton, c); + return ubidi_getMirror(c); } U_STABLE UChar32 U_EXPORT2 u_getBidiPairedBracket(UChar32 c) { - return ubidi_getPairedBracket(&ubidi_props_singleton, c); + return ubidi_getPairedBracket(c); } diff --git a/deps/icu-small/source/common/ubidi_props.h b/deps/icu-small/source/common/ubidi_props.h index 69e8853e69b1ca..698ee9c52bd6cb 100644 --- a/deps/icu-small/source/common/ubidi_props.h +++ b/deps/icu-small/source/common/ubidi_props.h @@ -31,46 +31,40 @@ U_CDECL_BEGIN /* library API -------------------------------------------------------------- */ -struct UBiDiProps; -typedef struct UBiDiProps UBiDiProps; - -U_CFUNC const UBiDiProps * -ubidi_getSingleton(void); - U_CFUNC void -ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode); +ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); /* property access functions */ U_CFUNC int32_t -ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which); 
+ubidi_getMaxValue(UProperty which); U_CAPI UCharDirection -ubidi_getClass(const UBiDiProps *bdp, UChar32 c); +ubidi_getClass(UChar32 c); U_CFUNC UBool -ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c); +ubidi_isMirrored(UChar32 c); U_CFUNC UChar32 -ubidi_getMirror(const UBiDiProps *bdp, UChar32 c); +ubidi_getMirror(UChar32 c); U_CFUNC UBool -ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c); +ubidi_isBidiControl(UChar32 c); U_CFUNC UBool -ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c); +ubidi_isJoinControl(UChar32 c); U_CFUNC UJoiningType -ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c); +ubidi_getJoiningType(UChar32 c); U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c); +ubidi_getJoiningGroup(UChar32 c); U_CFUNC UBidiPairedBracketType -ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c); +ubidi_getPairedBracketType(UChar32 c); U_CFUNC UChar32 -ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c); +ubidi_getPairedBracket(UChar32 c); /* file definitions --------------------------------------------------------- */ diff --git a/deps/icu-small/source/common/ubidiimp.h b/deps/icu-small/source/common/ubidiimp.h index fd64fac34dea4b..a5d0727495d767 100644 --- a/deps/icu-small/source/common/ubidiimp.h +++ b/deps/icu-small/source/common/ubidiimp.h @@ -254,8 +254,6 @@ struct UBiDi { */ const UBiDi * pParaBiDi; - const UBiDiProps *bdp; - /* alias pointer to the current text */ const UChar *text; diff --git a/deps/icu-small/source/common/ucase.cpp b/deps/icu-small/source/common/ucase.cpp index 1f41dbf6de3edb..28d5a4cac62ba6 100644 --- a/deps/icu-small/source/common/ucase.cpp +++ b/deps/icu-small/source/common/ucase.cpp @@ -77,9 +77,12 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { /* data access primitives --------------------------------------------------- */ -#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) +U_CFUNC const UTrie2 * U_EXPORT2 
+ucase_getTrie() { + return &ucase_props_singleton.trie; +} -#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) +#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) /* number of bits in an 8-bit integer value */ static const uint8_t flagsOffset[256]={ @@ -128,8 +131,8 @@ static const uint8_t flagsOffset[256]={ U_CAPI UChar32 U_EXPORT2 ucase_tolower(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { c+=UCASE_GET_DELTA(props); } } else { @@ -145,7 +148,7 @@ ucase_tolower(UChar32 c) { U_CAPI UChar32 U_EXPORT2 ucase_toupper(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } @@ -162,7 +165,7 @@ ucase_toupper(UChar32 c) { U_CAPI UChar32 U_EXPORT2 ucase_totitle(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } @@ -223,7 +226,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { } props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)!=UCASE_NONE) { /* add the one simple case mapping, no matter what type it is */ int32_t delta=UCASE_GET_DELTA(props); @@ -419,6 +422,138 @@ FullCaseFoldingIterator::next(UnicodeString &full) { return c; } +namespace LatinCase { + +const int8_t TO_LOWER_NORMAL[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, + + 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC +}; + +const int8_t TO_LOWER_TR_LT[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 
0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0, + EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, + + 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC +}; + +const int8_t TO_UPPER_NORMAL[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, + + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, + + -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC +}; + +const int8_t TO_UPPER_TR[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, + + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, + + -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC +}; + +} // namespace LatinCase + U_NAMESPACE_END /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ @@ -439,7 +574,7 @@ ucase_getTypeOrIgnorable(UChar32 c) { static inline int32_t getDotType(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { return props&UCASE_DOT_MASK; } else { const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); @@ -878,8 +1013,8 @@ ucase_toFullLower(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + 
if(UCASE_IS_UPPER_OR_TITLE(props)) { result=c+UCASE_GET_DELTA(props); } } else { @@ -1024,7 +1159,7 @@ toUpperOrTitle(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { result=c+UCASE_GET_DELTA(props); } @@ -1169,8 +1304,8 @@ ucase_toFullTitle(UChar32 c, U_CAPI UChar32 U_EXPORT2 ucase_fold(UChar32 c, uint32_t options) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { c+=UCASE_GET_DELTA(props); } } else { @@ -1234,8 +1369,8 @@ ucase_toFullFolding(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { result=c+UCASE_GET_DELTA(props); } } else { diff --git a/deps/icu-small/source/common/ucase.h b/deps/icu-small/source/common/ucase.h index 9d6365eadfcca2..a7a8c9f00d1e93 100644 --- a/deps/icu-small/source/common/ucase.h +++ b/deps/icu-small/source/common/ucase.h @@ -26,6 +26,7 @@ #include "putilimp.h" #include "uset_imp.h" #include "udataswp.h" +#include "utrie2.h" #ifdef __cplusplus U_NAMESPACE_BEGIN @@ -148,6 +149,33 @@ class U_COMMON_API FullCaseFoldingIterator { int32_t rowCpIndex; }; +/** + * Fast case mapping data for ASCII/Latin. + * Linear arrays of delta bytes: 0=no mapping; EXC=exception. + * Deltas must not cross the ASCII boundary, or else they cannot be easily used + * in simple UTF-8 code. + */ +namespace LatinCase { + +/** Case mapping/folding data for code points up to U+017F. */ +constexpr UChar LIMIT = 0x180; +/** U+017F case-folds and uppercases crossing the ASCII boundary. 
*/ +constexpr UChar LONG_S = 0x17f; +/** Exception: Complex mapping, or too-large delta. */ +constexpr int8_t EXC = -0x80; + +/** Deltas for lowercasing for most locales, and default case folding. */ +extern const int8_t TO_LOWER_NORMAL[LIMIT]; +/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */ +extern const int8_t TO_LOWER_TR_LT[LIMIT]; + +/** Deltas for uppercasing for most locales. */ +extern const int8_t TO_UPPER_NORMAL[LIMIT]; +/** Deltas for uppercasing for tr/az. */ +extern const int8_t TO_UPPER_TR[LIMIT]; + +} // namespace LatinCase + U_NAMESPACE_END #endif @@ -308,6 +336,9 @@ enum { /* definitions for 16-bit case properties word ------------------------------ */ +U_CFUNC const UTrie2 * U_EXPORT2 +ucase_getTrie(); + /* 2-bit constants for types of cased characters */ #define UCASE_TYPE_MASK 3 enum { @@ -320,10 +351,14 @@ enum { #define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK) #define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7) +#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2) + #define UCASE_IGNORABLE 4 #define UCASE_SENSITIVE 8 #define UCASE_EXCEPTION 0x10 +#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) + #define UCASE_DOT_MASK 0x60 enum { UCASE_NO_DOT=0, /* normal characters with cc=0 */ diff --git a/deps/icu-small/source/common/ucasemap.cpp b/deps/icu-small/source/common/ucasemap.cpp index 8eec93c6e3ea3b..99e30c9fc6987e 100644 --- a/deps/icu-small/source/common/ucasemap.cpp +++ b/deps/icu-small/source/common/ucasemap.cpp @@ -165,9 +165,7 @@ appendResult(int32_t cpLength, int32_t result, const UChar *s, inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } -} // namespace - -static UChar32 U_CALLCONV +UChar32 U_CALLCONV utf8_caseContextIterator(void *context, int8_t dir) { UCaseContext *csc=(UCaseContext *)context; UChar32 c; @@ -199,36 +197,227 @@ utf8_caseContextIterator(void *context, int8_t dir) { 
return U_SENTINEL; } -/* - * Case-maps [srcStart..srcLimit[ but takes - * context [0..srcLength[ into account. +/** + * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. + * caseLocale < 0: Case-folds [srcStart..srcLimit[. */ -static void -_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, - const uint8_t *src, UCaseContext *csc, - int32_t srcStart, int32_t srcLimit, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex=srcStart; - while (U_SUCCESS(errorCode) && srcIndex= 0 ? + !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : + (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { + latinToLower = LatinCase::TO_LOWER_NORMAL; + } else { + latinToLower = LatinCase::TO_LOWER_TR_LT; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t prev = srcStart; + int32_t srcIndex = srcStart; + for (;;) { + // fast path for simple cases int32_t cpStart; - csc->cpStart=cpStart=srcIndex; UChar32 c; - U8_NEXT(src, srcIndex, srcLimit, c); - csc->cpLimit=srcIndex; - if(c<0) { - // Malformed UTF-8. 
- ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart, + for (;;) { + if (U_FAILURE(errorCode) || srcIndex >= srcLimit) { + c = U_SENTINEL; + break; + } + uint8_t lead = src[srcIndex++]; + if (lead <= 0x7f) { + int8_t d = latinToLower[lead]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 1; + c = lead; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, + sink, options, edits, errorCode); + char ascii = (char)(lead + d); + sink.Append(&ascii, 1); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + prev = srcIndex; + continue; + } else if (lead < 0xe3) { + uint8_t t; + if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit && + (t = src[srcIndex] - 0x80) <= 0x3f) { + // U+0080..U+017F + ++srcIndex; + c = ((lead - 0xc0) << 6) | t; + int8_t d = latinToLower[c]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 2; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendTwoBytes(c + d, sink); + if (edits != nullptr) { + edits->addReplace(2, 2); + } + prev = srcIndex; + continue; + } + } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && + (srcIndex + 2) <= srcLimit && + U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { + // most of CJK: no case mappings + srcIndex += 2; + continue; + } + cpStart = --srcIndex; + U8_NEXT(src, srcIndex, srcLimit, c); + if (c < 0) { + // ill-formed UTF-8 + continue; + } + uint16_t props = UTRIE2_GET16(trie, c); + if (UCASE_HAS_EXCEPTION(props)) { break; } + int32_t delta; + if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, sink, options, edits, errorCode); + ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); + prev = srcIndex; + } + if (c < 0) { + break; + } + // slow path + const UChar *s; + if (caseLocale >= 
0) { + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale); } else { - const UChar *s; - c=map(c, utf8_caseContextIterator, csc, &s, caseLocale); + c = ucase_toFullFolding(c, &s, options); + } + if (c >= 0) { + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); + prev = srcIndex; } } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, + sink, options, edits, errorCode); } +void toUpper(int32_t caseLocale, uint32_t options, + const uint8_t *src, UCaseContext *csc, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToUpper; + if (caseLocale == UCASE_LOC_TURKISH) { + latinToUpper = LatinCase::TO_UPPER_TR; + } else { + latinToUpper = LatinCase::TO_UPPER_NORMAL; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t prev = 0; + int32_t srcIndex = 0; + for (;;) { + // fast path for simple cases + int32_t cpStart; + UChar32 c; + for (;;) { + if (U_FAILURE(errorCode) || srcIndex >= srcLength) { + c = U_SENTINEL; + break; + } + uint8_t lead = src[srcIndex++]; + if (lead <= 0x7f) { + int8_t d = latinToUpper[lead]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 1; + c = lead; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, + sink, options, edits, errorCode); + char ascii = (char)(lead + d); + sink.Append(&ascii, 1); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + prev = srcIndex; + continue; + } else if (lead < 0xe3) { + uint8_t t; + if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength && + (t = src[srcIndex] - 0x80) <= 0x3f) { + // U+0080..U+017F + ++srcIndex; + c = ((lead - 0xc0) << 6) | t; + int8_t d = latinToUpper[c]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 2; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src 
+ prev, srcIndex - 2 - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendTwoBytes(c + d, sink); + if (edits != nullptr) { + edits->addReplace(2, 2); + } + prev = srcIndex; + continue; + } + } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && + (srcIndex + 2) <= srcLength && + U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { + // most of CJK: no case mappings + srcIndex += 2; + continue; + } + cpStart = --srcIndex; + U8_NEXT(src, srcIndex, srcLength, c); + if (c < 0) { + // ill-formed UTF-8 + continue; + } + uint16_t props = UTRIE2_GET16(trie, c); + if (UCASE_HAS_EXCEPTION(props)) { break; } + int32_t delta; + if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); + prev = srcIndex; + } + if (c < 0) { + break; + } + // slow path + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + const UChar *s; + c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale); + if (c >= 0) { + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); + prev = srcIndex; + } + } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, + sink, options, edits, errorCode); +} + +} // namespace + #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC void U_CALLCONV @@ -335,10 +524,9 @@ ucasemap_internalUTF8ToTitle( if(titleLimitversion == 0) { if(length == 1) { - if((UBool)args->converter->fromUnicodeStatus) { + if(args->converter->fromUnicodeStatus) { /* in DBCS mode: switch to SBCS */ args->converter->fromUnicodeStatus = 0; *p++ = UCNV_SI; } *p++ = subchar[0]; } else /* length == 2*/ { - if(!(UBool)args->converter->fromUnicodeStatus) { + if(!args->converter->fromUnicodeStatus) { /* in SBCS mode: switch to DBCS */ 
args->converter->fromUnicodeStatus = 1; *p++ = UCNV_SO; diff --git a/deps/icu-small/source/common/ucnv_err.cpp b/deps/icu-small/source/common/ucnv_err.cpp index 18218835a2260e..63794d2334f62b 100644 --- a/deps/icu-small/source/common/ucnv_err.cpp +++ b/deps/icu-small/source/common/ucnv_err.cpp @@ -60,11 +60,12 @@ * To avoid dependency on other code, this list is hard coded here. * When an ignorable code point is found and is unmappable, the default callbacks * will ignore them. - * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g= + * For a list of the default ignorable code points, use this link: + * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i= * * This list should be sync with the one in CharsetCallback.java */ -#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\ +#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \ (c == 0x00AD) || \ (c == 0x034F) || \ (c == 0x061C) || \ @@ -74,26 +75,15 @@ (0x180B <= c && c <= 0x180E) || \ (0x200B <= c && c <= 0x200F) || \ (0x202A <= c && c <= 0x202E) || \ - (c == 0x2060) || \ - (0x2066 <= c && c <= 0x2069) || \ - (0x2061 <= c && c <= 0x2064) || \ - (0x206A <= c && c <= 0x206F) || \ + (0x2060 <= c && c <= 0x206F) || \ (c == 0x3164) || \ - (0x0FE00 <= c && c <= 0x0FE0F) || \ - (c == 0x0FEFF) || \ - (c == 0x0FFA0) || \ - (0x01BCA0 <= c && c <= 0x01BCA3) || \ - (0x01D173 <= c && c <= 0x01D17A) || \ - (c == 0x0E0001) || \ - (0x0E0020 <= c && c <= 0x0E007F) || \ - (0x0E0100 <= c && c <= 0x0E01EF) || \ - (c == 0x2065) || \ - (0x0FFF0 <= c && c <= 0x0FFF8) || \ - (c == 0x0E0000) || \ - (0x0E0002 <= c && c <= 0x0E001F) || \ - (0x0E0080 <= c && c <= 0x0E00FF) || \ - (0x0E01F0 <= c && c <= 0x0E0FFF) \ - ) + (0xFE00 <= c && c <= 0xFE0F) || \ + (c == 0xFEFF) || \ + (c == 0xFFA0) || \ + (0xFFF0 <= c && c <= 0xFFF8) || \ + (0x1BCA0 <= c && c <= 0x1BCA3) || \ + (0x1D173 <= c && c <= 0x1D17A) || \ + (0xE0000 <= c && c <= 0xE0FFF)) 
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ diff --git a/deps/icu-small/source/common/ucnv_u32.cpp b/deps/icu-small/source/common/ucnv_u32.cpp index 3fac04b300ed34..ca8c6788d3dd97 100644 --- a/deps/icu-small/source/common/ucnv_u32.cpp +++ b/deps/icu-small/source/common/ucnv_u32.cpp @@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, uint32_t ch, i; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) { + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, int32_t offsetNum = 0; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) { + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, uint32_t ch, i; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, int32_t offsetNum = 0; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; diff --git a/deps/icu-small/source/common/ucnv_u8.cpp b/deps/icu-small/source/common/ucnv_u8.cpp index c7ef87fd500c62..5a07244b02bf9c 100644 --- 
a/deps/icu-small/source/common/ucnv_u8.cpp +++ b/deps/icu-small/source/common/ucnv_u8.cpp @@ -76,7 +76,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, int32_t i, inBytes; /* Restore size of current sequence */ - if (cnv->toUnicodeStatus && myTarget < targetLimit) + if (cnv->toULength > 0 && myTarget < targetLimit) { inBytes = cnv->mode; /* restore # of bytes to consume */ i = cnv->toULength; /* restore # of bytes consumed */ @@ -194,7 +194,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr int32_t i, inBytes; /* Restore size of current sequence */ - if (cnv->toUnicodeStatus && myTarget < targetLimit) + if (cnv->toULength > 0 && myTarget < targetLimit) { inBytes = cnv->mode; /* restore # of bytes to consume */ i = cnv->toULength; /* restore # of bytes consumed */ @@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } count=(int32_t)(sourceLimit-source)+oldToULength; @@ -695,36 +696,20 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, // Use a single counter for source and target, counting the minimum of // the source length and the target capacity. // Let the standard converter handle edge cases. - const uint8_t *limit=sourceLimit; if(count>targetCapacity) { - limit-=(count-targetCapacity); count=targetCapacity; } - // The conversion loop checks count>0 only once per 1/2/3-byte character. - // If the buffer ends with a truncated 2- or 3-byte sequence, + // The conversion loop checks count>0 only once per character. 
+ // If the buffer ends with a truncated sequence, // then we reduce the count to stop before that, // and collect the remaining bytes after the conversion loop. - { - // Do not go back into the bytes that will be read for finishing a partial - // sequence from the previous buffer. - int32_t length=count-toULimit; - if(length>0) { - uint8_t b1=*(limit-1); - if(U8_IS_SINGLE(b1)) { - // common ASCII character - } else if(U8_IS_TRAIL(b1) && length>=2) { - uint8_t b2=*(limit-2); - if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - // truncated 3-byte sequence - count-=2; - } - } else if(0xc2<=b1 && b1<0xf0) { - // truncated 2- or 3-byte sequence - --count; - } - } - } + + // Do not go back into the bytes that will be read for finishing a partial + // sequence from the previous buffer. + int32_t length=count-toULimit; + U8_TRUNCATE_IF_INCOMPLETE(source, 0, length); + count=toULimit+length; } if(c!=0) { @@ -814,7 +799,7 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, } /* copy the legal byte sequence to the target */ - if(count>=toULength) { + { int8_t i; for(i=0; isource=(char *)source; - pFromUArgs->target=(char *)target; - *pErrorCode=U_USING_DEFAULT_WARNING; - return; } } } @@ -856,8 +833,7 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, utf8->toULength=toULength; utf8->mode=toULimit; break; - } else if(!U8_IS_TRAIL(b=*source)) { - /* lead byte in trail byte position */ + } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) { utf8->toULength=toULength; *pErrorCode=U_ILLEGAL_CHAR_FOUND; break; diff --git a/deps/icu-small/source/common/ucnvlat1.cpp b/deps/icu-small/source/common/ucnvlat1.cpp index 9855ebe6e774d7..15eeb5c51f5fb3 100644 --- a/deps/icu-small/source/common/ucnvlat1.cpp +++ b/deps/icu-small/source/common/ucnvlat1.cpp @@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); /* get the converter state from the 
UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; + if (utf8->toULength > 0) { + c=(UChar32)utf8->toUnicodeStatus; + } else { + c = 0; + } if(c!=0 && sourceconverter->toUnicodeStatus!=0) { + if(pToUArgs->converter->toULength > 0) { /* no handling of partial UTF-8 characters here, fall back to pivoting */ *pErrorCode=U_USING_DEFAULT_WARNING; return; diff --git a/deps/icu-small/source/common/ucnvmbcs.cpp b/deps/icu-small/source/common/ucnvmbcs.cpp index e5efa7fc1b2ad3..9052394b4ff8d1 100644 --- a/deps/icu-small/source/common/ucnvmbcs.cpp +++ b/deps/icu-small/source/common/ucnvmbcs.cpp @@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } // The conversion loop checks sourcesharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } // The conversion loop checks source 1) { + if (choice < 0 || choice > 2) { *ec = U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -731,15 +736,19 @@ ucurr_getName(const UChar* currency, const UChar* s = NULL; ec2 = U_ZERO_ERROR; - UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2); - - rb = ures_getByKey(rb, CURRENCIES, rb, &ec2); - - // Fetch resource with multi-level resource inheritance fallback - rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2); - - s = ures_getStringByIndex(rb, choice, len, &ec2); - ures_close(rb); + LocalUResourceBundlePointer 
rb(ures_open(U_ICUDATA_CURR, loc, &ec2)); + + if (nameStyle == UCURR_NARROW_SYMBOL_NAME) { + CharString key; + key.append(CURRENCIES_NARROW, ec2); + key.append("/", ec2); + key.append(buf, ec2); + s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2); + } else { + ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2); + ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2); + s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2); + } // If we've succeeded we're done. Otherwise, try to fallback. // If that fails (because we are already at root) then exit. diff --git a/deps/icu-small/source/common/unicode/brkiter.h b/deps/icu-small/source/common/unicode/brkiter.h index c64bb712222c63..607f3ec625ab1f 100644 --- a/deps/icu-small/source/common/unicode/brkiter.h +++ b/deps/icu-small/source/common/unicode/brkiter.h @@ -298,15 +298,14 @@ class U_COMMON_API BreakIterator : public UObject { virtual int32_t next(int32_t n) = 0; /** - * For RuleBasedBreakIterators, return the status tag from the - * break rule that determined the most recently - * returned break position. + * For RuleBasedBreakIterators, return the status tag from the break rule + * that determined the boundary at the current iteration position. *

* For break iterator types that do not support a rule status, * a default value of 0 is returned. *

- * @return the status from the break rule that determined the most recently - * returned break position. + * @return the status from the break rule that determined the boundary at + * the current iteration position. * @see RuleBaseBreakIterator::getRuleStatus() * @see UWordBreak * @stable ICU 52 @@ -315,7 +314,7 @@ class U_COMMON_API BreakIterator : public UObject { /** * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) - * that determined the most recently returned break position. + * that determined the boundary at the current iteration position. *

* For break iterator types that do not support rule status, * no values are returned. @@ -334,7 +333,7 @@ class U_COMMON_API BreakIterator : public UObject { * normal way, without attempting to store any values. * @param status receives error codes. * @return The number of rule status values from rules that determined - * the most recent boundary returned by the break iterator. + * the boundary at the current iteration position. * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value * is the total number of status values that were available, * not the reduced number that were actually returned. @@ -616,7 +615,7 @@ class U_COMMON_API BreakIterator : public UObject { virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; private: - static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status); + static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status); static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); diff --git a/deps/icu-small/source/common/unicode/bytestriebuilder.h b/deps/icu-small/source/common/unicode/bytestriebuilder.h index 0f9f5e2c060f13..7a806bb7f02096 100644 --- a/deps/icu-small/source/common/unicode/bytestriebuilder.h +++ b/deps/icu-small/source/common/unicode/bytestriebuilder.h @@ -154,7 +154,6 @@ class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { const char *s; }; - // don't use #ifndef U_HIDE_INTERNAL_API with private class members or virtual methods. 
virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, Node *nextNode) const; diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h index 4a4917bdcaf1b7..4b77256d742784 100644 --- a/deps/icu-small/source/common/unicode/casemap.h +++ b/deps/icu-small/source/common/unicode/casemap.h @@ -18,8 +18,6 @@ U_NAMESPACE_BEGIN -#ifndef U_HIDE_DRAFT_API - class BreakIterator; class ByteSink; class Edits; @@ -27,7 +25,7 @@ class Edits; /** * Low-level C++ case mapping functions. * - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API CaseMap U_FINAL : public UMemory { public: @@ -59,7 +57,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strToLower - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toLower( const char *locale, uint32_t options, @@ -95,7 +93,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strToUpper - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toUpper( const char *locale, uint32_t options, @@ -146,7 +144,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * * @see u_strToTitle * @see ucasemap_toTitle - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toTitle( const char *locale, uint32_t options, BreakIterator *iter, @@ -188,7 +186,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strFoldCase - * @draft ICU 59 + * @stable ICU 59 */ static int32_t fold( uint32_t options, @@ -196,6 +194,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); +#ifndef U_HIDE_DRAFT_API /** * Lowercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. 
@@ -318,6 +317,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API /** * Lowercases a UTF-8 string and optionally records edits. @@ -347,7 +347,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToLower - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToLower( const char *locale, uint32_t options, @@ -383,7 +383,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToUpper - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToUpper( const char *locale, uint32_t options, @@ -433,7 +433,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToTitle - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToTitle( const char *locale, uint32_t options, BreakIterator *iter, @@ -475,7 +475,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
* * @see ucasemap_utf8FoldCase - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8Fold( uint32_t options, @@ -489,8 +489,6 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { CaseMap &operator=(const CaseMap &other) = delete; }; -#endif // U_HIDE_DRAFT_API - U_NAMESPACE_END #endif // __CASEMAP_H__ diff --git a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h index fbce1775911518..49d0e029a93b6e 100644 --- a/deps/icu-small/source/common/unicode/char16ptr.h +++ b/deps/icu-small/source/common/unicode/char16ptr.h @@ -30,25 +30,23 @@ U_NAMESPACE_BEGIN # define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") #endif -// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it -// is now used in place of UChar* in several stable C++ methods /** * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API Char16Ptr U_FINAL { public: /** * Copies the pointer. * @param p pointer - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(char16_t *p); #if !U_CHAR16_IS_TYPEDEF /** * Converts the pointer to char16_t *. * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(uint16_t *p); #endif @@ -57,32 +55,32 @@ class U_COMMON_API Char16Ptr U_FINAL { * Converts the pointer to char16_t *. * (Only defined if U_SIZEOF_WCHAR_T==2.) * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(wchar_t *p); #endif /** * nullptr constructor. * @param p nullptr - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(std::nullptr_t p); /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ inline ~Char16Ptr(); /** * Pointer access. * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline char16_t *get() const; /** * char16_t pointer access via type conversion (e.g., static_cast). 
* @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline operator char16_t *() const { return get(); } @@ -137,25 +135,23 @@ char16_t *Char16Ptr::get() const { return u_.cp; } #endif -// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it is -// now used in place of const UChar* in several stable C++ methods /** * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API ConstChar16Ptr U_FINAL { public: /** * Copies the pointer. * @param p pointer - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const char16_t *p); #if !U_CHAR16_IS_TYPEDEF /** * Converts the pointer to char16_t *. * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const uint16_t *p); #endif @@ -164,33 +160,33 @@ class U_COMMON_API ConstChar16Ptr U_FINAL { * Converts the pointer to char16_t *. * (Only defined if U_SIZEOF_WCHAR_T==2.) * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const wchar_t *p); #endif /** * nullptr constructor. * @param p nullptr - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const std::nullptr_t p); /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ inline ~ConstChar16Ptr(); /** * Pointer access. * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline const char16_t *get() const; /** * char16_t pointer access via type conversion (e.g., static_cast). * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline operator const char16_t *() const { return get(); } @@ -250,7 +246,7 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; } * Includes an aliasing barrier if available. 
* @param p pointer * @return p as const UChar * - * @draft ICU 59 + * @stable ICU 59 */ inline const UChar *toUCharPtr(const char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -264,7 +260,7 @@ inline const UChar *toUCharPtr(const char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as UChar * - * @draft ICU 59 + * @stable ICU 59 */ inline UChar *toUCharPtr(char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -278,7 +274,7 @@ inline UChar *toUCharPtr(char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as const OldUChar * - * @draft ICU 59 + * @stable ICU 59 */ inline const OldUChar *toOldUCharPtr(const char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -292,7 +288,7 @@ inline const OldUChar *toOldUCharPtr(const char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as OldUChar * - * @draft ICU 59 + * @stable ICU 59 */ inline OldUChar *toOldUCharPtr(char16_t *p) { #ifdef U_ALIASING_BARRIER diff --git a/deps/icu-small/source/common/unicode/chariter.h b/deps/icu-small/source/common/unicode/chariter.h index dbed89dbe61e0b..292794f6d65832 100644 --- a/deps/icu-small/source/common/unicode/chariter.h +++ b/deps/icu-small/source/common/unicode/chariter.h @@ -569,7 +569,7 @@ class U_COMMON_API CharacterIterator : public ForwardCharacterIterator { * Returns the numeric index in the underlying text-storage * object of the character the iterator currently refers to * (i.e., the character returned by current()). 
- * @return the numberic index in the text-storage object of + * @return the numeric index in the text-storage object of * the character the iterator currently refers to * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/common/unicode/dtintrv.h b/deps/icu-small/source/common/unicode/dtintrv.h index 2221b36c9b293f..c99011e26c81dd 100644 --- a/deps/icu-small/source/common/unicode/dtintrv.h +++ b/deps/icu-small/source/common/unicode/dtintrv.h @@ -69,7 +69,7 @@ class U_COMMON_API DateInterval : public UObject { *

      * .   Base* polymorphic_pointer = createPolymorphicObject();
      * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
+     * .       derived::getStaticClassID()) ...
      * 
* @return The class ID for all objects of this class. * @stable ICU 4.0 diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h index 082c3733a88bda..5a72574c140db6 100644 --- a/deps/icu-small/source/common/unicode/edits.h +++ b/deps/icu-small/source/common/unicode/edits.h @@ -17,8 +17,6 @@ U_NAMESPACE_BEGIN -#ifndef U_HIDE_DRAFT_API - /** * Records lengths of string edits but not replacement text. * Supports replacements, insertions, deletions in linear progression. @@ -27,13 +25,13 @@ U_NAMESPACE_BEGIN * An Edits object tracks a separate UErrorCode, but ICU string transformation functions * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. * - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API Edits U_FINAL : public UMemory { public: /** * Constructs an empty object. - * @draft ICU 59 + * @stable ICU 59 */ Edits() : array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0), @@ -64,7 +62,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ ~Edits(); @@ -88,20 +86,20 @@ class U_COMMON_API Edits U_FINAL : public UMemory { /** * Resets the data but may not release memory. - * @draft ICU 59 + * @stable ICU 59 */ void reset() U_NOEXCEPT; /** * Adds a record for an unchanged segment of text. * Normally called from inside ICU string transformation functions, not user code. - * @draft ICU 59 + * @stable ICU 59 */ void addUnchanged(int32_t unchangedLength); /** * Adds a record for a text replacement/insertion/deletion. * Normally called from inside ICU string transformation functions, not user code. - * @draft ICU 59 + * @stable ICU 59 */ void addReplace(int32_t oldLength, int32_t newLength); /** @@ -112,33 +110,35 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * and an error occurred while recording edits. * Otherwise unchanged. 
* @return TRUE if U_FAILURE(outErrorCode) - * @draft ICU 59 + * @stable ICU 59 */ UBool copyErrorTo(UErrorCode &outErrorCode); /** * How much longer is the new text compared with the old text? * @return new length minus old length - * @draft ICU 59 + * @stable ICU 59 */ int32_t lengthDelta() const { return delta; } /** * @return TRUE if there are any change edits - * @draft ICU 59 + * @stable ICU 59 */ UBool hasChanges() const { return numChanges != 0; } +#ifndef U_HIDE_DRAFT_API /** * @return the number of change edits * @draft ICU 60 */ int32_t numberOfChanges() const { return numChanges; } +#endif // U_HIDE_DRAFT_API /** * Access to the list of edits. * @see getCoarseIterator * @see getFineIterator - * @draft ICU 59 + * @stable ICU 59 */ struct U_COMMON_API Iterator U_FINAL : public UMemory { /** @@ -152,12 +152,12 @@ class U_COMMON_API Edits U_FINAL : public UMemory { srcIndex(0), replIndex(0), destIndex(0) {} /** * Copy constructor. - * @draft ICU 59 + * @stable ICU 59 */ Iterator(const Iterator &other) = default; /** * Assignment operator. - * @draft ICU 59 + * @stable ICU 59 */ Iterator &operator=(const Iterator &other) = default; @@ -167,7 +167,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return TRUE if there is another edit - * @draft ICU 59 + * @stable ICU 59 */ UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } @@ -188,12 +188,13 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) 
* @return TRUE if the edit for the source index was found - * @draft ICU 59 + * @stable ICU 59 */ UBool findSourceIndex(int32_t i, UErrorCode &errorCode) { return findIndex(i, TRUE, errorCode) == 0; } +#ifndef U_HIDE_DRAFT_API /** * Finds the edit that contains the destination index. * The destination index may be found in a non-change @@ -264,39 +265,40 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * @draft ICU 60 */ int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API /** * @return TRUE if this edit replaces oldLength() units with newLength() different ones. * FALSE if oldLength units remain unchanged. - * @draft ICU 59 + * @stable ICU 59 */ UBool hasChange() const { return changed; } /** * @return the number of units in the original string which are replaced or remain unchanged. - * @draft ICU 59 + * @stable ICU 59 */ int32_t oldLength() const { return oldLength_; } /** * @return the number of units in the modified string, if hasChange() is TRUE. * Same as oldLength if hasChange() is FALSE. - * @draft ICU 59 + * @stable ICU 59 */ int32_t newLength() const { return newLength_; } /** * @return the current index into the source string - * @draft ICU 59 + * @stable ICU 59 */ int32_t sourceIndex() const { return srcIndex; } /** * @return the current index into the replacement-characters-only string, * not counting unchanged spans - * @draft ICU 59 + * @stable ICU 59 */ int32_t replacementIndex() const { return replIndex; } /** * @return the current index into the full destination string - * @draft ICU 59 + * @stable ICU 59 */ int32_t destinationIndex() const { return destIndex; } @@ -331,7 +333,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * Returns an Iterator for coarse-grained changes for simple string updates. * Skips non-changes. * @return an Iterator that merges adjacent changes. 
- * @draft ICU 59 + * @stable ICU 59 */ Iterator getCoarseChangesIterator() const { return Iterator(array, length, TRUE, TRUE); @@ -340,7 +342,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { /** * Returns an Iterator for coarse-grained changes and non-changes for simple string updates. * @return an Iterator that merges adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getCoarseIterator() const { return Iterator(array, length, FALSE, TRUE); @@ -350,7 +352,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * Returns an Iterator for fine-grained changes for modifying styled text. * Skips non-changes. * @return an Iterator that separates adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getFineChangesIterator() const { return Iterator(array, length, TRUE, FALSE); @@ -359,12 +361,13 @@ class U_COMMON_API Edits U_FINAL : public UMemory { /** * Returns an Iterator for fine-grained changes and non-changes for modifying styled text. * @return an Iterator that separates adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getFineIterator() const { return Iterator(array, length, FALSE, FALSE); } +#ifndef U_HIDE_DRAFT_API /** * Merges the two input Edits and appends the result to this object. 
* @@ -393,6 +396,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * @draft ICU 60 */ Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API private: void releaseArray() U_NOEXCEPT; @@ -415,8 +419,6 @@ class U_COMMON_API Edits U_FINAL : public UMemory { uint16_t stackArray[STACK_CAPACITY]; }; -#endif // U_HIDE_DRAFT_API - U_NAMESPACE_END #endif // __EDITS_H__ diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h index a0319bf0a76af5..751d1faf40454f 100644 --- a/deps/icu-small/source/common/unicode/filteredbrk.h +++ b/deps/icu-small/source/common/unicode/filteredbrk.h @@ -64,9 +64,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * @deprecated ICU 60 use createEmptyInstance instead * @see createEmptyInstance() */ - static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) { - return createEmptyInstance(status); - } + static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); #endif /* U_HIDE_DEPRECATED_API */ #ifndef U_HIDE_DRAFT_API @@ -105,7 +103,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { */ virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; -#ifndef U_HIDE_DEPRECATED_API /** * This function has been deprecated in favor of wrapIteratorWithFilter() * The behavior is identical. 
@@ -116,7 +113,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * @see wrapBreakIteratorWithFilter() */ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; -#endif /* U_HIDE_DEPRECATED_API */ #ifndef U_HIDE_DRAFT_API /** diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h index c752344f339db1..c84774e07fd395 100644 --- a/deps/icu-small/source/common/unicode/locid.h +++ b/deps/icu-small/source/common/unicode/locid.h @@ -353,7 +353,7 @@ class U_COMMON_API Locale : public UObject { * the default locale ID of the runtime environment. * * @param newLocale Locale to set to. If NULL, set to the value obtained - * from the runtime environement. + * from the runtime environment. * @param success The error code. * @system * @stable ICU 2.0 @@ -629,7 +629,7 @@ class U_COMMON_API Locale : public UObject { /** * Fills in "name" with the name of this locale in a format suitable for user display - * in the locale specfied by "displayLocale". This function uses getDisplayLanguage(), + * in the locale specified by "displayLocale". This function uses getDisplayLanguage(), * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display * name in the format "language (country[,variant])". For example, if displayLocale is * fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's diff --git a/deps/icu-small/source/common/unicode/parseerr.h b/deps/icu-small/source/common/unicode/parseerr.h index c8283bfcc9daa0..c05487601cb0d1 100644 --- a/deps/icu-small/source/common/unicode/parseerr.h +++ b/deps/icu-small/source/common/unicode/parseerr.h @@ -58,9 +58,9 @@ enum { U_PARSE_CONTEXT_LEN = 16 }; typedef struct UParseError { /** - * The line on which the error occured. If the parser uses this + * The line on which the error occurred. 
If the parser uses this * field, it sets it to the line number of the source text line on - * which the error appears, which will be be a value >= 1. If the + * which the error appears, which will be a value >= 1. If the * parse does not support line numbers, the value will be <= 0. * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h index 12e2929d240d45..a3f8d32f89d2cd 100644 --- a/deps/icu-small/source/common/unicode/platform.h +++ b/deps/icu-small/source/common/unicode/platform.h @@ -482,9 +482,9 @@ /* Otherwise use the predefined value. */ #elif !defined(__cplusplus) # define U_CPLUSPLUS_VERSION 0 -#elif __cplusplus >= 201402L +#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) # define U_CPLUSPLUS_VERSION 14 -#elif __cplusplus >= 201103L +#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) # define U_CPLUSPLUS_VERSION 11 #else // C++98 or C++03 @@ -631,7 +631,7 @@ namespace std { */ #ifdef U_CHARSET_IS_UTF8 /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED +#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED # define U_CHARSET_IS_UTF8 1 #else # define U_CHARSET_IS_UTF8 0 @@ -749,8 +749,10 @@ namespace std { #else /* * Notes: - * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but - * does not support u"abc" string literals. + * Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef + * and does not support u"abc" string literals. + * Visual Studio 2015 (_MSC_VER>=1900) and above adds support for + * both char16_t and u"abc" string literals. * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but * does not support u"abc" string literals. 
* C++11 and C11 require support for UTF-16 literals diff --git a/deps/icu-small/source/common/unicode/putil.h b/deps/icu-small/source/common/unicode/putil.h index 91d6bb10f76f9a..14bb99ccc545b2 100644 --- a/deps/icu-small/source/common/unicode/putil.h +++ b/deps/icu-small/source/common/unicode/putil.h @@ -38,7 +38,7 @@ /** * Platform utilities isolates the platform dependencies of the - * libarary. For each platform which this code is ported to, these + * library. For each platform which this code is ported to, these * functions may have to be re-implemented. */ @@ -53,7 +53,7 @@ * The data directory is determined as follows: * If u_setDataDirectory() has been called, that is it, otherwise * if the ICU_DATA environment variable is set, use that, otherwise - * If a data directory was specifed at ICU build time + * If a data directory was specified at ICU build time * * \code * #define ICU_DATA_DIR "path" @@ -93,7 +93,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); #ifndef U_HIDE_INTERNAL_API /** * Return the time zone files override directory, or an empty string if - * no directory was specified. Certain time zone resources will be preferrentially + * no directory was specified. Certain time zone resources will be preferentially * loaded from individual files in this directory. * * @return the time zone data override directory. 
diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h index c3c201dd35d333..0c41d69d235ccb 100644 --- a/deps/icu-small/source/common/unicode/rbbi.h +++ b/deps/icu-small/source/common/unicode/rbbi.h @@ -29,7 +29,6 @@ #include "unicode/udata.h" #include "unicode/parseerr.h" #include "unicode/schriter.h" -#include "unicode/uchriter.h" U_NAMESPACE_BEGIN @@ -58,34 +57,18 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * The UText through which this BreakIterator accesses the text * @internal */ - UText *fText; - - /** - * A character iterator that refers to the same text as the UText, above. - * Only included for compatibility with old API, which was based on CharacterIterators. - * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. - */ - CharacterIterator *fCharIter; - - /** - * When the input text is provided by a UnicodeString, this will point to - * a characterIterator that wraps that data. Needed only for the - * implementation of getText(), a backwards compatibility issue. - */ - StringCharacterIterator *fSCharIter; - - /** - * When the input text is provided by a UText, this - * dummy CharacterIterator over an empty string will - * be returned from getText() - */ - UCharCharacterIterator *fDCharIter; + UText fText; +#ifndef U_HIDE_INTERNAL_API +public: +#endif /* U_HIDE_INTERNAL_API */ /** - * The rule data for this BreakIterator instance + * The rule data for this BreakIterator instance. + * Not for general use; Public only for testing purposes. * @internal */ RBBIDataWrapper *fData; +private: /** * The iteration state - current position, rule status for the current position, @@ -105,24 +88,11 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { */ int32_t fRuleStatusIndex; - /** - * True when iteration has run off the end, and iterator functions should return UBRK_DONE. 
- */ - UBool fDone; - /** * Cache of previously determined boundary positions. */ - public: // TODO: debug, return to private. class BreakCache; BreakCache *fBreakCache; - private: - /** - * Counter for the number of characters encountered with the "dictionary" - * flag set. - * @internal - */ - uint32_t fDictionaryCharCount; /** * Cache of boundary positions within a region of text that has been @@ -150,11 +120,30 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { UnhandledEngine *fUnhandledBreakEngine; /** - * - * The type of the break iterator, or -1 if it has not been set. + * Counter for the number of characters encountered with the "dictionary" + * flag set. * @internal */ - int32_t fBreakType; + uint32_t fDictionaryCharCount; + + /** + * A character iterator that refers to the same text as the UText, above. + * Only included for compatibility with old API, which was based on CharacterIterators. + * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. + */ + CharacterIterator *fCharIter; + + /** + * When the input text is provided by a UnicodeString, this will point to + * a characterIterator that wraps that data. Needed only for the + * implementation of getText(), a backwards compatibility issue. + */ + StringCharacterIterator fSCharIter; + + /** + * True when iteration has run off the end, and iterator functions should return UBRK_DONE. + */ + UBool fDone; //======================================================================= // constructors @@ -206,17 +195,17 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { UErrorCode &status); /** - * Contruct a RuleBasedBreakIterator from a set of precompiled binary rules. + * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. * Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules(). 
* Construction of a break iterator in this way is substantially faster than - * constuction from source rules. + * construction from source rules. * * Ownership of the storage containing the compiled rules remains with the * caller of this function. The compiled rules must not be modified or * deleted during the life of the break iterator. * * The compiled rules are not compatible across different major versions of ICU. - * The compiled rules are comaptible only between machines with the same + * The compiled rules are compatible only between machines with the same * byte ordering (little or big endian) and the same base character set family * (ASCII or EBCDIC). * @@ -285,7 +274,7 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * behavior, and iterating over the same text, as this one. * Differs from the copy constructor in that it is polymorphic, and * will correctly clone (copy) a derived class. - * clone() is thread safe. Multiple threads may simultaeneously + * clone() is thread safe. Multiple threads may simultaneously * clone the same source break iterator. * @return a newly-constructed RuleBasedBreakIterator * @stable ICU 2.0 @@ -450,7 +439,7 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { virtual int32_t preceding(int32_t offset); /** - * Returns true if the specfied position is a boundary position. As a side + * Returns true if the specified position is a boundary position. As a side * effect, leaves the iterator pointing to the first boundary position at * or after "offset". * @param offset the offset to check. @@ -471,8 +460,8 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { /** - * Return the status tag from the break rule that determined the most recently - * returned break position. For break rules that do not specify a + * Return the status tag from the break rule that determined the boundary at + * the current iteration position. 
For break rules that do not specify a * status, a default value of 0 is returned. If more than one break rule * would cause a boundary to be located at some position in the text, * the numerically largest of the applicable status values is returned. @@ -489,16 +478,14 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * position from next(), previous(), or * any other break iterator functions that returns a boundary position. *

+ * Note that getRuleStatus() returns the value corresponding to + * current() index even after next() has returned DONE. + *

* When creating custom break rules, one is free to define whatever * status values may be convenient for the application. *

- * Note: this function is not thread safe. It should not have been - * declared const, and the const remains only for compatibility - * reasons. (The function is logically const, but not bit-wise const). - * TODO: check this. Probably thread safe now. - *

- * @return the status from the break rule that determined the most recently - * returned break position. + * @return the status from the break rule that determined the boundary + * at the current iteration position. * * @see UWordBreak * @stable ICU 2.2 @@ -506,8 +493,8 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { virtual int32_t getRuleStatus() const; /** - * Get the status (tag) values from the break rule(s) that determined the most - * recently returned break position. + * Get the status (tag) values from the break rule(s) that determined the boundary + * at the current iteration position. *

* The returned status value(s) are stored into an array provided by the caller. * The values are stored in sorted (ascending) order. @@ -518,10 +505,10 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * @param fillInVec an array to be filled in with the status values. * @param capacity the length of the supplied vector. A length of zero causes * the function to return the number of status values, in the - * normal way, without attemtping to store any values. + * normal way, without attempting to store any values. * @param status receives error codes. - * @return The number of rule status values from rules that determined - * the most recent boundary returned by the break iterator. + * @return The number of rule status values from the rules that determined + * the boundary at the current iteration position. * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value * is the total number of status values that were available, * not the reduced number that were actually returned. @@ -561,7 +548,7 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * * Create a clone (copy) of this break iterator in memory provided * by the caller. The idea is to increase performance by avoiding - * a storage allocation. Use of this functoin is NOT RECOMMENDED. + * a storage allocation. Use of this function is NOT RECOMMENDED. * Performance gains are minimal, and correct buffer management is * tricky. Use clone() instead. * @@ -574,7 +561,7 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * storage for the cloned object. * * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be - * returned if the the provided buffer was too small, and + * returned if the provided buffer was too small, and * the clone was therefore put on the heap. * * @return Pointer to the clone object. 
This may differ from the stackBuffer @@ -597,7 +584,7 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * The binary data can only be used with the same version of ICU * and on the same platform type (processor endian-ness) * - * @param length Returns the length of the binary data. (Out paramter.) + * @param length Returns the length of the binary data. (Out parameter.) * * @return A pointer to the binary (compiled) rule data. The storage * belongs to the RulesBasedBreakIterator object, not the @@ -645,12 +632,6 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { */ void reset(void); - /** - * Set the type of the break iterator. - * @internal - */ - void setBreakType(int32_t type); - /** * Common initialization function, used by constructors and bufferClone. * @internal @@ -697,6 +678,13 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * @internal */ void dumpCache(); + + /** + * Debugging function only. + * @internal + */ + void dumpTables(); + #endif /* U_HIDE_INTERNAL_API */ }; diff --git a/deps/icu-small/source/common/unicode/resbund.h b/deps/icu-small/source/common/unicode/resbund.h index 358ed7eeb9e535..ab0b60bbb28fcc 100644 --- a/deps/icu-small/source/common/unicode/resbund.h +++ b/deps/icu-small/source/common/unicode/resbund.h @@ -132,7 +132,7 @@ class U_COMMON_API ResourceBundle : public UObject { ResourceBundle(UErrorCode &err); /** - * Standard constructor, onstructs a resource bundle for the locale-specific + * Standard constructor, constructs a resource bundle for the locale-specific * bundle in the specified package. 
* * @param packageName The packageName and locale together point to an ICU udata object, diff --git a/deps/icu-small/source/common/unicode/schriter.h b/deps/icu-small/source/common/unicode/schriter.h index d83a57f8d04b57..1a12769e8df106 100644 --- a/deps/icu-small/source/common/unicode/schriter.h +++ b/deps/icu-small/source/common/unicode/schriter.h @@ -69,7 +69,7 @@ class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator { * Create an iterator over the UnicodeString referred to by "textStr". * The UnicodeString object is copied. * The iteration range begins with the code unit specified by - * "textBegin" and ends with the code unit BEFORE the code unit specfied + * "textBegin" and ends with the code unit BEFORE the code unit specified * by "textEnd". The starting position is specified by "textPos". If * "textBegin" and "textEnd" don't form a valid range on "text" (i.e., * textBegin >= textEnd or either is negative or greater than text.size()), diff --git a/deps/icu-small/source/common/unicode/ubidi.h b/deps/icu-small/source/common/unicode/ubidi.h index ef21f2420666cd..254a5bf9ef469f 100644 --- a/deps/icu-small/source/common/unicode/ubidi.h +++ b/deps/icu-small/source/common/unicode/ubidi.h @@ -692,7 +692,7 @@ typedef enum UBiDiReorderingMode { * @stable ICU 3.6 */ UBIDI_REORDER_DEFAULT = 0, /** Logical to Visual algorithm which handles numbers in a way which - * mimicks the behavior of Windows XP. + * mimics the behavior of Windows XP. * @stable ICU 3.6 */ UBIDI_REORDER_NUMBERS_SPECIAL, /** Logical to Visual algorithm grouping numbers with adjacent R characters @@ -1142,7 +1142,7 @@ ubidi_setContext(UBiDi *pBiDi, /** * Perform the Unicode Bidi algorithm. It is defined in the - * Unicode Standard Anned #9, + * Unicode Standard Annex #9, * version 13, * also described in The Unicode Standard, Version 4.0 .

* diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h index 600328c49c66bc..73c1553b243957 100644 --- a/deps/icu-small/source/common/unicode/ubrk.h +++ b/deps/icu-small/source/common/unicode/ubrk.h @@ -268,7 +268,6 @@ ubrk_openRules(const UChar *rules, UParseError *parseErr, UErrorCode *status); -#ifndef U_HIDE_DRAFT_API /** * Open a new UBreakIterator for locating text boundaries using precompiled binary rules. * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules. @@ -287,15 +286,13 @@ ubrk_openRules(const UChar *rules, * @param status Pointer to UErrorCode to receive any errors. * @return UBreakIterator for the specified rules. * @see ubrk_getBinaryRules - * @draft ICU 59 + * @stable ICU 59 */ -U_DRAFT UBreakIterator* U_EXPORT2 +U_STABLE UBreakIterator* U_EXPORT2 ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, const UChar * text, int32_t textLength, UErrorCode * status); -#endif /* U_HIDE_DRAFT_API */ - /** * Thread safe cloning operation * @param bi iterator to be cloned @@ -510,7 +507,7 @@ ubrk_countAvailable(void); /** -* Returns true if the specfied position is a boundary position. As a side +* Returns true if the specified position is a boundary position. As a side * effect, leaves the iterator pointing to the first boundary position at * or after "offset". * @param bi The break iterator to use. @@ -544,7 +541,7 @@ ubrk_getRuleStatus(UBreakIterator *bi); * @param fillInVec an array to be filled in with the status values. * @param capacity the length of the supplied vector. A length of zero causes * the function to return the number of status values, in the - * normal way, without attemtping to store any values. + * normal way, without attempting to store any values. * @param status receives error codes. * @return The number of rule status values from rules that determined * the most recent boundary returned by the break iterator. 
@@ -596,7 +593,6 @@ ubrk_refreshUText(UBreakIterator *bi, UErrorCode *status); -#ifndef U_HIDE_DRAFT_API /** * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator. * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator @@ -620,15 +616,13 @@ ubrk_refreshUText(UBreakIterator *bi, * otherwise 0. If not preflighting and this is larger than * rulesCapacity, *status will be set to an error. * @see ubrk_openBinaryRules - * @draft ICU 59 + * @stable ICU 59 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ubrk_getBinaryRules(UBreakIterator *bi, uint8_t * binaryRules, int32_t rulesCapacity, UErrorCode * status); -#endif /* U_HIDE_DRAFT_API */ - #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ #endif diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h index 3613374d9a43a5..4b72ecfc26bf9a 100644 --- a/deps/icu-small/source/common/unicode/uchar.h +++ b/deps/icu-small/source/common/unicode/uchar.h @@ -112,11 +112,11 @@ U_CDECL_BEGIN * Comparison: * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; * most of general categories "Z" (separators) + most whitespace ISO controls - * (including no-break spaces, but excluding IS1..IS4 and ZWSP) + * (including no-break spaces, but excluding IS1..IS4) * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) * - u_isspace: Z + whitespace ISO controls (including no-break spaces) - * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP + * - u_isblank: "horizontal spaces" = TAB + Zs */ /** @@ -2702,8 +2702,7 @@ u_isgraph(UChar32 c); * * same as * - * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators) - * except Zero Width Space (ZWSP, U+200B). + * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators). 
* * Note: There are several ICU whitespace functions; please see the uchar.h * file documentation for a detailed comparison. diff --git a/deps/icu-small/source/common/unicode/uclean.h b/deps/icu-small/source/common/unicode/uclean.h index 3f73af37b83e81..ab0cd6da6ba780 100644 --- a/deps/icu-small/source/common/unicode/uclean.h +++ b/deps/icu-small/source/common/unicode/uclean.h @@ -70,7 +70,7 @@ u_init(UErrorCode *status); * This has the effect of restoring ICU to its initial condition, before * any of these override functions were installed. Refer to * u_setMemoryFunctions(), u_setMutexFunctions and - * utrace_setFunctions(). If ICU is to be reinitialized after after + * utrace_setFunctions(). If ICU is to be reinitialized after * calling u_cleanup(), these runtime override functions will need to * be set up again if they are still required. *

@@ -104,7 +104,7 @@ u_cleanup(void); U_CDECL_BEGIN /** * Pointer type for a user supplied memory allocation function. - * @param context user supplied value, obtained from from u_setMemoryFunctions(). + * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param size The number of bytes to be allocated * @return Pointer to the newly allocated memory, or NULL if the allocation failed. * @stable ICU 2.8 @@ -113,7 +113,7 @@ U_CDECL_BEGIN typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size); /** * Pointer type for a user supplied memory re-allocation function. - * @param context user supplied value, obtained from from u_setMemoryFunctions(). + * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param size The number of bytes to be allocated * @return Pointer to the newly allocated memory, or NULL if the allocation failed. * @stable ICU 2.8 @@ -123,7 +123,7 @@ typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t si /** * Pointer type for a user supplied memory free function. Behavior should be * similar the standard C library free(). - * @param context user supplied value, obtained from from u_setMemoryFunctions(). + * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param mem Pointer to the memory block to be resized * @param size The new size for the block * @return Pointer to the resized memory block, or NULL if the resizing failed. @@ -179,8 +179,8 @@ U_CDECL_BEGIN * The user-supplied function will be called by ICU whenever ICU needs to create a * new mutex. The function implementation should create a mutex, and store a pointer * to something that uniquely identifies the mutex into the UMTX that is supplied - * as a paramter. - * @param context user supplied value, obtained from from u_setMutexFunctions(). + * as a parameter. + * @param context user supplied value, obtained from u_setMutexFunctions(). 
* @param mutex Receives a pointer that identifies the new mutex. * The mutex init function must set the UMTX to a non-null value. * Subsequent calls by ICU to lock, unlock, or destroy a mutex will @@ -197,7 +197,7 @@ typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCod * Function Pointer type for a user supplied mutex functions. * One of the user-supplied functions with this signature will be called by ICU * whenever ICU needs to lock, unlock, or destroy a mutex. - * @param context user supplied value, obtained from from u_setMutexFunctions(). + * @param context user supplied value, obtained from u_setMutexFunctions(). * @param mutex specify the mutex on which to operate. * @deprecated ICU 52. This function is no longer supported. * @system @@ -229,7 +229,7 @@ u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtx /** * Pointer type for a user supplied atomic increment or decrement function. - * @param context user supplied value, obtained from from u_setAtomicIncDecFunctions(). + * @param context user supplied value, obtained from u_setAtomicIncDecFunctions(). * @param p Pointer to a 32 bit int to be incremented or decremented * @return The value of the variable after the inc or dec operation. * @deprecated ICU 52. This function is no longer supported. diff --git a/deps/icu-small/source/common/unicode/ucnv.h b/deps/icu-small/source/common/unicode/ucnv.h index 05d0050f4a2fc9..53b4c6f0733aca 100644 --- a/deps/icu-small/source/common/unicode/ucnv.h +++ b/deps/icu-small/source/common/unicode/ucnv.h @@ -207,7 +207,7 @@ typedef void (U_EXPORT2 *UConverterToUCallback) ( /** * Function pointer for error callback in the unicode to codepage direction. - * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason). + * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason). 
* @param context Pointer to the callback's private data * @param args Information about the conversion in progress * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence @@ -353,7 +353,7 @@ ucnv_compareNames(const char *name1, const char *name2); * ucnv_getAlias for a complete list that is available. * If this parameter is NULL, the default converter will be used. * @param err outgoing error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR - * @return the created Unicode converter object, or NULL if an error occured + * @return the created Unicode converter object, or NULL if an error occurred * @see ucnv_openU * @see ucnv_openCCSID * @see ucnv_getAvailableName @@ -386,7 +386,7 @@ ucnv_open(const char *converterName, UErrorCode *err); * @param err outgoing error status U_MEMORY_ALLOCATION_ERROR, * U_FILE_ACCESS_ERROR * @return the created Unicode converter object, or NULL if an - * error occured + * error occurred * @see ucnv_open * @see ucnv_openCCSID * @see ucnv_close @@ -489,7 +489,7 @@ ucnv_openCCSID(int32_t codepage, * @param packageName name of the package (equivalent to 'path' in udata_open() call) * @param converterName name of the data item to be used, without suffix. 
* @param err outgoing error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR - * @return the created Unicode converter object, or NULL if an error occured + * @return the created Unicode converter object, or NULL if an error occurred * @see udata_open * @see ucnv_open * @see ucnv_safeClone diff --git a/deps/icu-small/source/common/unicode/ucnv_err.h b/deps/icu-small/source/common/unicode/ucnv_err.h index e8a79bcd815fbc..08c96c14407c22 100644 --- a/deps/icu-small/source/common/unicode/ucnv_err.h +++ b/deps/icu-small/source/common/unicode/ucnv_err.h @@ -119,19 +119,19 @@ typedef struct UConverter UConverter; #define UCNV_ESCAPE_JAVA "J" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) - * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) + * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX) * @stable ICU 2.0 */ #define UCNV_ESCAPE_C "C" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly * @stable ICU 2.0 */ #define UCNV_ESCAPE_XML_DEC "D" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly * @stable ICU 2.0 */ #define UCNV_ESCAPE_XML_HEX "X" @@ -171,7 +171,7 @@ typedef enum { code points. The error code U_INVALID_CHAR_FOUND will be set. 
*/ UCNV_RESET = 3, /**< The callback is called with this reason when a - 'reset' has occured. Callback should reset all + 'reset' has occurred. Callback should reset all state. */ UCNV_CLOSE = 4, /**< Called when the converter is closed. The callback should release any allocated memory.*/ @@ -199,7 +199,7 @@ typedef struct { const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ } UConverterFromUnicodeArgs; @@ -215,7 +215,7 @@ typedef struct { const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ } UConverterToUnicodeArgs; diff --git a/deps/icu-small/source/common/unicode/ucurr.h b/deps/icu-small/source/common/unicode/ucurr.h index ecb54d146f6ee3..adfaf0023bb7cf 100644 --- a/deps/icu-small/source/common/unicode/ucurr.h +++ b/deps/icu-small/source/common/unicode/ucurr.h @@ -103,6 +103,19 @@ typedef enum UCurrNameStyle { * @stable ICU 2.6 */ UCURR_LONG_NAME + +#ifndef U_HIDE_DRAFT_API + , + /** + * Selector for getName() indicating the narrow currency symbol. 
+ * The narrow currency symbol is similar to the regular currency + * symbol, but it always takes the shortest form: for example, + * "$" instead of "US$" for USD in en-CA. + * + * @draft ICU 61 + */ + UCURR_NARROW_SYMBOL_NAME +#endif // U_HIDE_DRAFT_API } UCurrNameStyle; #if !UCONFIG_NO_SERVICE diff --git a/deps/icu-small/source/common/unicode/umachine.h b/deps/icu-small/source/common/unicode/umachine.h index 30de4dba0dbf5b..a9dc1631b00e70 100644 --- a/deps/icu-small/source/common/unicode/umachine.h +++ b/deps/icu-small/source/common/unicode/umachine.h @@ -299,6 +299,10 @@ typedef int8_t UBool; // for AIX, uchar.h needs to be included # include # define U_CHAR16_IS_TYPEDEF 1 +#elif defined(_MSC_VER) && (_MSC_VER < 1900) +// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, +// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx +# define U_CHAR16_IS_TYPEDEF 1 #else # define U_CHAR16_IS_TYPEDEF 0 #endif @@ -366,7 +370,7 @@ typedef int8_t UBool; * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. * The current UChar responds to UCHAR_TYPE but OldUChar does not. 
* - * @draft ICU 59 + * @stable ICU 59 */ #if U_SIZEOF_WCHAR_T==2 typedef wchar_t OldUChar; diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h index 914818a00ec41a..c2e0ad48bd7b2c 100644 --- a/deps/icu-small/source/common/unicode/uniset.h +++ b/deps/icu-small/source/common/unicode/uniset.h @@ -1521,6 +1521,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { UnicodeString& rebuiltPat, uint32_t options, UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), + int32_t depth, UErrorCode& ec); //---------------------------------------------------------------- diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h index b99a686126c4e1..d0b271754b660e 100644 --- a/deps/icu-small/source/common/unicode/unistr.h +++ b/deps/icu-small/source/common/unicode/unistr.h @@ -2995,10 +2995,6 @@ class U_COMMON_API UnicodeString : public Replaceable */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * uint16_t * constructor. @@ -3008,16 +3004,12 @@ class U_COMMON_API UnicodeString : public Replaceable * -DUNISTR_FROM_STRING_EXPLICIT=explicit * on the compiler command line or similar. * @param text NUL-terminated UTF-16 string - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : UnicodeString(ConstChar16Ptr(text)) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * wchar_t * constructor. @@ -3028,16 +3020,12 @@ class U_COMMON_API UnicodeString : public Replaceable * -DUNISTR_FROM_STRING_EXPLICIT=explicit * on the compiler command line or similar. 
* @param text NUL-terminated UTF-16 string - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : UnicodeString(ConstChar16Ptr(text)) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. @@ -3046,7 +3034,7 @@ class U_COMMON_API UnicodeString : public Replaceable * -DUNISTR_FROM_STRING_EXPLICIT=explicit * on the compiler command line or similar. * @param text nullptr - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); @@ -3060,26 +3048,18 @@ class U_COMMON_API UnicodeString : public Replaceable UnicodeString(const char16_t *text, int32_t textLength); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * uint16_t * constructor. * Delegates to UnicodeString(const char16_t *, int32_t). * @param text UTF-16 string * @param length string length - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(const uint16_t *text, int32_t length) : UnicodeString(ConstChar16Ptr(text), length) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * wchar_t * constructor. @@ -3087,22 +3067,18 @@ class U_COMMON_API UnicodeString : public Replaceable * Delegates to UnicodeString(const char16_t *, int32_t). 
* @param text NUL-terminated UTF-16 string * @param length string length - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(const wchar_t *text, int32_t length) : UnicodeString(ConstChar16Ptr(text), length) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. * @param text nullptr * @param length ignored - * @draft ICU 59 + * @stable ICU 59 */ inline UnicodeString(const std::nullptr_t text, int32_t length); @@ -3152,10 +3128,6 @@ class U_COMMON_API UnicodeString : public Replaceable */ UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * Writable-aliasing uint16_t * constructor. @@ -3163,16 +3135,12 @@ class U_COMMON_API UnicodeString : public Replaceable * @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents * @param buffCapacity buffer capacity - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * Writable-aliasing wchar_t * constructor. 
@@ -3181,23 +3149,19 @@ class U_COMMON_API UnicodeString : public Replaceable * @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents * @param buffCapacity buffer capacity - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * Writable-aliasing nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. * @param buffer nullptr * @param buffLength ignored * @param buffCapacity ignored - * @draft ICU 59 + * @stable ICU 59 */ inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index 982655c4425e0b..d8ab85091f5721 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -107,7 +107,6 @@ #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) #define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) -#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl) #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) #define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) #define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) @@ -446,7 +445,6 @@ #define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions) #define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength) #define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns) -#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton) #define ubidi_getText 
U_ICU_ENTRY_POINT_RENAME(ubidi_getText) #define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex) #define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap) @@ -551,6 +549,7 @@ #define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) #define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) #define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) +#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie) #define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) #define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) #define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) @@ -862,6 +861,7 @@ #define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) #define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) #define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) +#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName) #define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton) #define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton) #define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open) @@ -1326,7 +1326,6 @@ #define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime) #define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName) #define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) -#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties) #define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) #define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) #define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) diff --git a/deps/icu-small/source/common/unicode/ures.h b/deps/icu-small/source/common/unicode/ures.h index 918b9f208e2cd0..af0ce76f25b1a2 100644 --- 
a/deps/icu-small/source/common/unicode/ures.h +++ b/deps/icu-small/source/common/unicode/ures.h @@ -16,7 +16,7 @@ * 04/04/99 helena Fixed internal header inclusion. * 04/15/99 Madhu Updated Javadoc * 06/14/99 stephen Removed functions taking a filename suffix. -* 07/20/99 stephen Language-independent ypedef to void* +* 07/20/99 stephen Language-independent typedef to void* * 11/09/99 weiv Added ures_getLocale() * 06/24/02 weiv Added support for resource sharing ****************************************************************************** @@ -138,7 +138,7 @@ typedef enum { /** * Opens a UResourceBundle, from which users can extract strings by using * their corresponding keys. - * Note that the caller is responsible of calling ures_close on each succesfully + * Note that the caller is responsible of calling ures_close on each successfully * opened resource bundle. * @param packageName The packageName and locale together point to an ICU udata object, * as defined by udata_open( packageName, "res", locale, err) @@ -301,7 +301,7 @@ ures_getVersion(const UResourceBundle* resB, * you to query for the real locale of the resource. For example, if you requested * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. * For subresources, the locale where this resource comes from will be returned. - * If fallback has occured, getLocale will reflect this. + * If fallback has occurred, getLocale will reflect this. * * @param resourceBundle resource bundle in question * @param status just for catching illegal arguments @@ -580,7 +580,7 @@ ures_hasNext(const UResourceBundle *resourceBundle); * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. * Alternatively, you can supply a struct to be filled by this function. * @param status fills in the outgoing error code. You may still get a non NULL result even if an - * error occured. Check status instead. + * error occurred. Check status instead. 
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it * @stable ICU 2.0 */ @@ -596,7 +596,7 @@ ures_getNextResource(UResourceBundle *resourceBundle, * @param resourceBundle a resource * @param len fill in length of the string * @param key fill in for key associated with this string. NULL if no key - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. * @stable ICU 2.0 @@ -615,7 +615,7 @@ ures_getNextString(UResourceBundle *resourceBundle, * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. * Alternatively, you can supply a struct to be filled by this function. * @param status fills in the outgoing error code. Don't count on NULL being returned if an error has - * occured. Check status instead. + * occurred. Check status instead. * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it * @stable ICU 2.0 */ @@ -631,7 +631,7 @@ ures_getByIndex(const UResourceBundle *resourceBundle, * @param resourceBundle a resource * @param indexS an index to the wanted string. * @param len fill in length of the string - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. 
* @stable ICU 2.0 @@ -722,7 +722,7 @@ ures_getByKey(const UResourceBundle *resourceBundle, * @param resB a resource * @param key a key associated with the wanted string * @param len fill in length of the string - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. * @stable ICU 2.0 diff --git a/deps/icu-small/source/common/unicode/uscript.h b/deps/icu-small/source/common/unicode/uscript.h index 3ec235d50ce2c2..0befa1cd422c20 100644 --- a/deps/icu-small/source/common/unicode/uscript.h +++ b/deps/icu-small/source/common/unicode/uscript.h @@ -476,7 +476,7 @@ typedef enum UScriptCode { * @param nameOrAbbrOrLocale name of the script, as given in * PropertyValueAliases.txt, or ISO 15924 code or locale * @param fillIn the UScriptCode buffer to fill in the script code - * @param capacity the capacity (size) fo UScriptCode buffer passed in. + * @param capacity the capacity (size) of UScriptCode buffer passed in. * @param err the error status code. * @return The number of script codes filled in the buffer passed in * @stable ICU 2.4 diff --git a/deps/icu-small/source/common/unicode/ushape.h b/deps/icu-small/source/common/unicode/ushape.h index 5af8ffe1c58c31..3064e0857281c5 100644 --- a/deps/icu-small/source/common/unicode/ushape.h +++ b/deps/icu-small/source/common/unicode/ushape.h @@ -93,7 +93,7 @@ * which must not indicate a failure before the function call. * * @return The number of UChars written to the destination buffer. - * If an error occured, then no output was written, or it may be + * If an error occurred, then no output was written, or it may be * incomplete. If U_BUFFER_OVERFLOW_ERROR is set, then * the return value indicates the necessary destination buffer size. 
* @stable ICU 2.0 diff --git a/deps/icu-small/source/common/unicode/usprep.h b/deps/icu-small/source/common/unicode/usprep.h index 33ca1461ce0813..7cdc6cdd18ea6b 100644 --- a/deps/icu-small/source/common/unicode/usprep.h +++ b/deps/icu-small/source/common/unicode/usprep.h @@ -33,14 +33,14 @@ * StringPrep prepares Unicode strings for use in network protocols. * Profiles of StingPrep are set of rules and data according to with the * Unicode Strings are prepared. Each profiles contains tables which describe - * how a code point should be treated. The tables are broadly classied into + * how a code point should be treated. The tables are broadly classified into *

    - *
  • Unassinged Table: Contains code points that are unassigned + *
  • Unassigned Table: Contains code points that are unassigned * in the Unicode Version supported by StringPrep. Currently * RFC 3454 supports Unicode 3.2.
  • - *
  • Prohibited Table: Contains code points that are prohibted from + *
  • Prohibited Table: Contains code points that are prohibited from * the output of the StringPrep processing function.
  • - *
  • Mapping Table: Contains code ponts that are deleted from the output or case mapped.
  • + *
  • Mapping Table: Contains code points that are deleted from the output or case mapped.
  • *
* * The procedure for preparing Unicode strings: @@ -230,7 +230,7 @@ U_NAMESPACE_END /** * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC), - * checks for prohited and BiDi characters in the order defined by RFC 3454 + * checks for prohibited and BiDi characters in the order defined by RFC 3454 * depending on the options specified in the profile. * * @param prep The profile to use diff --git a/deps/icu-small/source/common/unicode/ustring.h b/deps/icu-small/source/common/unicode/ustring.h index 1ea27126cc4de9..cf6ec0b6b4c9d2 100644 --- a/deps/icu-small/source/common/unicode/ustring.h +++ b/deps/icu-small/source/common/unicode/ustring.h @@ -403,7 +403,7 @@ u_strspn(const UChar *string, const UChar *matchSet); * @param saveState The current pointer within the original string, * which is set by this function. The saveState * parameter should the address of a local variable of type - * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use + * UChar *. (i.e. defined "UChar *myLocalSaveState" and use * &myLocalSaveState for this parameter). * @return A pointer to the next token found in src, or NULL * when there are no more tokens. @@ -884,7 +884,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count); * Unicode String literals in C. * We need one macro to declare a variable for the string * and to statically preinitialize it if possible, - * and a second macro to dynamically intialize such a string variable if necessary. + * and a second macro to dynamically initialize such a string variable if necessary. * * The macros are defined for maximum performance. 
* They work only for strings that contain "invariant characters", i.e., diff --git a/deps/icu-small/source/common/unicode/utext.h b/deps/icu-small/source/common/unicode/utext.h index 7eea1da240c1d5..51d11a2e00efb4 100644 --- a/deps/icu-small/source/common/unicode/utext.h +++ b/deps/icu-small/source/common/unicode/utext.h @@ -655,10 +655,10 @@ utext_getPreviousNativeIndex(UText *ut); * @param ut the UText from which to extract data. * @param nativeStart the native index of the first character to extract.\ * If the specified index is out of range, - * it will be pinned to to be within 0 <= index <= textLength + * it will be pinned to be within 0 <= index <= textLength * @param nativeLimit the native string index of the position following the last * character to extract. If the specified index is out of range, - * it will be pinned to to be within 0 <= index <= textLength. + * it will be pinned to be within 0 <= index <= textLength. * nativeLimit must be >= nativeStart. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed * @param destCapacity The size, in UChars, of the destination buffer. May be zero @@ -906,7 +906,7 @@ utext_copy(UText *ut, * Caution: freezing a UText will disable changes made via the specific * frozen UText wrapper only; it will not have any effect on the ability to * directly modify the text by bypassing the UText. Any such backdoor modifications - * are always an error while UText access is occuring because the underlying + * are always an error while UText access is occurring because the underlying * text can get out of sync with UText's buffering. *

* @@ -1452,7 +1452,7 @@ struct UText { void *pExtra; /** - * (protected) Pointer to string or text-containin object or similar. + * (protected) Pointer to string or text-containing object or similar. * This is the source of the text that this UText is wrapping, in a format * that is known to the text provider functions. * @stable ICU 3.4 diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h index 59b4b2557021f0..1f076343590240 100644 --- a/deps/icu-small/source/common/unicode/utf8.h +++ b/deps/icu-small/source/common/unicode/utf8.h @@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT_UNSAFE * @stable ICU 2.4 */ -#define U8_NEXT(s, i, length, c) { \ - (c)=(uint8_t)(s)[(i)++]; \ - if(!U8_IS_SINGLE(c)) { \ - uint8_t __t1, __t2; \ - if( /* handle U+0800..U+FFFF inline */ \ - (0xe0<=(c) && (c)<0xf0) && \ - (((i)+1)<(length) || (length)<0) && \ - U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ - (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ - (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(s)[i]-0x80)<=0x3f) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ - } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \ - } \ - } \ -} +#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) /** * Get a code point from a string at a code point boundary offset, @@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT * @stable ICU 51 */ -#define U8_NEXT_OR_FFFD(s, i, length, c) { \ +#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) + +/** @internal */ +#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \ (c)=(uint8_t)(s)[(i)++]; \ if(!U8_IS_SINGLE(c)) { \ - uint8_t __t1, __t2; \ 
- if( /* handle U+0800..U+FFFF inline */ \ - (0xe0<=(c) && (c)<0xf0) && \ - (((i)+1)<(length) || (length)<0) && \ - U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ - (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ - (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(s)[i]-0x80)<=0x3f) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ + uint8_t __t = 0; \ + if((i)!=(length) && \ + /* fetch/validate/assemble all but last trail byte */ \ + ((c)>=0xe0 ? \ + ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ + (__t&=0x3f, 1) \ + : /* U+10000..U+10FFFF */ \ + ((c)-=0xf0)<=4 && \ + U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ + ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ + (__t=(s)[i]-0x80)<=0x3f) && \ + /* valid second-to-last trail byte */ \ + ((c)=((c)<<6)|__t, ++(i)!=(length)) \ + : /* U+0080..U+07FF */ \ + (c)>=0xc2 && ((c)&=0x1f, 1)) && \ + /* last trail byte */ \ + (__t=(s)[i]-0x80)<=0x3f && \ + ((c)=((c)<<6)|__t, ++(i), 1)) { \ } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \ + (c)=(sub); /* ill-formed*/ \ } \ } \ } @@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_APPEND_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ } else { \ - if((uint32_t)(c)<=0x7ff) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + if(__uc<=0x7ff) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ } else { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + if(__uc<=0xffff) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ } else { \ - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + 
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ } \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ } \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } \ } @@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_APPEND(s, i, capacity, c, isError) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else if(__uc<=0x7ff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } else { \ - (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \ + (isError)=TRUE; \ } \ } @@ -600,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * If the offset points to a UTF-8 trail byte, * then the offset is moved backward to the corresponding lead byte. * Otherwise, it is not modified. + * * "Safe" macro, checks for illegal sequences and for string boundaries. 
+ * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i]. * * @param s const uint8_t * string * @param start int32_t starting string offset (usually 0) * @param i int32_t string offset, must be start<=i * @see U8_SET_CP_START_UNSAFE + * @see U8_TRUNCATE_IF_INCOMPLETE * @stable ICU 2.4 */ #define U8_SET_CP_START(s, start, i) { \ @@ -614,6 +609,57 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); } \ } +#ifndef U_HIDE_DRAFT_API +/** + * If the string ends with a UTF-8 byte sequence that is valid so far + * but incomplete, then reduce the length of the string to end before + * the lead byte of that incomplete sequence. + * For example, if the string ends with E1 80, the length is reduced by 2. + * + * In all other cases (the string ends with a complete sequence, or it is not + * possible for any further trail byte to extend the trailing sequence) + * the length remains unchanged. + * + * Useful for processing text split across multiple buffers + * (save the incomplete sequence for later) + * and for optimizing iteration + * (check for string length only once per character). + * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_SET_CP_START(), this macro never reads s[length]. + * + * (In UTF-16, simply check for U16_IS_LEAD(last code unit).) + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param length int32_t string length (usually start<=length) + * @see U8_SET_CP_START + * @draft ICU 61 + */ +#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) \ + if((length)>(start)) { \ + uint8_t __b1=s[(length)-1]; \ + if(U8_IS_SINGLE(__b1)) { \ + /* common ASCII character */ \ + } else if(U8_IS_LEAD(__b1)) { \ + --(length); \ + } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \ + uint8_t __b2=s[(length)-2]; \ + if(0xe0<=__b2 && __b2<=0xf4) { \ + if(__b2<0xf0 ? 
U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \ + U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \ + (length)-=2; \ + } \ + } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \ + uint8_t __b3=s[(length)-3]; \ + if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \ + (length)-=3; \ + } \ + } \ + } \ + } +#endif // U_HIDE_DRAFT_API + /* definitions with backward iteration -------------------------------------- */ /** diff --git a/deps/icu-small/source/common/unicode/utrace.h b/deps/icu-small/source/common/unicode/utrace.h index 5d561109c7f8fd..bf6fd036f06c41 100644 --- a/deps/icu-small/source/common/unicode/utrace.h +++ b/deps/icu-small/source/common/unicode/utrace.h @@ -183,7 +183,7 @@ UTraceData(const void *context, int32_t fnNumber, int32_t level, * tracing functions must themselves filter by checking that the * current thread is the desired thread. * - * @param context an uninterpretted pointer. Whatever is passed in + * @param context an uninterpreted pointer. Whatever is passed in * here will in turn be passed to each of the tracing * functions UTraceEntry, UTraceExit and UTraceData. * ICU does not use or alter this pointer. @@ -320,7 +320,7 @@ utrace_getFunctions(const void **context, * human readable form. Note that a UTraceData function may choose * to not format the data; it could, for example, save it in * in the raw form it was received (more compact), leaving - * formatting for a later trace analyis tool. + * formatting for a later trace analysis tool. * @param outBuf pointer to a buffer to receive the formatted output. Output * will be nul terminated if there is space in the buffer - * if the length of the requested output < the output buffer size. 
diff --git a/deps/icu-small/source/common/unicode/utypes.h b/deps/icu-small/source/common/unicode/utypes.h index 4c40e6a87c7653..b6cf4965112a16 100644 --- a/deps/icu-small/source/common/unicode/utypes.h +++ b/deps/icu-small/source/common/unicode/utypes.h @@ -145,7 +145,7 @@ /** * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library. * Defined as a literal, not a string. - * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string + * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string * from the corresponding macro invocation, _before_ other macro substitutions. * Need a nested \#defines to get the actual version numbers rather than * the literal text U_ICU_VERSION_MAJOR_NUM into the name. @@ -446,14 +446,14 @@ typedef enum UErrorCode { U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ - U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */ + U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */ U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. 
- It is very possible that a circular alias definition has occured */ + It is very possible that a circular alias definition has occurred */ U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ @@ -499,7 +499,7 @@ typedef enum UErrorCode { U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */ U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ - U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */ + U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */ U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ @@ -539,12 +539,15 @@ typedef enum UErrorCode { U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ +#ifndef U_HIDE_DRAFT_API + U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */ +#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_FMT_PARSE_ERROR_LIMIT, + U_FMT_PARSE_ERROR_LIMIT = 0x10113, #endif // U_HIDE_DEPRECATED_API /* @@ -555,7 +558,7 @@ typedef enum UErrorCode { U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. 
*/ U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ - U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */ + U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */ U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */ U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ @@ -564,7 +567,7 @@ typedef enum UErrorCode { U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ - U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */ + U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal BreakIterator error code. diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index d905a0f50d1d29..0427bcb03db4e3 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -58,13 +58,13 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 60 +#define U_ICU_VERSION_MAJOR_NUM 61 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 2 +#define U_ICU_VERSION_MINOR_NUM 1 /** The current ICU patchlevel version as an integer. 
* This value will change in the subsequent releases of ICU @@ -84,7 +84,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _60 +#define U_ICU_VERSION_SUFFIX _61 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -119,19 +119,26 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "60.2" +#define U_ICU_VERSION "61.1" -/** The current ICU library major/minor version as a string without dots, for library name suffixes. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.6 +/** + * The current ICU library major version number as a string, for library name suffixes. + * This value will change in subsequent releases of ICU. + * + * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers + * into one string without dots ("48"). + * Since ICU 49, it is the double-digit major ICU version number. + * See http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU + * + * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "60" +#define U_ICU_VERSION_SHORT "61" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "60.2" +#define U_ICU_DATA_VERSION "61.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/deps/icu-small/source/common/unicode/uversion.h b/deps/icu-small/source/common/unicode/uversion.h index cda24b6e0fc5ba..3f0251d3994bcb 100644 --- a/deps/icu-small/source/common/unicode/uversion.h +++ b/deps/icu-small/source/common/unicode/uversion.h @@ -105,7 +105,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; * @stable ICU 2.4 */ -/* Define namespace symbols if the compiler supports it. */ +/* Define C++ namespace symbols. 
*/ #ifdef __cplusplus # if U_DISABLE_RENAMING # define U_ICU_NAMESPACE icu @@ -122,7 +122,13 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; # define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: # ifndef U_USING_ICU_NAMESPACE -# define U_USING_ICU_NAMESPACE 1 +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ + defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_USING_ICU_NAMESPACE 0 +# else +# define U_USING_ICU_NAMESPACE 0 +# endif # endif # if U_USING_ICU_NAMESPACE U_NAMESPACE_USE diff --git a/deps/icu-small/source/common/unifiedcache.cpp b/deps/icu-small/source/common/unifiedcache.cpp index fd0be593d786f2..f0f660ed06bb19 100644 --- a/deps/icu-small/source/common/unifiedcache.cpp +++ b/deps/icu-small/source/common/unifiedcache.cpp @@ -6,24 +6,26 @@ * others. All Rights Reserved. ****************************************************************************** * -* File UNIFIEDCACHE.CPP +* File unifiedcache.cpp ****************************************************************************** */ -#include "uhash.h" #include "unifiedcache.h" -#include "umutex.h" + +#include // For std::max() + #include "mutex.h" #include "uassert.h" +#include "uhash.h" #include "ucln_cmn.h" +#include "umutex.h" static icu::UnifiedCache *gCache = NULL; -static icu::SharedObject *gNoValue = NULL; static UMutex gCacheMutex = U_MUTEX_INITIALIZER; static UConditionVar gInProgressValueAddedCond = U_CONDITION_INITIALIZER; static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; -static const int32_t MAX_EVICT_ITERATIONS = 10; +static const int32_t MAX_EVICT_ITERATIONS = 10; static const int32_t DEFAULT_MAX_UNUSED = 1000; static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100; @@ -35,10 +37,6 @@ static UBool U_CALLCONV unifiedcache_cleanup() { delete gCache; gCache = NULL; } - if (gNoValue) { - delete gNoValue; - gNoValue = NULL; - } return TRUE; } 
U_CDECL_END @@ -73,23 +71,15 @@ static void U_CALLCONV cacheInit(UErrorCode &status) { ucln_common_registerCleanup( UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup); - // gNoValue must be created first to avoid assertion error in - // cache constructor. - gNoValue = new SharedObject(); gCache = new UnifiedCache(status); if (gCache == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } if (U_FAILURE(status)) { delete gCache; - delete gNoValue; gCache = NULL; - gNoValue = NULL; return; } - // We add a softref because we want hash elements with gNoValue to be - // elligible for purging but we don't ever want gNoValue to be deleted. - gNoValue->addSoftRef(); } UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { @@ -104,14 +94,24 @@ UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { UnifiedCache::UnifiedCache(UErrorCode &status) : fHashtable(NULL), fEvictPos(UHASH_FIRST), - fItemsInUseCount(0), + fNumValuesTotal(0), + fNumValuesInUse(0), fMaxUnused(DEFAULT_MAX_UNUSED), fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE), - fAutoEvictedCount(0) { + fAutoEvictedCount(0), + fNoValue(nullptr) { if (U_FAILURE(status)) { return; } - U_ASSERT(gNoValue != NULL); + fNoValue = new SharedObject(); + if (fNoValue == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted + fNoValue->hardRefCount = 1; // when other references to it are removed. 
+ fNoValue->cachePtr = this; + fHashtable = uhash_open( &ucache_hashKeys, &ucache_compareKeys, @@ -139,7 +139,7 @@ void UnifiedCache::setEvictionPolicy( int32_t UnifiedCache::unusedCount() const { Mutex lock(&gCacheMutex); - return uhash_count(fHashtable) - fItemsInUseCount; + return uhash_count(fHashtable) - fNumValuesInUse; } int64_t UnifiedCache::autoEvictedCount() const { @@ -161,6 +161,12 @@ void UnifiedCache::flush() const { while (_flush(FALSE)); } +void UnifiedCache::handleUnreferencedObject() const { + Mutex lock(&gCacheMutex); + --fNumValuesInUse; + _runEvictionSlice(); +} + #ifdef UNIFIED_CACHE_DEBUG #include @@ -199,7 +205,7 @@ void UnifiedCache::_dumpContents() const { "Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n", key->writeDescription(buffer, 256), key->creationStatus, - sharedObject == gNoValue ? NULL :sharedObject, + sharedObject == fNoValue ? NULL :sharedObject, sharedObject->getRefCount(), sharedObject->getSoftRefCount()); } @@ -219,10 +225,11 @@ UnifiedCache::~UnifiedCache() { _flush(TRUE); } uhash_close(fHashtable); + fHashtable = nullptr; + delete fNoValue; + fNoValue = nullptr; } -// Returns the next element in the cache round robin style. -// On entry, gCacheMutex must be held. const UHashElement * UnifiedCache::_nextElement() const { const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos); @@ -233,46 +240,36 @@ UnifiedCache::_nextElement() const { return element; } -// Flushes the contents of the cache. If cache values hold references to other -// cache values then _flush should be called in a loop until it returns FALSE. -// On entry, gCacheMutex must be held. -// On exit, those values with are evictable are flushed. If all is true -// then every value is flushed even if it is not evictable. -// Returns TRUE if any value in cache was flushed or FALSE otherwise. 
UBool UnifiedCache::_flush(UBool all) const { UBool result = FALSE; int32_t origSize = uhash_count(fHashtable); for (int32_t i = 0; i < origSize; ++i) { const UHashElement *element = _nextElement(); + if (element == nullptr) { + break; + } if (all || _isEvictable(element)) { const SharedObject *sharedObject = (const SharedObject *) element->value.pointer; + U_ASSERT(sharedObject->cachePtr = this); uhash_removeElement(fHashtable, element); - sharedObject->removeSoftRef(); + removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero. result = TRUE; } } return result; } -// Computes how many items should be evicted. -// On entry, gCacheMutex must be held. -// Returns number of items that should be evicted or a value <= 0 if no -// items need to be evicted. int32_t UnifiedCache::_computeCountOfItemsToEvict() const { - int32_t maxPercentageOfInUseCount = - fItemsInUseCount * fMaxPercentageOfInUse / 100; - int32_t maxUnusedCount = fMaxUnused; - if (maxUnusedCount < maxPercentageOfInUseCount) { - maxUnusedCount = maxPercentageOfInUseCount; - } - return uhash_count(fHashtable) - fItemsInUseCount - maxUnusedCount; + int32_t totalItems = uhash_count(fHashtable); + int32_t evictableItems = totalItems - fNumValuesInUse; + + int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100; + int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused); + int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit); + return countOfItemsToEvict; } -// Run an eviction slice. -// On entry, gCacheMutex must be held. -// _runEvictionSlice runs a slice of the evict pipeline by examining the next -// 10 entries in the cache round robin style evicting them if they are eligible. 
void UnifiedCache::_runEvictionSlice() const { int32_t maxItemsToEvict = _computeCountOfItemsToEvict(); if (maxItemsToEvict <= 0) { @@ -280,11 +277,14 @@ void UnifiedCache::_runEvictionSlice() const { } for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) { const UHashElement *element = _nextElement(); + if (element == nullptr) { + break; + } if (_isEvictable(element)) { const SharedObject *sharedObject = (const SharedObject *) element->value.pointer; uhash_removeElement(fHashtable, element); - sharedObject->removeSoftRef(); + removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero. ++fAutoEvictedCount; if (--maxItemsToEvict == 0) { break; @@ -293,11 +293,6 @@ void UnifiedCache::_runEvictionSlice() const { } } - -// Places a new value and creationStatus in the cache for the given key. -// On entry, gCacheMutex must be held. key must not exist in the cache. -// On exit, value and creation status placed under key. Soft reference added -// to value on successful add. On error sets status. void UnifiedCache::_putNew( const CacheKeyBase &key, const SharedObject *value, @@ -312,24 +307,17 @@ void UnifiedCache::_putNew( return; } keyToAdopt->fCreationStatus = creationStatus; - if (value->noSoftReferences()) { + if (value->softRefCount == 0) { _registerMaster(keyToAdopt, value); } - uhash_put(fHashtable, keyToAdopt, (void *) value, &status); + void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status); + U_ASSERT(oldValue == nullptr); + (void)oldValue; if (U_SUCCESS(status)) { - value->addSoftRef(); + value->softRefCount++; } } -// Places value and status at key if there is no value at key or if cache -// entry for key is in progress. Otherwise, it leaves the current value and -// status there. -// On entry. gCacheMutex must not be held. value must be -// included in the reference count of the object to which it points. 
-// On exit, value and status are changed to what was already in the cache if -// something was there and not in progress. Otherwise, value and status are left -// unchanged in which case they are placed in the cache on a best-effort basis. -// Caller must call removeRef() on value. void UnifiedCache::_putIfAbsentAndGet( const CacheKeyBase &key, const SharedObject *&value, @@ -352,15 +340,7 @@ void UnifiedCache::_putIfAbsentAndGet( _runEvictionSlice(); } -// Attempts to fetch value and status for key from cache. -// On entry, gCacheMutex must not be held value must be NULL and status must -// be U_ZERO_ERROR. -// On exit, either returns FALSE (In this -// case caller should try to create the object) or returns TRUE with value -// pointing to the fetched value and status set to fetched status. When -// FALSE is returned status may be set to failure if an in progress hash -// entry could not be made but value will remain unchanged. When TRUE is -// returned, caler must call removeRef() on value. + UBool UnifiedCache::_poll( const CacheKeyBase &key, const SharedObject *&value, @@ -369,27 +349,29 @@ UBool UnifiedCache::_poll( U_ASSERT(status == U_ZERO_ERROR); Mutex lock(&gCacheMutex); const UHashElement *element = uhash_find(fHashtable, &key); - while (element != NULL && _inProgress(element)) { + + // If the hash table contains an inProgress placeholder entry for this key, + // this means that another thread is currently constructing the value object. + // Loop, waiting for that construction to complete. + while (element != NULL && _inProgress(element)) { umtx_condWait(&gInProgressValueAddedCond, &gCacheMutex); element = uhash_find(fHashtable, &key); } + + // If the hash table contains an entry for the key, + // fetch out the contents and return them. if (element != NULL) { - _fetch(element, value, status); + _fetch(element, value, status); return TRUE; } - _putNew(key, gNoValue, U_ZERO_ERROR, status); + + // The hash table contained nothing for this key. 
+ // Insert an inProgress place holder value. + // Our caller will create the final value and update the hash table. + _putNew(key, fNoValue, U_ZERO_ERROR, status); return FALSE; } -// Gets value out of cache. -// On entry. gCacheMutex must not be held. value must be NULL. status -// must be U_ZERO_ERROR. -// On exit. value and status set to what is in cache at key or on cache -// miss the key's createObject() is called and value and status are set to -// the result of that. In this latter case, best effort is made to add the -// value and status to the cache. If createObject() fails to create a value, -// gNoValue is stored in cache, and value is set to NULL. Caller must call -// removeRef on value if non NULL. void UnifiedCache::_get( const CacheKeyBase &key, const SharedObject *&value, @@ -398,7 +380,7 @@ void UnifiedCache::_get( U_ASSERT(value == NULL); U_ASSERT(status == U_ZERO_ERROR); if (_poll(key, value, status)) { - if (value == gNoValue) { + if (value == fNoValue) { SharedObject::clearPtr(value); } return; @@ -410,46 +392,22 @@ void UnifiedCache::_get( U_ASSERT(value == NULL || value->hasHardReferences()); U_ASSERT(value != NULL || status != U_ZERO_ERROR); if (value == NULL) { - SharedObject::copyPtr(gNoValue, value); + SharedObject::copyPtr(fNoValue, value); } _putIfAbsentAndGet(key, value, status); - if (value == gNoValue) { + if (value == fNoValue) { SharedObject::clearPtr(value); } } -void UnifiedCache::decrementItemsInUseWithLockingAndEviction() const { - Mutex mutex(&gCacheMutex); - decrementItemsInUse(); - _runEvictionSlice(); -} - -void UnifiedCache::incrementItemsInUse() const { - ++fItemsInUseCount; -} - -void UnifiedCache::decrementItemsInUse() const { - --fItemsInUseCount; +void UnifiedCache::_registerMaster( + const CacheKeyBase *theKey, const SharedObject *value) const { + theKey->fIsMaster = true; + value->cachePtr = this; + ++fNumValuesTotal; + ++fNumValuesInUse; } -// Register a master cache entry. -// On entry, gCacheMutex must be held. 
-// On exit, items in use count incremented, entry is marked as a master -// entry, and value registered with cache so that subsequent calls to -// addRef() and removeRef() on it correctly updates items in use count -void UnifiedCache::_registerMaster( - const CacheKeyBase *theKey, const SharedObject *value) const { - theKey->fIsMaster = TRUE; - ++fItemsInUseCount; - value->registerWithCache(this); -} - -// Store a value and error in given hash entry. -// On entry, gCacheMutex must be held. Hash entry element must be in progress. -// value must be non NULL. -// On Exit, soft reference added to value. value and status stored in hash -// entry. Soft reference removed from previous stored value. Waiting -// threads notified. void UnifiedCache::_put( const UHashElement *element, const SharedObject *value, @@ -458,86 +416,52 @@ void UnifiedCache::_put( const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; const SharedObject *oldValue = (const SharedObject *) element->value.pointer; theKey->fCreationStatus = status; - if (value->noSoftReferences()) { + if (value->softRefCount == 0) { _registerMaster(theKey, value); } - value->addSoftRef(); + value->softRefCount++; UHashElement *ptr = const_cast(element); ptr->value.pointer = (void *) value; - oldValue->removeSoftRef(); + U_ASSERT(oldValue == fNoValue); + removeSoftRef(oldValue); // Tell waiting threads that we replace in-progress status with // an error. umtx_condBroadcast(&gInProgressValueAddedCond); } -void -UnifiedCache::copyPtr(const SharedObject *src, const SharedObject *&dest) { - if(src != dest) { - if(dest != NULL) { - dest->removeRefWhileHoldingCacheLock(); - } - dest = src; - if(src != NULL) { - src->addRefWhileHoldingCacheLock(); - } - } -} - -void -UnifiedCache::clearPtr(const SharedObject *&ptr) { - if (ptr != NULL) { - ptr->removeRefWhileHoldingCacheLock(); - ptr = NULL; - } -} - - -// Fetch value and error code from a particular hash entry. -// On entry, gCacheMutex must be held. 
value must be either NULL or must be -// included in the ref count of the object to which it points. -// On exit, value and status set to what is in the hash entry. Caller must -// eventually call removeRef on value. -// If hash entry is in progress, value will be set to gNoValue and status will -// be set to U_ZERO_ERROR. void UnifiedCache::_fetch( const UHashElement *element, const SharedObject *&value, - UErrorCode &status) { + UErrorCode &status) const { const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; status = theKey->fCreationStatus; - // Since we have the cache lock, calling regular SharedObject methods + // Since we have the cache lock, calling regular SharedObject add/removeRef // could cause us to deadlock on ourselves since they may need to lock // the cache mutex. - UnifiedCache::copyPtr((const SharedObject *) element->value.pointer, value); + removeHardRef(value); + value = static_cast(element->value.pointer); + addHardRef(value); } -// Determine if given hash entry is in progress. -// On entry, gCacheMutex must be held. -UBool UnifiedCache::_inProgress(const UHashElement *element) { - const SharedObject *value = NULL; + +UBool UnifiedCache::_inProgress(const UHashElement* element) const { UErrorCode status = U_ZERO_ERROR; + const SharedObject * value = NULL; _fetch(element, value, status); UBool result = _inProgress(value, status); - - // Since we have the cache lock, calling regular SharedObject methods - // could cause us to deadlock on ourselves since they may need to lock - // the cache mutex. - UnifiedCache::clearPtr(value); + removeHardRef(value); return result; } -// Determine if given hash entry is in progress. -// On entry, gCacheMutex must be held. 
UBool UnifiedCache::_inProgress( - const SharedObject *theValue, UErrorCode creationStatus) { - return (theValue == gNoValue && creationStatus == U_ZERO_ERROR); + const SharedObject* theValue, UErrorCode creationStatus) const { + return (theValue == fNoValue && creationStatus == U_ZERO_ERROR); } -// Determine if given hash entry is eligible for eviction. -// On entry, gCacheMutex must be held. -UBool UnifiedCache::_isEvictable(const UHashElement *element) { +UBool UnifiedCache::_isEvictable(const UHashElement *element) const +{ const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; const SharedObject *theValue = (const SharedObject *) element->value.pointer; @@ -549,7 +473,47 @@ UBool UnifiedCache::_isEvictable(const UHashElement *element) { // We can evict entries that are either not a master or have just // one reference (The one reference being from the cache itself). - return (!theKey->fIsMaster || (theValue->getSoftRefCount() == 1 && theValue->noHardReferences())); + return (!theKey->fIsMaster || (theValue->softRefCount == 1 && theValue->noHardReferences())); +} + +void UnifiedCache::removeSoftRef(const SharedObject *value) const { + U_ASSERT(value->cachePtr == this); + U_ASSERT(value->softRefCount > 0); + if (--value->softRefCount == 0) { + --fNumValuesTotal; + if (value->noHardReferences()) { + delete value; + } else { + // This path only happens from flush(all). Which only happens from the + // UnifiedCache destructor. Nulling out value.cacheptr changes the behavior + // of value.removeRef(), causing the deletion to be done there. 
+ value->cachePtr = nullptr; + } + } +} + +int32_t UnifiedCache::removeHardRef(const SharedObject *value) const { + int refCount = 0; + if (value) { + refCount = umtx_atomic_dec(&value->hardRefCount); + U_ASSERT(refCount >= 0); + if (refCount == 0) { + --fNumValuesInUse; + } + } + return refCount; +} + +int32_t UnifiedCache::addHardRef(const SharedObject *value) const { + int refCount = 0; + if (value) { + refCount = umtx_atomic_inc(&value->hardRefCount); + U_ASSERT(refCount >= 1); + if (refCount == 1) { + fNumValuesInUse++; + } + } + return refCount; } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/unifiedcache.h b/deps/icu-small/source/common/unifiedcache.h index 947ebbdc78cf85..b3ccd60d177ea1 100644 --- a/deps/icu-small/source/common/unifiedcache.h +++ b/deps/icu-small/source/common/unifiedcache.h @@ -190,7 +190,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { UnifiedCache(UErrorCode &status); /** - * Returns the cache instance. + * Return a pointer to the global cache instance. */ static UnifiedCache *getInstance(UErrorCode &status); @@ -294,7 +294,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { /** * Configures at what point evcition of unused entries will begin. - * Eviction is triggered whenever the number of unused entries exeeds + * Eviction is triggered whenever the number of evictable keys exeeds * BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100). * Once the number of unused entries drops below one of these, * eviction ceases. 
Because eviction happens incrementally, @@ -341,60 +341,214 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { */ int32_t unusedCount() const; - virtual void incrementItemsInUse() const; - virtual void decrementItemsInUseWithLockingAndEviction() const; - virtual void decrementItemsInUse() const; + virtual void handleUnreferencedObject() const; virtual ~UnifiedCache(); + private: UHashtable *fHashtable; mutable int32_t fEvictPos; - mutable int32_t fItemsInUseCount; + mutable int32_t fNumValuesTotal; + mutable int32_t fNumValuesInUse; int32_t fMaxUnused; int32_t fMaxPercentageOfInUse; mutable int64_t fAutoEvictedCount; + SharedObject *fNoValue; + UnifiedCache(const UnifiedCache &other); UnifiedCache &operator=(const UnifiedCache &other); + + /** + * Flushes the contents of the cache. If cache values hold references to other + * cache values then _flush should be called in a loop until it returns FALSE. + * + * On entry, gCacheMutex must be held. + * On exit, those values with are evictable are flushed. + * + * @param all if false flush evictable items only, which are those with no external + * references, plus those that can be safely recreated.
+ * if true, flush all elements. Any values (sharedObjects) with remaining + * hard (external) references are not deleted, but are detached from + * the cache, so that a subsequent removeRefs can delete them. + * _flush is not thread safe when all is true. + * @return TRUE if any value in cache was flushed or FALSE otherwise. + */ UBool _flush(UBool all) const; + + /** + * Gets value out of cache. + * On entry. gCacheMutex must not be held. value must be NULL. status + * must be U_ZERO_ERROR. + * On exit. value and status set to what is in cache at key or on cache + * miss the key's createObject() is called and value and status are set to + * the result of that. In this latter case, best effort is made to add the + * value and status to the cache. If createObject() fails to create a value, + * fNoValue is stored in cache, and value is set to NULL. Caller must call + * removeRef on value if non NULL. + */ void _get( const CacheKeyBase &key, const SharedObject *&value, const void *creationContext, UErrorCode &status) const; - UBool _poll( - const CacheKeyBase &key, - const SharedObject *&value, - UErrorCode &status) const; - void _putNew( - const CacheKeyBase &key, - const SharedObject *value, - const UErrorCode creationStatus, - UErrorCode &status) const; + + /** + * Attempts to fetch value and status for key from cache. + * On entry, gCacheMutex must not be held value must be NULL and status must + * be U_ZERO_ERROR. + * On exit, either returns FALSE (In this + * case caller should try to create the object) or returns TRUE with value + * pointing to the fetched value and status set to fetched status. When + * FALSE is returned status may be set to failure if an in progress hash + * entry could not be made but value will remain unchanged. When TRUE is + * returned, caller must call removeRef() on value. 
+ */ + UBool _poll( + const CacheKeyBase &key, + const SharedObject *&value, + UErrorCode &status) const; + + /** + * Places a new value and creationStatus in the cache for the given key. + * On entry, gCacheMutex must be held. key must not exist in the cache. + * On exit, value and creation status placed under key. Soft reference added + * to value on successful add. On error sets status. + */ + void _putNew( + const CacheKeyBase &key, + const SharedObject *value, + const UErrorCode creationStatus, + UErrorCode &status) const; + + /** + * Places value and status at key if there is no value at key or if cache + * entry for key is in progress. Otherwise, it leaves the current value and + * status there. + * + * On entry. gCacheMutex must not be held. Value must be + * included in the reference count of the object to which it points. + * + * On exit, value and status are changed to what was already in the cache if + * something was there and not in progress. Otherwise, value and status are left + * unchanged in which case they are placed in the cache on a best-effort basis. + * Caller must call removeRef() on value. + */ void _putIfAbsentAndGet( const CacheKeyBase &key, const SharedObject *&value, UErrorCode &status) const; - const UHashElement *_nextElement() const; + + /** + * Returns the next element in the cache round robin style. + * Returns nullptr if the cache is empty. + * On entry, gCacheMutex must be held. + */ + const UHashElement *_nextElement() const; + + /** + * Return the number of cache items that would need to be evicted + * to bring usage into conformance with eviction policy. + * + * An item corresponds to an entry in the hash table, a hash table element. + * + * On entry, gCacheMutex must be held. + */ int32_t _computeCountOfItemsToEvict() const; + + /** + * Run an eviction slice. + * On entry, gCacheMutex must be held. 
+ * _runEvictionSlice runs a slice of the evict pipeline by examining the next + * 10 entries in the cache round robin style evicting them if they are eligible. + */ void _runEvictionSlice() const; - void _registerMaster( - const CacheKeyBase *theKey, const SharedObject *value) const; + + /** + * Register a master cache entry. A master key is the first key to create + * a given SharedObject value. Subsequent keys whose create function + * produce referneces to an already existing SharedObject are not masters - + * they can be evicted and subsequently recreated. + * + * On entry, gCacheMutex must be held. + * On exit, items in use count incremented, entry is marked as a master + * entry, and value registered with cache so that subsequent calls to + * addRef() and removeRef() on it correctly interact with the cache. + */ + void _registerMaster(const CacheKeyBase *theKey, const SharedObject *value) const; + + /** + * Store a value and creation error status in given hash entry. + * On entry, gCacheMutex must be held. Hash entry element must be in progress. + * value must be non NULL. + * On Exit, soft reference added to value. value and status stored in hash + * entry. Soft reference removed from previous stored value. Waiting + * threads notified. + */ void _put( const UHashElement *element, const SharedObject *value, const UErrorCode status) const; + /** + * Remove a soft reference, and delete the SharedObject if no references remain. + * To be used from within the UnifiedCache implementation only. + * gCacheMutex must be held by caller. + * @param value the SharedObject to be acted on. + */ + void removeSoftRef(const SharedObject *value) const; + + /** + * Increment the hard reference count of the given SharedObject. + * gCacheMutex must be held by the caller. + * Update numValuesEvictable on transitions between zero and one reference. + * + * @param value The SharedObject to be referenced. + * @return the hard reference count after the addition. 
+ */ + int32_t addHardRef(const SharedObject *value) const; + + /** + * Decrement the hard reference count of the given SharedObject. + * gCacheMutex must be held by the caller. + * Update numValuesEvictable on transitions between one and zero reference. + * + * @param value The SharedObject to be referenced. + * @return the hard reference count after the removal. + */ + int32_t removeHardRef(const SharedObject *value) const; + + #ifdef UNIFIED_CACHE_DEBUG void _dumpContents() const; #endif - static void copyPtr(const SharedObject *src, const SharedObject *&dest); - static void clearPtr(const SharedObject *&ptr); - static void _fetch( - const UHashElement *element, - const SharedObject *&value, - UErrorCode &status); - static UBool _inProgress(const UHashElement *element); - static UBool _inProgress( - const SharedObject *theValue, UErrorCode creationStatus); - static UBool _isEvictable(const UHashElement *element); + + /** + * Fetch value and error code from a particular hash entry. + * On entry, gCacheMutex must be held. value must be either NULL or must be + * included in the ref count of the object to which it points. + * On exit, value and status set to what is in the hash entry. Caller must + * eventually call removeRef on value. + * If hash entry is in progress, value will be set to gNoValue and status will + * be set to U_ZERO_ERROR. + */ + void _fetch(const UHashElement *element, const SharedObject *&value, + UErrorCode &status) const; + + /** + * Determine if given hash entry is in progress. + * On entry, gCacheMutex must be held. + */ + UBool _inProgress(const UHashElement *element) const; + + /** + * Determine if given hash entry is in progress. + * On entry, gCacheMutex must be held. + */ + UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const; + + /** + * Determine if given hash entry is eligible for eviction. + * On entry, gCacheMutex must be held. 
+ */ + UBool _isEvictable(const UHashElement *element) const; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/uniset_closure.cpp b/deps/icu-small/source/common/uniset_closure.cpp index b5cc21394109dc..97c7bc9d352ab9 100644 --- a/deps/icu-small/source/common/uniset_closure.cpp +++ b/deps/icu-small/source/common/uniset_closure.cpp @@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, // _applyPattern calls add() etc., which set pat to empty. UnicodeString rebuiltPat; RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status); + applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status); if (U_FAILURE(status)) return *this; if (chars.inVariable()) { // syntaxError(chars, "Extra chars in variable value"); diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp index d0ed074a9be387..ef5d6a32b2d10b 100644 --- a/deps/icu-small/source/common/uniset_props.cpp +++ b/deps/icu-small/source/common/uniset_props.cpp @@ -231,7 +231,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) { ucase_addPropertyStarts(&sa, &status); break; case UPROPS_SRC_BIDI: - ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status); + ubidi_addPropertyStarts(&sa, &status); break; default: status = U_INTERNAL_PROGRAM_ERROR; @@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { return i.fSet; } +namespace { // Cache some sets for other services -------------------------------------- *** void U_CALLCONV createUni32Set(UErrorCode &errorCode) { @@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) { // memory leak checker tools #define _dbgct(me) +} // namespace + //---------------------------------------------------------------- // Constructors &c //---------------------------------------------------------------- @@ 
-382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, // _applyPattern calls add() etc., which set pat to empty. UnicodeString rebuiltPat; RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status); + applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status); if (U_FAILURE(status)) return; if (chars.inVariable()) { // syntaxError(chars, "Extra chars in variable value"); @@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) { // Implementation: Pattern parsing //---------------------------------------------------------------- +namespace { + /** * A small all-inline class to manage a UnicodeSet pointer. Add * operator->() etc. as needed. @@ -424,6 +429,10 @@ class UnicodeSetPointer { } }; +constexpr int32_t MAX_DEPTH = 100; + +} // namespace + /** * Parse the pattern from the given RuleCharacterIterator. The * iterator is advanced over the parsed pattern. 
@@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, UnicodeString& rebuiltPat, uint32_t options, UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), + int32_t depth, UErrorCode& ec) { if (U_FAILURE(ec)) return; + if (depth > MAX_DEPTH) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + return; + } // Syntax characters: [ ] ^ - & { } @@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, } switch (setMode) { case 1: - nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec); + nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec); break; case 2: chars.skipIgnored(opts); @@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, // Property set implementation //---------------------------------------------------------------- +namespace { + static UBool numericValueFilter(UChar32 ch, void* context) { return u_getNumericValue(ch) == *(double*)context; } @@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) { return uscript_hasScript(ch, *(UScriptCode*)context); } +} // namespace + /** * Generic filter-based scanning code for UCD property UnicodeSets. 
*/ @@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, } } +namespace { + static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { /* Note: we use ' ' in compiler code page */ int32_t j = 0; @@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { return TRUE; } +} // namespace + //---------------------------------------------------------------- // Property set API //---------------------------------------------------------------- diff --git a/deps/icu-small/source/common/uprops.cpp b/deps/icu-small/source/common/uprops.cpp index ace3c4d6d04652..b76896db1b704f 100644 --- a/deps/icu-small/source/common/uprops.cpp +++ b/deps/icu-small/source/common/uprops.cpp @@ -38,8 +38,6 @@ U_NAMESPACE_USE -#define GET_BIDI_PROPS() ubidi_getSingleton() - /* general properties API functions ----------------------------------------- */ struct BinaryProperty; @@ -62,15 +60,15 @@ static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 } static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isBidiControl(GET_BIDI_PROPS(), c); + return ubidi_isBidiControl(c); } static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isMirrored(GET_BIDI_PROPS(), c); + return ubidi_isMirrored(c); } static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isJoinControl(GET_BIDI_PROPS(), c); + return ubidi_isJoinControl(c); } #if UCONFIG_NO_NORMALIZATION @@ -329,11 +327,11 @@ static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /* } static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c); + return (int32_t)ubidi_getPairedBracketType(c); } static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty 
which) { - return ubidi_getMaxValue(GET_BIDI_PROPS(), which); + return ubidi_getMaxValue(which); } #if UCONFIG_NO_NORMALIZATION @@ -351,11 +349,11 @@ static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UPrope } static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); + return ubidi_getJoiningGroup(c); } static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_getJoiningType(GET_BIDI_PROPS(), c); + return ubidi_getJoiningType(c); } static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { diff --git a/deps/icu-small/source/common/ushape.cpp b/deps/icu-small/source/common/ushape.cpp index d7886ac06c6b9d..c3f3ef9e2078f0 100644 --- a/deps/icu-small/source/common/ushape.cpp +++ b/deps/icu-small/source/common/ushape.cpp @@ -342,18 +342,16 @@ static void _shapeToArabicDigitsWithContext(UChar *s, int32_t length, UChar digitBase, UBool isLogical, UBool lastStrongWasAL) { - const UBiDiProps *bdp; int32_t i; UChar c; - bdp=ubidi_getSingleton(); digitBase-=0x30; /* the iteration direction depends on the type of input */ if(isLogical) { for(i=0; i0; /* pre-decrement in the body */) { c=s[--i]; - switch(ubidi_getClass(bdp, c)) { + switch(ubidi_getClass(c)) { case U_LEFT_TO_RIGHT: /* L */ case U_RIGHT_TO_LEFT: /* R */ lastStrongWasAL=FALSE; diff --git a/deps/icu-small/source/common/usprep.cpp b/deps/icu-small/source/common/usprep.cpp index c4f831be2e4fe2..54a77172fe1b09 100644 --- a/deps/icu-small/source/common/usprep.cpp +++ b/deps/icu-small/source/common/usprep.cpp @@ -347,10 +347,6 @@ usprep_getProfile(const char* path, newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); - if(newProfile->checkBiDi) { - newProfile->bdp = 
ubidi_getSingleton(); - } - LocalMemory key; LocalMemory keyName; LocalMemory keyPath; @@ -735,7 +731,7 @@ usprep_prepare( const UStringPrepProfile* profile, } if(profile->checkBiDi) { - direction = ubidi_getClass(profile->bdp, ch); + direction = ubidi_getClass(ch); if(firstCharDir == U_CHAR_DIRECTION_COUNT){ firstCharDir = direction; } diff --git a/deps/icu-small/source/common/ustr_wcs.cpp b/deps/icu-small/source/common/ustr_wcs.cpp index 8b6e99221ee253..0372824f21f615 100644 --- a/deps/icu-small/source/common/ustr_wcs.cpp +++ b/deps/icu-small/source/common/ustr_wcs.cpp @@ -342,7 +342,7 @@ _strFromWCS( UChar *dest, pSrcLimit = src + srcLength; for(;;){ - register int32_t nulLen = 0; + int32_t nulLen = 0; /* find nulls in the string */ while(nulLenaddUnchanged(length); + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + if((destIndex+length)<=destCapacity) { + u_memcpy(dest+destIndex, s, length); + } + return destIndex + length; +} + +inline int32_t appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { - if(length>0) { - if(edits!=NULL) { - edits->addUnchanged(length); - } - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } - if(length>(INT32_MAX-destIndex)) { - return -1; // integer overflow - } - if((destIndex+length)<=destCapacity) { - u_memcpy(dest+destIndex, s, length); - } - destIndex+=length; + if (length <= 0) { + return destIndex; } - return destIndex; + return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits); } -static UChar32 U_CALLCONV +UChar32 U_CALLCONV utf16_caseContextIterator(void *context, int8_t dir) { UCaseContext *csc=(UCaseContext *)context; UChar32 c; @@ -197,39 +195,205 @@ utf16_caseContextIterator(void *context, int8_t dir) { return U_SENTINEL; } -/* - * Case-maps [srcStart..srcLimit[ but takes - * context [0..srcLength[ 
into account. +/** + * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. + * caseLocale < 0: Case-folds [srcStart..srcLimit[. */ -static int32_t -_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, - UChar *dest, int32_t destCapacity, - const UChar *src, UCaseContext *csc, - int32_t srcStart, int32_t srcLimit, - icu::Edits *edits, - UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex=srcStart; - int32_t destIndex=0; - while(srcIndexcpStart=cpStart=srcIndex; +int32_t toLower(int32_t caseLocale, uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, + icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToLower; + if (caseLocale == UCASE_LOC_ROOT || + (caseLocale >= 0 ? + !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : + (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { + latinToLower = LatinCase::TO_LOWER_NORMAL; + } else { + latinToLower = LatinCase::TO_LOWER_TR_LT; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t destIndex = 0; + int32_t prev = srcStart; + int32_t srcIndex = srcStart; + for (;;) { + // fast path for simple cases + UChar lead; + while (srcIndex < srcLimit) { + lead = src[srcIndex]; + int32_t delta; + if (lead < LatinCase::LONG_S) { + int8_t d = latinToLower[lead]; + if (d == LatinCase::EXC) { break; } + ++srcIndex; + if (d == 0) { continue; } + delta = d; + } else if (lead >= 0xd800) { + break; // surrogate or higher + } else { + uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); + if (UCASE_HAS_EXCEPTION(props)) { break; } + ++srcIndex; + if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + } + lead += delta; + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - 1 - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendUChar(dest, 
destIndex, destCapacity, lead); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + if (srcIndex >= srcLimit) { + break; + } + // slow path + int32_t cpStart = srcIndex++; + UChar trail; UChar32 c; - U16_NEXT(src, srcIndex, srcLimit, c); - csc->cpLimit=srcIndex; + if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) { + c = U16_GET_SUPPLEMENTARY(lead, trail); + ++srcIndex; + } else { + c = lead; + } const UChar *s; - c=map(c, utf16_caseContextIterator, csc, &s, caseLocale); - destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; + if (caseLocale >= 0) { + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale); + } else { + c = ucase_toFullFolding(c, &s, options); } + if (c >= 0) { + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, cpStart - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + srcIndex - cpStart, options, edits); + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + } + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - prev, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; } + return destIndex; +} +int32_t toUpper(int32_t caseLocale, uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, UCaseContext *csc, int32_t srcLength, + icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToUpper; + if (caseLocale == UCASE_LOC_TURKISH) { + latinToUpper = LatinCase::TO_UPPER_TR; + } else { + latinToUpper = LatinCase::TO_UPPER_NORMAL; + } + const UTrie2 *trie = ucase_getTrie(); + 
int32_t destIndex = 0; + int32_t prev = 0; + int32_t srcIndex = 0; + for (;;) { + // fast path for simple cases + UChar lead; + while (srcIndex < srcLength) { + lead = src[srcIndex]; + int32_t delta; + if (lead < LatinCase::LONG_S) { + int8_t d = latinToUpper[lead]; + if (d == LatinCase::EXC) { break; } + ++srcIndex; + if (d == 0) { continue; } + delta = d; + } else if (lead >= 0xd800) { + break; // surrogate or higher + } else { + uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); + if (UCASE_HAS_EXCEPTION(props)) { break; } + ++srcIndex; + if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + } + lead += delta; + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - 1 - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendUChar(dest, destIndex, destCapacity, lead); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + if (srcIndex >= srcLength) { + break; + } + // slow path + int32_t cpStart; + csc->cpStart = cpStart = srcIndex++; + UChar trail; + UChar32 c; + if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) { + c = U16_GET_SUPPLEMENTARY(lead, trail); + ++srcIndex; + } else { + c = lead; + } + csc->cpLimit = srcIndex; + const UChar *s; + c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale); + if (c >= 0) { + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, cpStart - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + srcIndex - cpStart, options, edits); + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + } + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - prev, options, edits); + if (destIndex < 0) { + errorCode = 
U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } return destIndex; } +} // namespace + +U_NAMESPACE_END + +U_NAMESPACE_USE + #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC int32_t U_CALLCONV @@ -344,11 +508,10 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it if((options&U_TITLECASE_NO_LOWERCASE)==0) { /* Normal operation: Lowercase the rest of the word. */ destIndex+= - _caseMap( - caseLocale, options, ucase_toFullLower, + toLower( + caseLocale, options, dest+destIndex, destCapacity-destIndex, - src, &csc, - titleLimit, index, + src, &csc, titleLimit, index, edits, errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR) { errorCode=U_ZERO_ERROR; @@ -1013,8 +1176,8 @@ ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - int32_t destIndex = _caseMap( - caseLocale, options, ucase_toFullLower, + int32_t destIndex = toLower( + caseLocale, options, dest, destCapacity, src, &csc, 0, srcLength, edits, errorCode); @@ -1035,10 +1198,10 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - destIndex = _caseMap( - caseLocale, options, ucase_toFullUpper, + destIndex = toUpper( + caseLocale, options, dest, destCapacity, - src, &csc, 0, srcLength, + src, &csc, srcLength, edits, errorCode); } return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); @@ -1050,23 +1213,11 @@ ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK const UChar *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex = 0; - int32_t destIndex = 0; - while (srcIndex < srcLength) { - int32_t cpStart = srcIndex; - UChar32 c; - U16_NEXT(src, srcIndex, srcLength, c); - const UChar *s; - c = ucase_toFullFolding(c, &s, options); - destIndex = appendResult(dest, 
destIndex, destCapacity, c, s, - srcIndex - cpStart, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - + int32_t destIndex = toLower( + -1, options, + dest, destCapacity, + src, nullptr, 0, srcLength, + edits, errorCode); return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } diff --git a/deps/icu-small/source/common/utf_impl.cpp b/deps/icu-small/source/common/utf_impl.cpp index f78c566e098884..9dd241a12bfa16 100644 --- a/deps/icu-small/source/common/utf_impl.cpp +++ b/deps/icu-small/source/common/utf_impl.cpp @@ -238,33 +238,45 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U int32_t i=*pi; if(U8_IS_TRAIL(c) && i>start) { uint8_t b1=s[--i]; - if(0xc2<=b1 && b1<0xe0) { - *pi=i; - return ((b1-0xc0)<<6)|(c&0x3f); + if(U8_IS_LEAD(b1)) { + if(b1<0xe0) { + *pi=i; + return ((b1-0xc0)<<6)|(c&0x3f); + } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) { + // Truncated 3- or 4-byte sequence. + *pi=i; + return errorValue(1, strict); + } } else if(U8_IS_TRAIL(b1) && i>start) { // Extract the value bits from the last trail byte. 
c&=0x3f; uint8_t b2=s[--i]; - if(0xe0<=b2 && b2<0xf0) { - b2&=0xf; - if(strict!=-2) { - if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - *pi=i; - c=(b2<<12)|((b1&0x3f)<<6)|c; - if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { - return c; - } else { - // strict: forbid non-characters like U+fffe - return errorValue(2, strict); + if(0xe0<=b2 && b2<=0xf4) { + if(b2<0xf0) { + b2&=0xf; + if(strict!=-2) { + if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + *pi=i; + c=(b2<<12)|((b1&0x3f)<<6)|c; + if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { + return c; + } else { + // strict: forbid non-characters like U+fffe + return errorValue(2, strict); + } + } + } else { + // strict=-2 -> lenient: allow surrogates + b1-=0x80; + if((b2>0 || b1>=0x20)) { + *pi=i; + return (b2<<12)|(b1<<6)|c; } } - } else { - // strict=-2 -> lenient: allow surrogates - b1-=0x80; - if((b2>0 || b1>=0x20)) { - *pi=i; - return (b2<<12)|(b1<<6)|c; - } + } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { + // Truncated 4-byte sequence. + *pi=i; + return errorValue(2, strict); } } else if(U8_IS_TRAIL(b2) && i>start) { uint8_t b3=s[--i]; @@ -281,16 +293,7 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U } } } - } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { - // Truncated 4-byte sequence. - *pi=i; - return errorValue(2, strict); } - } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) || - (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) { - // Truncated 3- or 4-byte sequence. - *pi=i; - return errorValue(1, strict); } } return errorValue(0, strict); @@ -303,29 +306,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) { uint8_t c=s[i]; if(U8_IS_TRAIL(c) && i>start) { uint8_t b1=s[--i]; - if(0xc2<=b1 && b1<0xe0) { - return i; + if(U8_IS_LEAD(b1)) { + if(b1<0xe0 || + (b1<0xf0 ? 
U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) { + return i; + } } else if(U8_IS_TRAIL(b1) && i>start) { uint8_t b2=s[--i]; - if(0xe0<=b2 && b2<0xf0) { - if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + if(0xe0<=b2 && b2<=0xf4) { + if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { return i; } } else if(U8_IS_TRAIL(b2) && i>start) { uint8_t b3=s[--i]; - if(0xf0<=b3 && b3<=0xf4) { - if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { - return i; - } + if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { + return i; } - } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { - // Truncated 4-byte sequence. - return i; } - } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) || - (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) { - // Truncated 3- or 4-byte sequence. - return i; } } return orig_i; diff --git a/deps/icu-small/source/common/utrie.h b/deps/icu-small/source/common/utrie.h index 9c5382c5949b3b..641027a1a3f448 100644 --- a/deps/icu-small/source/common/utrie.h +++ b/deps/icu-small/source/common/utrie.h @@ -556,7 +556,7 @@ struct UNewTrie { * Index values at build-time are 32 bits wide for easier processing. * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()). 
*/ - int32_t index[UTRIE_MAX_INDEX_LENGTH]; + int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT]; uint32_t *data; uint32_t leadUnitValue; diff --git a/deps/icu-small/source/common/uts46.cpp b/deps/icu-small/source/common/uts46.cpp index 9b8d3ded2fddd1..5a23572eb64af2 100644 --- a/deps/icu-small/source/common/uts46.cpp +++ b/deps/icu-small/source/common/uts46.cpp @@ -1126,7 +1126,6 @@ isASCIIOkBiDi(const char *s, int32_t length) { UBool UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { - const UBiDiProps *bdp=ubidi_getSingleton(); // [IDNA2008-Tables] // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER for(int32_t i=0; i; #endif diff --git a/deps/icu-small/source/i18n/dcfmtsym.cpp b/deps/icu-small/source/i18n/dcfmtsym.cpp index f840fde2abaca3..680c3120a1e0f9 100644 --- a/deps/icu-small/source/i18n/dcfmtsym.cpp +++ b/deps/icu-small/source/i18n/dcfmtsym.cpp @@ -38,6 +38,7 @@ #include "uresimp.h" #include "ureslocs.h" #include "charstr.h" +#include "uassert.h" // ***************************************************************************** // class DecimalFormatSymbols @@ -165,6 +166,7 @@ DecimalFormatSymbols::operator=(const DecimalFormatSymbols& rhs) uprv_strcpy(actualLocale, rhs.actualLocale); fIsCustomCurrencySymbol = rhs.fIsCustomCurrencySymbol; fIsCustomIntlCurrencySymbol = rhs.fIsCustomIntlCurrencySymbol; + fCodePointZero = rhs.fCodePointZero; } return *this; } @@ -196,6 +198,7 @@ DecimalFormatSymbols::operator==(const DecimalFormatSymbols& that) const return FALSE; } } + // No need to check fCodePointZero since it is based on fSymbols return locale == that.locale && uprv_strcmp(validLocale, that.validLocale) == 0 && uprv_strcmp(actualLocale, that.actualLocale) == 0; @@ -433,6 +436,24 @@ DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status, // Let the monetary number separators equal the default number separators if necessary. 
sink.resolveMissingMonetarySeparators(fSymbols); + // Resolve codePointZero + UChar32 tempCodePointZero; + for (int32_t i=0; i<=9; i++) { + const UnicodeString& stringDigit = getConstDigitSymbol(i); + if (stringDigit.countChar32() != 1) { + tempCodePointZero = -1; + break; + } + UChar32 cp = stringDigit.char32At(0); + if (i == 0) { + tempCodePointZero = cp; + } else if (cp != tempCodePointZero + i) { + tempCodePointZero = -1; + break; + } + } + fCodePointZero = tempCodePointZero; + // Obtain currency data from the currency API. This is strictly // for backward compatibility; we don't use DecimalFormatSymbols // for currency data anymore. @@ -530,6 +551,8 @@ DecimalFormatSymbols::initialize() { fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents fIsCustomCurrencySymbol = FALSE; fIsCustomIntlCurrencySymbol = FALSE; + fCodePointZero = 0x30; + U_ASSERT(fCodePointZero == fSymbols[kZeroDigitSymbol].char32At(0)); } diff --git a/deps/icu-small/source/i18n/decNumber.cpp b/deps/icu-small/source/i18n/decNumber.cpp index 363f93ea72de40..c19493bdf3bbee 100644 --- a/deps/icu-small/source/i18n/decNumber.cpp +++ b/deps/icu-small/source/i18n/decNumber.cpp @@ -627,10 +627,12 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromString(decNumber *dn, const char for (; *c=='0' && *(c+1)!='\0';) c++; /* strip insignificant zeros */ firstexp=c; /* save exponent digit place */ + uInt uexponent = 0; /* Avoid undefined behavior on signed int overflow */ for (; ;c++) { if (*c<'0' || *c>'9') break; /* not a digit */ - exponent=X10(exponent)+(Int)*c-(Int)'0'; + uexponent=X10(uexponent)+(uInt)*c-(uInt)'0'; } /* c */ + exponent = (Int)uexponent; /* if not now on a '\0', *c must not be a digit */ if (*c!='\0') break; diff --git a/deps/icu-small/source/i18n/decimalformatpattern.cpp b/deps/icu-small/source/i18n/decimalformatpattern.cpp index c7ec5cd966a3ec..80a1870f33ef8d 100644 --- a/deps/icu-small/source/i18n/decimalformatpattern.cpp +++ 
b/deps/icu-small/source/i18n/decimalformatpattern.cpp @@ -50,10 +50,12 @@ static void syntaxError(const UnicodeString& pattern, parseError.preContext[stop-start] = 0; //for post-context - start = pos+1; - stop = ((pos+U_PARSE_CONTEXT_LEN)<=pattern.length()) ? (pos+(U_PARSE_CONTEXT_LEN-1)) : - pattern.length(); - pattern.extract(start,stop-start,parseError.postContext,0); + start = pattern.moveIndex32(pos, 1); + stop = pos + U_PARSE_CONTEXT_LEN - 1; + if (stop > pattern.length()) { + stop = pattern.length(); + } + pattern.extract(start, stop - start, parseError.postContext, 0); //null terminate the buffer parseError.postContext[stop-start]= 0; } diff --git a/deps/icu-small/source/i18n/digitlst.cpp b/deps/icu-small/source/i18n/digitlst.cpp index 10a3a5dca1a404..37760defd708bc 100644 --- a/deps/icu-small/source/i18n/digitlst.cpp +++ b/deps/icu-small/source/i18n/digitlst.cpp @@ -44,12 +44,15 @@ #include "digitinterval.h" #include "ucln_in.h" #include "umutex.h" +#include "double-conversion.h" #include #include #include #include #include +using icu::double_conversion::DoubleToStringConverter; + #if !defined(U_USE_STRTOD_L) # if U_PLATFORM_USES_ONLY_WIN32_API # define U_USE_STRTOD_L 1 @@ -850,8 +853,53 @@ DigitList::set(double source) } else { uprv_strcpy(rep,"inf"); } + } else if (uprv_isNaN(source)) { + uprv_strcpy(rep, "NaN"); } else { - sprintf(rep, "%+1.*e", MAX_DBL_DIGITS - 1, source); + bool sign; + int32_t length; + int32_t point; + DoubleToStringConverter::DoubleToAscii( + source, + DoubleToStringConverter::DtoaMode::SHORTEST, + 0, + rep + 1, + sizeof(rep), + &sign, + &length, + &point + ); + + // Convert the raw buffer into a string for decNumber + int32_t power = point - length; + if (sign) { + rep[0] = '-'; + } else { + rep[0] = '0'; + } + length++; + rep[length++] = 'E'; + if (power < 0) { + rep[length++] = '-'; + power = -power; + } else { + rep[length++] = '+'; + } + if (power < 10) { + rep[length++] = power + '0'; + } else if (power < 100) { + 
rep[length++] = (power / 10) + '0'; + rep[length++] = (power % 10) + '0'; + } else { + U_ASSERT(power < 1000); + rep[length + 2] = (power % 10) + '0'; + power /= 10; + rep[length + 1] = (power % 10) + '0'; + power /= 10; + rep[length] = power + '0'; + length += 3; + } + rep[length++] = 0; } U_ASSERT(uprv_strlen(rep) < sizeof(rep)); diff --git a/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp new file mode 100644 index 00000000000000..07d0b0eb0f8717 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp @@ -0,0 +1,659 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#include + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-bignum-dtoa.h" + +#include "double-conversion-bignum.h" +#include "double-conversion-ieee.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +static int NormalizedExponent(uint64_t significand, int exponent) { + ASSERT(significand != 0); + while ((significand & Double::kHiddenBit) == 0) { + significand = significand << 1; + exponent = exponent - 1; + } + return exponent; +} + + +// Forward declarations: +// Returns an estimation of k such that 10^(k-1) <= v < 10^k. +static int EstimatePower(int exponent); +// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator +// and denominator. +static void InitialScaledStartValues(uint64_t significand, + int exponent, + bool lower_boundary_is_closer, + int estimated_power, + bool need_boundary_deltas, + Bignum* numerator, + Bignum* denominator, + Bignum* delta_minus, + Bignum* delta_plus); +// Multiplies numerator/denominator so that its values lies in the range 1-10. +// Returns decimal_point s.t. +// v = numerator'/denominator' * 10^(decimal_point-1) +// where numerator' and denominator' are the values of numerator and +// denominator after the call to this function. 
+static void FixupMultiply10(int estimated_power, bool is_even, + int* decimal_point, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus); +// Generates digits from the left to the right and stops when the generated +// digits yield the shortest decimal representation of v. +static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus, + bool is_even, + Vector buffer, int* length); +// Generates 'requested_digits' after the decimal point. +static void BignumToFixed(int requested_digits, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector(buffer), int* length); +// Generates 'count' digits of numerator/denominator. +// Once 'count' digits have been produced rounds the result depending on the +// remainder (remainders of exactly .5 round upwards). Might update the +// decimal_point when rounding up (for example for 0.9999). +static void GenerateCountedDigits(int count, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector(buffer), int* length); + + +void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, + Vector buffer, int* length, int* decimal_point) { + ASSERT(v > 0); + ASSERT(!Double(v).IsSpecial()); + uint64_t significand; + int exponent; + bool lower_boundary_is_closer; + if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) { + float f = static_cast(v); + ASSERT(f == v); + significand = Single(f).Significand(); + exponent = Single(f).Exponent(); + lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser(); + } else { + significand = Double(v).Significand(); + exponent = Double(v).Exponent(); + lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser(); + } + bool need_boundary_deltas = + (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE); + + bool is_even = (significand & 1) == 0; + int normalized_exponent = NormalizedExponent(significand, exponent); + // estimated_power might be too low by 1. 
+ int estimated_power = EstimatePower(normalized_exponent); + + // Shortcut for Fixed. + // The requested digits correspond to the digits after the point. If the + // number is much too small, then there is no need in trying to get any + // digits. + if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) { + buffer[0] = '\0'; + *length = 0; + // Set decimal-point to -requested_digits. This is what Gay does. + // Note that it should not have any effect anyways since the string is + // empty. + *decimal_point = -requested_digits; + return; + } + + Bignum numerator; + Bignum denominator; + Bignum delta_minus; + Bignum delta_plus; + // Make sure the bignum can grow large enough. The smallest double equals + // 4e-324. In this case the denominator needs fewer than 324*4 binary digits. + // The maximum double is 1.7976931348623157e308 which needs fewer than + // 308*4 binary digits. + ASSERT(Bignum::kMaxSignificantBits >= 324*4); + InitialScaledStartValues(significand, exponent, lower_boundary_is_closer, + estimated_power, need_boundary_deltas, + &numerator, &denominator, + &delta_minus, &delta_plus); + // We now have v = (numerator / denominator) * 10^estimated_power. 
+ FixupMultiply10(estimated_power, is_even, decimal_point, + &numerator, &denominator, + &delta_minus, &delta_plus); + // We now have v = (numerator / denominator) * 10^(decimal_point-1), and + // 1 <= (numerator + delta_plus) / denominator < 10 + switch (mode) { + case BIGNUM_DTOA_SHORTEST: + case BIGNUM_DTOA_SHORTEST_SINGLE: + GenerateShortestDigits(&numerator, &denominator, + &delta_minus, &delta_plus, + is_even, buffer, length); + break; + case BIGNUM_DTOA_FIXED: + BignumToFixed(requested_digits, decimal_point, + &numerator, &denominator, + buffer, length); + break; + case BIGNUM_DTOA_PRECISION: + GenerateCountedDigits(requested_digits, decimal_point, + &numerator, &denominator, + buffer, length); + break; + default: + UNREACHABLE(); + } + buffer[*length] = '\0'; +} + + +// The procedure starts generating digits from the left to the right and stops +// when the generated digits yield the shortest decimal representation of v. A +// decimal representation of v is a number lying closer to v than to any other +// double, so it converts to v when read. +// +// This is true if d, the decimal representation, is between m- and m+, the +// upper and lower boundaries. d must be strictly between them if !is_even. +// m- := (numerator - delta_minus) / denominator +// m+ := (numerator + delta_plus) / denominator +// +// Precondition: 0 <= (numerator+delta_plus) / denominator < 10. +// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit +// will be produced. This should be the standard precondition. +static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus, + bool is_even, + Vector buffer, int* length) { + // Small optimization: if delta_minus and delta_plus are the same just reuse + // one of the two bignums. 
+ if (Bignum::Equal(*delta_minus, *delta_plus)) { + delta_plus = delta_minus; + } + *length = 0; + for (;;) { + uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. + // digit = numerator / denominator (integer division). + // numerator = numerator % denominator. + buffer[(*length)++] = static_cast(digit + '0'); + + // Can we stop already? + // If the remainder of the division is less than the distance to the lower + // boundary we can stop. In this case we simply round down (discarding the + // remainder). + // Similarly we test if we can round up (using the upper boundary). + bool in_delta_room_minus; + bool in_delta_room_plus; + if (is_even) { + in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus); + } else { + in_delta_room_minus = Bignum::Less(*numerator, *delta_minus); + } + if (is_even) { + in_delta_room_plus = + Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; + } else { + in_delta_room_plus = + Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; + } + if (!in_delta_room_minus && !in_delta_room_plus) { + // Prepare for next iteration. + numerator->Times10(); + delta_minus->Times10(); + // We optimized delta_plus to be equal to delta_minus (if they share the + // same value). So don't multiply delta_plus if they point to the same + // object. + if (delta_minus != delta_plus) { + delta_plus->Times10(); + } + } else if (in_delta_room_minus && in_delta_room_plus) { + // Let's see if 2*numerator < denominator. + // If yes, then the next digit would be < 5 and we can round down. + int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator); + if (compare < 0) { + // Remaining digits are less than .5. -> Round down (== do nothing). + } else if (compare > 0) { + // Remaining digits are more than .5 of denominator. -> Round up. 
+ // Note that the last digit could not be a '9' as otherwise the whole + // loop would have stopped earlier. + // We still have an assert here in case the preconditions were not + // satisfied. + ASSERT(buffer[(*length) - 1] != '9'); + buffer[(*length) - 1]++; + } else { + // Halfway case. + // TODO(floitsch): need a way to solve half-way cases. + // For now let's round towards even (since this is what Gay seems to + // do). + + if ((buffer[(*length) - 1] - '0') % 2 == 0) { + // Round down => Do nothing. + } else { + ASSERT(buffer[(*length) - 1] != '9'); + buffer[(*length) - 1]++; + } + } + return; + } else if (in_delta_room_minus) { + // Round down (== do nothing). + return; + } else { // in_delta_room_plus + // Round up. + // Note again that the last digit could not be '9' since this would have + // stopped the loop earlier. + // We still have an ASSERT here, in case the preconditions were not + // satisfied. + ASSERT(buffer[(*length) -1] != '9'); + buffer[(*length) - 1]++; + return; + } + } +} + + +// Let v = numerator / denominator < 10. +// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point) +// from left to right. Once 'count' digits have been produced we decide whether +// to round up or down. Remainders of exactly .5 round upwards. Numbers such +// as 9.999999 propagate a carry all the way, and change the +// exponent (decimal_point), when rounding upwards. +static void GenerateCountedDigits(int count, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector buffer, int* length) { + ASSERT(count >= 0); + for (int i = 0; i < count - 1; ++i) { + uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. + // digit = numerator / denominator (integer division). + // numerator = numerator % denominator. + buffer[i] = static_cast(digit + '0'); + // Prepare for next iteration. + numerator->Times10(); + } + // Generate the last digit. 
+ uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { + digit++; + } + ASSERT(digit <= 10); + buffer[count - 1] = static_cast(digit + '0'); + // Correct bad digits (in case we had a sequence of '9's). Propagate the + // carry until we hit a non-'9' or until we reach the first digit. + for (int i = count - 1; i > 0; --i) { + if (buffer[i] != '0' + 10) break; + buffer[i] = '0'; + buffer[i - 1]++; + } + if (buffer[0] == '0' + 10) { + // Propagate a carry past the top place. + buffer[0] = '1'; + (*decimal_point)++; + } + *length = count; +} + + +// Generates 'requested_digits' after the decimal point. It might omit +// trailing '0's. If the input number is too small then no digits at all are +// generated (ex.: 2 fixed digits for 0.00001). +// +// Input verifies: 1 <= (numerator + delta) / denominator < 10. +static void BignumToFixed(int requested_digits, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector(buffer), int* length) { + // Note that we have to look at more than just the requested_digits, since + // a number could be rounded up. Example: v=0.5 with requested_digits=0. + // Even though the power of v equals 0 we can't just stop here. + if (-(*decimal_point) > requested_digits) { + // The number is definitely too small. + // Ex: 0.001 with requested_digits == 1. + // Set decimal-point to -requested_digits. This is what Gay does. + // Note that it should not have any effect anyways since the string is + // empty. + *decimal_point = -requested_digits; + *length = 0; + return; + } else if (-(*decimal_point) == requested_digits) { + // We only need to verify if the number rounds down or up. + // Ex: 0.04 and 0.06 with requested_digits == 1. + ASSERT(*decimal_point == -requested_digits); + // Initially the fraction lies in range (1, 10]. Multiply the denominator + // by 10 so that we can compare more easily. 
+ denominator->Times10(); + if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { + // If the fraction is >= 0.5 then we have to include the rounded + // digit. + buffer[0] = '1'; + *length = 1; + (*decimal_point)++; + } else { + // Note that we caught most of similar cases earlier. + *length = 0; + } + return; + } else { + // The requested digits correspond to the digits after the point. + // The variable 'needed_digits' includes the digits before the point. + int needed_digits = (*decimal_point) + requested_digits; + GenerateCountedDigits(needed_digits, decimal_point, + numerator, denominator, + buffer, length); + } +} + + +// Returns an estimation of k such that 10^(k-1) <= v < 10^k where +// v = f * 2^exponent and 2^52 <= f < 2^53. +// v is hence a normalized double with the given exponent. The output is an +// approximation for the exponent of the decimal approximation .digits * 10^k. +// +// The result might undershoot by 1 in which case 10^k <= v < 10^(k+1). +// Note: this property holds for v's upper boundary m+ too. +// 10^k <= m+ < 10^(k+1). +// (see explanation below). +// +// Examples: +// EstimatePower(0) => 16 +// EstimatePower(-52) => 0 +// +// Note: e >= 0 => EstimatePower(e) > 0. No similar claim can be made for e<0. +static int EstimatePower(int exponent) { + // This function estimates log10 of v where v = f*2^e (with e == exponent). + // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)). + // Note that f is bounded by its container size. Let p = 53 (the double's + // significand size). Then 2^(p-1) <= f < 2^p. + // + // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close + // to log2(v) the function is simplified to (e+(len(f)-1))/log2(10). + // The computed number undershoots by less than 0.631 (when we compute log3 + // and not log10). + // + // Optimization: since we only need an approximated result this computation + // can be performed on 64 bit integers. 
On x86/x64 architecture the speedup is + // not really measurable, though. + // + // Since we want to avoid overshooting we decrement by 1e-10 so that + // floating-point imprecisions don't affect us. + // + // Explanation for v's boundary m+: the computation takes advantage of + // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement + // (even for denormals where the delta can be much more important). + + const double k1Log10 = 0.30102999566398114; // 1/lg(10) + + // For doubles len(f) == 53 (don't forget the hidden bit). + const int kSignificandSize = Double::kSignificandSize; + double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10); + return static_cast(estimate); +} + + +// See comments for InitialScaledStartValues. +static void InitialScaledStartValuesPositiveExponent( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // A positive exponent implies a positive power. + ASSERT(estimated_power >= 0); + // Since the estimated_power is positive we simply multiply the denominator + // by 10^estimated_power. + + // numerator = v. + numerator->AssignUInt64(significand); + numerator->ShiftLeft(exponent); + // denominator = 10^estimated_power. + denominator->AssignPowerUInt16(10, estimated_power); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + denominator->ShiftLeft(1); + numerator->ShiftLeft(1); + // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common + // denominator (of 2) delta_plus equals 2^e. + delta_plus->AssignUInt16(1); + delta_plus->ShiftLeft(exponent); + // Same for delta_minus. The adjustments if f == 2^p-1 are done later. 
+ delta_minus->AssignUInt16(1); + delta_minus->ShiftLeft(exponent); + } +} + + +// See comments for InitialScaledStartValues +static void InitialScaledStartValuesNegativeExponentPositivePower( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // v = f * 2^e with e < 0, and with estimated_power >= 0. + // This means that e is close to 0 (have a look at how estimated_power is + // computed). + + // numerator = significand + // since v = significand * 2^exponent this is equivalent to + // numerator = v * 2^-exponent + numerator->AssignUInt64(significand); + // denominator = 10^estimated_power * 2^-exponent (with exponent < 0) + denominator->AssignPowerUInt16(10, estimated_power); + denominator->ShiftLeft(-exponent); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + denominator->ShiftLeft(1); + numerator->ShiftLeft(1); + // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common + // denominator (of 2) delta_plus equals 2^e. + // Given that the denominator already includes v's exponent the distance + // to the boundaries is simply 1. + delta_plus->AssignUInt16(1); + // Same for delta_minus. The adjustments if f == 2^p-1 are done later. + delta_minus->AssignUInt16(1); + } +} + + +// See comments for InitialScaledStartValues +static void InitialScaledStartValuesNegativeExponentNegativePower( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // Instead of multiplying the denominator with 10^estimated_power we + // multiply all values (numerator and deltas) by 10^-estimated_power. + + // Use numerator as temporary container for power_ten. 
+ Bignum* power_ten = numerator; + power_ten->AssignPowerUInt16(10, -estimated_power); + + if (need_boundary_deltas) { + // Since power_ten == numerator we must make a copy of 10^-estimated_power + // before we complete the computation of the numerator. + // delta_plus = delta_minus = 10^-estimated_power + delta_plus->AssignBignum(*power_ten); + delta_minus->AssignBignum(*power_ten); + } + + // numerator = significand * 2 * 10^-estimated_power + // since v = significand * 2^exponent this is equivalent to + // numerator = v * 10^-estimated_power * 2 * 2^-exponent. + // Remember: numerator has been abused as power_ten. So no need to assign it + // to itself. + ASSERT(numerator == power_ten); + numerator->MultiplyByUInt64(significand); + + // denominator = 2 * 2^-exponent with exponent < 0. + denominator->AssignUInt16(1); + denominator->ShiftLeft(-exponent); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + numerator->ShiftLeft(1); + denominator->ShiftLeft(1); + // With this shift the boundaries have their correct value, since + // delta_plus = 10^-estimated_power, and + // delta_minus = 10^-estimated_power. + // These assignments have been done earlier. + // The adjustments if f == 2^p-1 (lower boundary is closer) are done later. + } +} + + +// Let v = significand * 2^exponent. +// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator +// and denominator. The functions GenerateShortestDigits and +// GenerateCountedDigits will then convert this ratio to its decimal +// representation d, with the required accuracy. +// Then d * 10^estimated_power is the representation of v. +// (Note: the fraction and the estimated_power might get adjusted before +// generating the decimal representation.) +// +// The initial start values consist of: +// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power. +// - a scaled (common) denominator. 
+// optionally (used by GenerateShortestDigits to decide if it has the shortest +// decimal converting back to v): +// - v - m-: the distance to the lower boundary. +// - m+ - v: the distance to the upper boundary. +// +// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator. +// +// Let ep == estimated_power, then the returned values will satisfy: +// v / 10^ep = numerator / denominator. +// v's boundaries m- and m+: +// m- / 10^ep == v / 10^ep - delta_minus / denominator +// m+ / 10^ep == v / 10^ep + delta_plus / denominator +// Or in other words: +// m- == v - delta_minus * 10^ep / denominator; +// m+ == v + delta_plus * 10^ep / denominator; +// +// Since 10^(k-1) <= v < 10^k (with k == estimated_power) +// or 10^k <= v < 10^(k+1) +// we then have 0.1 <= numerator/denominator < 1 +// or 1 <= numerator/denominator < 10 +// +// It is then easy to kickstart the digit-generation routine. +// +// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST +// or BIGNUM_DTOA_SHORTEST_SINGLE. 
+ +static void InitialScaledStartValues(uint64_t significand, + int exponent, + bool lower_boundary_is_closer, + int estimated_power, + bool need_boundary_deltas, + Bignum* numerator, + Bignum* denominator, + Bignum* delta_minus, + Bignum* delta_plus) { + if (exponent >= 0) { + InitialScaledStartValuesPositiveExponent( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } else if (estimated_power >= 0) { + InitialScaledStartValuesNegativeExponentPositivePower( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } else { + InitialScaledStartValuesNegativeExponentNegativePower( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } + + if (need_boundary_deltas && lower_boundary_is_closer) { + // The lower boundary lies at half the distance of "normal" numbers, so + // double the common denominator and rescale everything except delta_minus. + denominator->ShiftLeft(1); // *2 + numerator->ShiftLeft(1); // *2 + delta_plus->ShiftLeft(1); // *2 + } +} + + +// This routine multiplies numerator/denominator so that its value lies in the +// range 1-10. That is, after a call to this function we have: +// 1 <= (numerator + delta_plus) / denominator < 10. +// Let numerator be the input before modification and numerator' the argument +// after modification, then the output-parameter decimal_point is such that +// numerator / denominator * 10^estimated_power == +// numerator' / denominator' * 10^(decimal_point - 1) +// In some cases estimated_power was too low, and this is already the case. We +// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k == +// estimated_power) but do not touch the numerator or denominator. +// Otherwise the routine multiplies the numerator and the deltas by 10. 
+static void FixupMultiply10(int estimated_power, bool is_even, + int* decimal_point, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + bool in_range; + if (is_even) { + // For IEEE doubles, half-way cases (decimal numbers ending in 5) round to + // the closest floating-point number with an even significand. + in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; + } else { + in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; + } + if (in_range) { + // numerator + delta_plus already reaches the denominator: leave the + // fraction untouched and only report decimal_point = estimated_power + 1. + *decimal_point = estimated_power + 1; + } else { + *decimal_point = estimated_power; + numerator->Times10(); + if (Bignum::Equal(*delta_minus, *delta_plus)) { + delta_minus->Times10(); + delta_plus->AssignBignum(*delta_minus); + } else { + delta_minus->Times10(); + delta_plus->Times10(); + } + } +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.h b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.h new file mode 100644 index 00000000000000..edc21b0f2e4d5e --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.h @@ -0,0 +1,102 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_ +#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_ + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +enum BignumDtoaMode { + // Return the shortest correct representation. + // For example the output of 0.299999999999999988897 is (the less accurate but + // correct) 0.3. + BIGNUM_DTOA_SHORTEST, + // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats. + BIGNUM_DTOA_SHORTEST_SINGLE, + // Return a fixed number of digits after the decimal point. 
+ // For instance fixed(0.1, 4) becomes 0.1000 + // If the input number is big, the output will be big. + BIGNUM_DTOA_FIXED, + // Return a fixed number of digits, no matter what the exponent is. + BIGNUM_DTOA_PRECISION +}; + +// Converts the given double 'v' to ascii. +// The result should be interpreted as buffer * 10^(point-length). +// The buffer will be null-terminated. +// +// The input v must be > 0 and different from NaN and Infinity. +// +// The output depends on the given mode: +// - SHORTEST: produce the least amount of digits for which the internal +// identity requirement is still satisfied. If the digits are printed +// (together with the correct exponent) then reading this number will give +// 'v' again. The buffer will choose the representation that is closest to +// 'v'. If there are two at the same distance, then the number is rounded up. +// In this mode the 'requested_digits' parameter is ignored. +// - FIXED: produces digits necessary to print a given number with +// 'requested_digits' digits after the decimal point. The produced digits +// might be too short in which case the caller has to fill the gaps with '0's. +// Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. +// Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns +// buffer="2", point=0. +// Note: the length of the returned buffer has no meaning wrt the significance +// of its digits. That is, just because it contains '0's does not mean that +// any other digit would not satisfy the internal identity requirement. +// - PRECISION: produces 'requested_digits' where the first digit is not '0'. +// Even though the length of produced digits usually equals +// 'requested_digits', the function is allowed to return fewer digits, in +// which case the caller has to fill the missing digits with '0's. +// Halfway cases are again rounded up. +// 'BignumDtoa' expects the given buffer to be big enough to hold all digits +// and a terminating null-character. 
+void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, + Vector buffer, int* length, int* point); + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_BIGNUM_DTOA_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-bignum.cpp b/deps/icu-small/source/i18n/double-conversion-bignum.cpp new file mode 100644 index 00000000000000..d5682af35f866d --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-bignum.cpp @@ -0,0 +1,784 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-bignum.h" +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +Bignum::Bignum() + : bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) { + for (int i = 0; i < kBigitCapacity; ++i) { + bigits_[i] = 0; + } +} + + +template +static int BitSize(S value) { + (void) value; // Mark variable as used. + return 8 * sizeof(value); +} + +// Guaranteed to lie in one Bigit. +void Bignum::AssignUInt16(uint16_t value) { + ASSERT(kBigitSize >= BitSize(value)); + Zero(); + if (value == 0) return; + + EnsureCapacity(1); + bigits_[0] = value; + used_digits_ = 1; +} + + +void Bignum::AssignUInt64(uint64_t value) { + const int kUInt64Size = 64; + + Zero(); + if (value == 0) return; + + int needed_bigits = kUInt64Size / kBigitSize + 1; + EnsureCapacity(needed_bigits); + for (int i = 0; i < needed_bigits; ++i) { + bigits_[i] = value & kBigitMask; + value = value >> kBigitSize; + } + used_digits_ = needed_bigits; + Clamp(); +} + + +void Bignum::AssignBignum(const Bignum& other) { + exponent_ = other.exponent_; + for (int i = 0; i < other.used_digits_; ++i) { + bigits_[i] = other.bigits_[i]; + } + // Clear the excess digits (if there were any). 
+ for (int i = other.used_digits_; i < used_digits_; ++i) { + bigits_[i] = 0; + } + used_digits_ = other.used_digits_; +} + + +static uint64_t ReadUInt64(Vector buffer, + int from, + int digits_to_read) { + uint64_t result = 0; + for (int i = from; i < from + digits_to_read; ++i) { + int digit = buffer[i] - '0'; + ASSERT(0 <= digit && digit <= 9); + result = result * 10 + digit; + } + return result; +} + + +void Bignum::AssignDecimalString(Vector value) { + // 2^64 = 18446744073709551616 > 10^19 + const int kMaxUint64DecimalDigits = 19; + Zero(); + int length = value.length(); + unsigned int pos = 0; + // Let's just say that each digit needs 4 bits. + while (length >= kMaxUint64DecimalDigits) { + uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits); + pos += kMaxUint64DecimalDigits; + length -= kMaxUint64DecimalDigits; + MultiplyByPowerOfTen(kMaxUint64DecimalDigits); + AddUInt64(digits); + } + uint64_t digits = ReadUInt64(value, pos, length); + MultiplyByPowerOfTen(length); + AddUInt64(digits); + Clamp(); +} + + +static int HexCharValue(char c) { + if ('0' <= c && c <= '9') return c - '0'; + if ('a' <= c && c <= 'f') return 10 + c - 'a'; + ASSERT('A' <= c && c <= 'F'); + return 10 + c - 'A'; +} + + +void Bignum::AssignHexString(Vector value) { + Zero(); + int length = value.length(); + + int needed_bigits = length * 4 / kBigitSize + 1; + EnsureCapacity(needed_bigits); + int string_index = length - 1; + for (int i = 0; i < needed_bigits - 1; ++i) { + // These bigits are guaranteed to be "full". 
+ Chunk current_bigit = 0; + for (int j = 0; j < kBigitSize / 4; j++) { + current_bigit += HexCharValue(value[string_index--]) << (j * 4); + } + bigits_[i] = current_bigit; + } + used_digits_ = needed_bigits - 1; + + Chunk most_significant_bigit = 0; // Could be = 0; + for (int j = 0; j <= string_index; ++j) { + most_significant_bigit <<= 4; + most_significant_bigit += HexCharValue(value[j]); + } + if (most_significant_bigit != 0) { + bigits_[used_digits_] = most_significant_bigit; + used_digits_++; + } + Clamp(); +} + + +void Bignum::AddUInt64(uint64_t operand) { + if (operand == 0) return; + Bignum other; + other.AssignUInt64(operand); + AddBignum(other); +} + + +void Bignum::AddBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + + // If this has a greater exponent than other append zero-bigits to this. + // After this call exponent_ <= other.exponent_. + Align(other); + + // There are two possibilities: + // aaaaaaaaaaa 0000 (where the 0s represent a's exponent) + // bbbbb 00000000 + // ---------------- + // ccccccccccc 0000 + // or + // aaaaaaaaaa 0000 + // bbbbbbbbb 0000000 + // ----------------- + // cccccccccccc 0000 + // In both cases we might need a carry bigit. + + EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_); + Chunk carry = 0; + int bigit_pos = other.exponent_ - exponent_; + ASSERT(bigit_pos >= 0); + for (int i = 0; i < other.used_digits_; ++i) { + Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry; + bigits_[bigit_pos] = sum & kBigitMask; + carry = sum >> kBigitSize; + bigit_pos++; + } + + while (carry != 0) { + Chunk sum = bigits_[bigit_pos] + carry; + bigits_[bigit_pos] = sum & kBigitMask; + carry = sum >> kBigitSize; + bigit_pos++; + } + used_digits_ = Max(bigit_pos, used_digits_); + ASSERT(IsClamped()); +} + + +void Bignum::SubtractBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + // We require this to be bigger than other. 
+ ASSERT(LessEqual(other, *this)); + + Align(other); + + int offset = other.exponent_ - exponent_; + Chunk borrow = 0; + int i; + for (i = 0; i < other.used_digits_; ++i) { + ASSERT((borrow == 0) || (borrow == 1)); + Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow; + bigits_[i + offset] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + } + while (borrow != 0) { + Chunk difference = bigits_[i + offset] - borrow; + bigits_[i + offset] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + ++i; + } + Clamp(); +} + + +void Bignum::ShiftLeft(int shift_amount) { + if (used_digits_ == 0) return; + exponent_ += shift_amount / kBigitSize; + int local_shift = shift_amount % kBigitSize; + EnsureCapacity(used_digits_ + 1); + BigitsShiftLeft(local_shift); +} + + +void Bignum::MultiplyByUInt32(uint32_t factor) { + if (factor == 1) return; + if (factor == 0) { + Zero(); + return; + } + if (used_digits_ == 0) return; + + // The product of a bigit with the factor is of size kBigitSize + 32. + // Assert that this number + 1 (for the carry) fits into double chunk. 
+ ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1); + DoubleChunk carry = 0; + for (int i = 0; i < used_digits_; ++i) { + DoubleChunk product = static_cast(factor) * bigits_[i] + carry; + bigits_[i] = static_cast(product & kBigitMask); + carry = (product >> kBigitSize); + } + while (carry != 0) { + EnsureCapacity(used_digits_ + 1); + bigits_[used_digits_] = carry & kBigitMask; + used_digits_++; + carry >>= kBigitSize; + } +} + + +void Bignum::MultiplyByUInt64(uint64_t factor) { + if (factor == 1) return; + if (factor == 0) { + Zero(); + return; + } + ASSERT(kBigitSize < 32); + uint64_t carry = 0; + uint64_t low = factor & 0xFFFFFFFF; + uint64_t high = factor >> 32; + for (int i = 0; i < used_digits_; ++i) { + uint64_t product_low = low * bigits_[i]; + uint64_t product_high = high * bigits_[i]; + uint64_t tmp = (carry & kBigitMask) + product_low; + bigits_[i] = tmp & kBigitMask; + carry = (carry >> kBigitSize) + (tmp >> kBigitSize) + + (product_high << (32 - kBigitSize)); + } + while (carry != 0) { + EnsureCapacity(used_digits_ + 1); + bigits_[used_digits_] = carry & kBigitMask; + used_digits_++; + carry >>= kBigitSize; + } +} + + +void Bignum::MultiplyByPowerOfTen(int exponent) { + const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d); + const uint16_t kFive1 = 5; + const uint16_t kFive2 = kFive1 * 5; + const uint16_t kFive3 = kFive2 * 5; + const uint16_t kFive4 = kFive3 * 5; + const uint16_t kFive5 = kFive4 * 5; + const uint16_t kFive6 = kFive5 * 5; + const uint32_t kFive7 = kFive6 * 5; + const uint32_t kFive8 = kFive7 * 5; + const uint32_t kFive9 = kFive8 * 5; + const uint32_t kFive10 = kFive9 * 5; + const uint32_t kFive11 = kFive10 * 5; + const uint32_t kFive12 = kFive11 * 5; + const uint32_t kFive13 = kFive12 * 5; + const uint32_t kFive1_to_12[] = + { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6, + kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 }; + + ASSERT(exponent >= 0); + if (exponent == 0) return; + if (used_digits_ == 0) return; + + // We 
shift by exponent at the end just before returning. + int remaining_exponent = exponent; + while (remaining_exponent >= 27) { + MultiplyByUInt64(kFive27); + remaining_exponent -= 27; + } + while (remaining_exponent >= 13) { + MultiplyByUInt32(kFive13); + remaining_exponent -= 13; + } + if (remaining_exponent > 0) { + MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]); + } + ShiftLeft(exponent); +} + + +void Bignum::Square() { + ASSERT(IsClamped()); + int product_length = 2 * used_digits_; + EnsureCapacity(product_length); + + // Comba multiplication: compute each column separately. + // Example: r = a2a1a0 * b2b1b0. + // r = 1 * a0b0 + + // 10 * (a1b0 + a0b1) + + // 100 * (a2b0 + a1b1 + a0b2) + + // 1000 * (a2b1 + a1b2) + + // 10000 * a2b2 + // + // In the worst case we have to accumulate nb-digits products of digit*digit. + // + // Assert that the additional number of bits in a DoubleChunk are enough to + // sum up used_digits of Bigit*Bigit. + if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) { + UNIMPLEMENTED(); + } + DoubleChunk accumulator = 0; + // First shift the digits so we don't overwrite them. + int copy_offset = used_digits_; + for (int i = 0; i < used_digits_; ++i) { + bigits_[copy_offset + i] = bigits_[i]; + } + // We have two loops to avoid some 'if's in the loop. + for (int i = 0; i < used_digits_; ++i) { + // Process temporary digit i with power i. + // The sum of the two indices must be equal to i. + int bigit_index1 = i; + int bigit_index2 = 0; + // Sum all of the sub-products. 
+ while (bigit_index1 >= 0) { + Chunk chunk1 = bigits_[copy_offset + bigit_index1]; + Chunk chunk2 = bigits_[copy_offset + bigit_index2]; + accumulator += static_cast(chunk1) * chunk2; + bigit_index1--; + bigit_index2++; + } + bigits_[i] = static_cast(accumulator) & kBigitMask; + accumulator >>= kBigitSize; + } + for (int i = used_digits_; i < product_length; ++i) { + int bigit_index1 = used_digits_ - 1; + int bigit_index2 = i - bigit_index1; + // Invariant: sum of both indices is again equal to i. + // Inner loop runs 0 times on last iteration, emptying accumulator. + while (bigit_index2 < used_digits_) { + Chunk chunk1 = bigits_[copy_offset + bigit_index1]; + Chunk chunk2 = bigits_[copy_offset + bigit_index2]; + accumulator += static_cast(chunk1) * chunk2; + bigit_index1--; + bigit_index2++; + } + // The overwritten bigits_[i] will never be read in further loop iterations, + // because bigit_index1 and bigit_index2 are always greater + // than i - used_digits_. + bigits_[i] = static_cast(accumulator) & kBigitMask; + accumulator >>= kBigitSize; + } + // Since the result was guaranteed to lie inside the number the + // accumulator must be 0 now. + ASSERT(accumulator == 0); + + // Don't forget to update the used_digits and the exponent. + used_digits_ = product_length; + exponent_ *= 2; + Clamp(); +} + + +void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) { + ASSERT(base != 0); + ASSERT(power_exponent >= 0); + if (power_exponent == 0) { + AssignUInt16(1); + return; + } + Zero(); + int shifts = 0; + // We expect base to be in range 2-32, and most often to be 10. + // It does not make much sense to implement different algorithms for counting + // the bits. + while ((base & 1) == 0) { + base >>= 1; + shifts++; + } + int bit_size = 0; + int tmp_base = base; + while (tmp_base != 0) { + tmp_base >>= 1; + bit_size++; + } + int final_size = bit_size * power_exponent; + // 1 extra bigit for the shifting, and one for rounded final_size. 
+ EnsureCapacity(final_size / kBigitSize + 2); + + // Left to Right exponentiation. + int mask = 1; + while (power_exponent >= mask) mask <<= 1; + + // The mask is now pointing to the bit above the most significant 1-bit of + // power_exponent. + // Get rid of first 1-bit; + mask >>= 2; + uint64_t this_value = base; + + bool delayed_multipliciation = false; + const uint64_t max_32bits = 0xFFFFFFFF; + while (mask != 0 && this_value <= max_32bits) { + this_value = this_value * this_value; + // Verify that there is enough space in this_value to perform the + // multiplication. The first bit_size bits must be 0. + if ((power_exponent & mask) != 0) { + uint64_t base_bits_mask = + ~((static_cast(1) << (64 - bit_size)) - 1); + bool high_bits_zero = (this_value & base_bits_mask) == 0; + if (high_bits_zero) { + this_value *= base; + } else { + delayed_multipliciation = true; + } + } + mask >>= 1; + } + AssignUInt64(this_value); + if (delayed_multipliciation) { + MultiplyByUInt32(base); + } + + // Now do the same thing as a bignum. + while (mask != 0) { + Square(); + if ((power_exponent & mask) != 0) { + MultiplyByUInt32(base); + } + mask >>= 1; + } + + // And finally add the saved shifts. + ShiftLeft(shifts * power_exponent); +} + + +// Precondition: this/other < 16bit. +uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + ASSERT(other.used_digits_ > 0); + + // Easy case: if we have less digits than the divisor than the result is 0. + // Note: this handles the case where this == 0, too. + if (BigitLength() < other.BigitLength()) { + return 0; + } + + Align(other); + + uint16_t result = 0; + + // Start by removing multiples of 'other' until both numbers have the same + // number of digits. + while (BigitLength() > other.BigitLength()) { + // This naive approach is extremely inefficient if `this` divided by other + // is big. 
This function is implemented for doubleToString where + // the result should be small (less than 10). + ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16)); + ASSERT(bigits_[used_digits_ - 1] < 0x10000); + // Remove the multiples of the first digit. + // Example this = 23 and other equals 9. -> Remove 2 multiples. + result += static_cast(bigits_[used_digits_ - 1]); + SubtractTimes(other, bigits_[used_digits_ - 1]); + } + + ASSERT(BigitLength() == other.BigitLength()); + + // Both bignums are at the same length now. + // Since other has more than 0 digits we know that the access to + // bigits_[used_digits_ - 1] is safe. + Chunk this_bigit = bigits_[used_digits_ - 1]; + Chunk other_bigit = other.bigits_[other.used_digits_ - 1]; + + if (other.used_digits_ == 1) { + // Shortcut for easy (and common) case. + int quotient = this_bigit / other_bigit; + bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient; + ASSERT(quotient < 0x10000); + result += static_cast(quotient); + Clamp(); + return result; + } + + int division_estimate = this_bigit / (other_bigit + 1); + ASSERT(division_estimate < 0x10000); + result += static_cast(division_estimate); + SubtractTimes(other, division_estimate); + + if (other_bigit * (division_estimate + 1) > this_bigit) { + // No need to even try to subtract. Even if other's remaining digits were 0 + // another subtraction would be too much. 
+ return result; + } + + while (LessEqual(other, *this)) { + SubtractBignum(other); + result++; + } + return result; +} + + +template +static int SizeInHexChars(S number) { + ASSERT(number > 0); + int result = 0; + while (number != 0) { + number >>= 4; + result++; + } + return result; +} + + +static char HexCharOfValue(int value) { + ASSERT(0 <= value && value <= 16); + if (value < 10) return static_cast(value + '0'); + return static_cast(value - 10 + 'A'); +} + + +bool Bignum::ToHexString(char* buffer, int buffer_size) const { + ASSERT(IsClamped()); + // Each bigit must be printable as separate hex-character. + ASSERT(kBigitSize % 4 == 0); + const int kHexCharsPerBigit = kBigitSize / 4; + + if (used_digits_ == 0) { + if (buffer_size < 2) return false; + buffer[0] = '0'; + buffer[1] = '\0'; + return true; + } + // We add 1 for the terminating '\0' character. + int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit + + SizeInHexChars(bigits_[used_digits_ - 1]) + 1; + if (needed_chars > buffer_size) return false; + int string_index = needed_chars - 1; + buffer[string_index--] = '\0'; + for (int i = 0; i < exponent_; ++i) { + for (int j = 0; j < kHexCharsPerBigit; ++j) { + buffer[string_index--] = '0'; + } + } + for (int i = 0; i < used_digits_ - 1; ++i) { + Chunk current_bigit = bigits_[i]; + for (int j = 0; j < kHexCharsPerBigit; ++j) { + buffer[string_index--] = HexCharOfValue(current_bigit & 0xF); + current_bigit >>= 4; + } + } + // And finally the last bigit. 
+ Chunk most_significant_bigit = bigits_[used_digits_ - 1]; + while (most_significant_bigit != 0) { + buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF); + most_significant_bigit >>= 4; + } + return true; +} + + +Bignum::Chunk Bignum::BigitAt(int index) const { + if (index >= BigitLength()) return 0; + if (index < exponent_) return 0; + return bigits_[index - exponent_]; +} + + +int Bignum::Compare(const Bignum& a, const Bignum& b) { + ASSERT(a.IsClamped()); + ASSERT(b.IsClamped()); + int bigit_length_a = a.BigitLength(); + int bigit_length_b = b.BigitLength(); + if (bigit_length_a < bigit_length_b) return -1; + if (bigit_length_a > bigit_length_b) return +1; + for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) { + Chunk bigit_a = a.BigitAt(i); + Chunk bigit_b = b.BigitAt(i); + if (bigit_a < bigit_b) return -1; + if (bigit_a > bigit_b) return +1; + // Otherwise they are equal up to this digit. Try the next digit. + } + return 0; +} + + +int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) { + ASSERT(a.IsClamped()); + ASSERT(b.IsClamped()); + ASSERT(c.IsClamped()); + if (a.BigitLength() < b.BigitLength()) { + return PlusCompare(b, a, c); + } + if (a.BigitLength() + 1 < c.BigitLength()) return -1; + if (a.BigitLength() > c.BigitLength()) return +1; + // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than + // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one + // of 'a'. + if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) { + return -1; + } + + Chunk borrow = 0; + // Starting at min_exponent all digits are == 0. So no need to compare them. 
+ int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_); + for (int i = c.BigitLength() - 1; i >= min_exponent; --i) { + Chunk chunk_a = a.BigitAt(i); + Chunk chunk_b = b.BigitAt(i); + Chunk chunk_c = c.BigitAt(i); + Chunk sum = chunk_a + chunk_b; + if (sum > chunk_c + borrow) { + return +1; + } else { + borrow = chunk_c + borrow - sum; + if (borrow > 1) return -1; + borrow <<= kBigitSize; + } + } + if (borrow == 0) return 0; + return -1; +} + + +void Bignum::Clamp() { + while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) { + used_digits_--; + } + if (used_digits_ == 0) { + // Zero. + exponent_ = 0; + } +} + + +bool Bignum::IsClamped() const { + return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0; +} + + +void Bignum::Zero() { + for (int i = 0; i < used_digits_; ++i) { + bigits_[i] = 0; + } + used_digits_ = 0; + exponent_ = 0; +} + + +void Bignum::Align(const Bignum& other) { + if (exponent_ > other.exponent_) { + // If "X" represents a "hidden" digit (by the exponent) then we are in the + // following case (a == this, b == other): + // a: aaaaaaXXXX or a: aaaaaXXX + // b: bbbbbbX b: bbbbbbbbXX + // We replace some of the hidden digits (X) of a with 0 digits. 
+ // a: aaaaaa000X or a: aaaaa0XX + int zero_digits = exponent_ - other.exponent_; + EnsureCapacity(used_digits_ + zero_digits); + for (int i = used_digits_ - 1; i >= 0; --i) { + bigits_[i + zero_digits] = bigits_[i]; + } + for (int i = 0; i < zero_digits; ++i) { + bigits_[i] = 0; + } + used_digits_ += zero_digits; + exponent_ -= zero_digits; + ASSERT(used_digits_ >= 0); + ASSERT(exponent_ >= 0); + } +} + + +void Bignum::BigitsShiftLeft(int shift_amount) { + ASSERT(shift_amount < kBigitSize); + ASSERT(shift_amount >= 0); + Chunk carry = 0; + for (int i = 0; i < used_digits_; ++i) { + Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount); + bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask; + carry = new_carry; + } + if (carry != 0) { + bigits_[used_digits_] = carry; + used_digits_++; + } +} + + +void Bignum::SubtractTimes(const Bignum& other, int factor) { + ASSERT(exponent_ <= other.exponent_); + if (factor < 3) { + for (int i = 0; i < factor; ++i) { + SubtractBignum(other); + } + return; + } + Chunk borrow = 0; + int exponent_diff = other.exponent_ - exponent_; + for (int i = 0; i < other.used_digits_; ++i) { + DoubleChunk product = static_cast(factor) * other.bigits_[i]; + DoubleChunk remove = borrow + product; + Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask); + bigits_[i + exponent_diff] = difference & kBigitMask; + borrow = static_cast((difference >> (kChunkSize - 1)) + + (remove >> kBigitSize)); + } + for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) { + if (borrow == 0) return; + Chunk difference = bigits_[i] - borrow; + bigits_[i] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + } + Clamp(); +} + + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-bignum.h b/deps/icu-small/source/i18n/double-conversion-bignum.h new file 
mode 100644 index 00000000000000..d1af3bf5e77b15 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-bignum.h @@ -0,0 +1,162 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_BIGNUM_H_ +#define DOUBLE_CONVERSION_BIGNUM_H_ + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +class Bignum { + public: + // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately. + // This bignum can encode much bigger numbers, since it contains an + // exponent. + static const int kMaxSignificantBits = 3584; + + Bignum(); + void AssignUInt16(uint16_t value); + void AssignUInt64(uint64_t value); + void AssignBignum(const Bignum& other); + + void AssignDecimalString(Vector value); + void AssignHexString(Vector value); + + void AssignPowerUInt16(uint16_t base, int exponent); + + void AddUInt64(uint64_t operand); + void AddBignum(const Bignum& other); + // Precondition: this >= other. + void SubtractBignum(const Bignum& other); + + void Square(); + void ShiftLeft(int shift_amount); + void MultiplyByUInt32(uint32_t factor); + void MultiplyByUInt64(uint64_t factor); + void MultiplyByPowerOfTen(int exponent); + void Times10() { return MultiplyByUInt32(10); } + // Pseudocode: + // int result = this / other; + // this = this % other; + // In the worst case this function is in O(this/other). + uint16_t DivideModuloIntBignum(const Bignum& other); + + bool ToHexString(char* buffer, int buffer_size) const; + + // Returns + // -1 if a < b, + // 0 if a == b, and + // +1 if a > b. 
+ static int Compare(const Bignum& a, const Bignum& b); + static bool Equal(const Bignum& a, const Bignum& b) { + return Compare(a, b) == 0; + } + static bool LessEqual(const Bignum& a, const Bignum& b) { + return Compare(a, b) <= 0; + } + static bool Less(const Bignum& a, const Bignum& b) { + return Compare(a, b) < 0; + } + // Returns Compare(a + b, c); + static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c); + // Returns a + b == c + static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) { + return PlusCompare(a, b, c) == 0; + } + // Returns a + b <= c + static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) { + return PlusCompare(a, b, c) <= 0; + } + // Returns a + b < c + static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) { + return PlusCompare(a, b, c) < 0; + } + private: + typedef uint32_t Chunk; + typedef uint64_t DoubleChunk; + + static const int kChunkSize = sizeof(Chunk) * 8; + static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8; + // With bigit size of 28 we loose some bits, but a double still fits easily + // into two chunks, and more importantly we can use the Comba multiplication. + static const int kBigitSize = 28; + static const Chunk kBigitMask = (1 << kBigitSize) - 1; + // Every instance allocates kBigitLength chunks on the stack. Bignums cannot + // grow. There are no checks if the stack-allocated space is sufficient. + static const int kBigitCapacity = kMaxSignificantBits / kBigitSize; + + void EnsureCapacity(int size) { + if (size > kBigitCapacity) { + UNREACHABLE(); + } + } + void Align(const Bignum& other); + void Clamp(); + bool IsClamped() const; + void Zero(); + // Requires this to have enough capacity (no tests done). + // Updates used_digits_ if necessary. + // shift_amount must be < kBigitSize. + void BigitsShiftLeft(int shift_amount); + // BigitLength includes the "hidden" digits encoded in the exponent. 
+ int BigitLength() const { return used_digits_ + exponent_; } + Chunk BigitAt(int index) const; + void SubtractTimes(const Bignum& other, int factor); + + Chunk bigits_buffer_[kBigitCapacity]; + // A vector backed by bigits_buffer_. This way accesses to the array are + // checked for out-of-bounds errors. + Vector bigits_; + int used_digits_; + // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize). + int exponent_; + + DISALLOW_COPY_AND_ASSIGN(Bignum); +}; + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_BIGNUM_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-cached-powers.cpp b/deps/icu-small/source/i18n/double-conversion-cached-powers.cpp new file mode 100644 index 00000000000000..e49700444c6b48 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-cached-powers.cpp @@ -0,0 +1,193 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2006-2008 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#include +#include +#include + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-utils.h" + +#include "double-conversion-cached-powers.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +struct CachedPower { + uint64_t significand; + int16_t binary_exponent; + int16_t decimal_exponent; +}; + +static const CachedPower kCachedPowers[] = { + {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348}, + {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340}, + {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332}, + {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324}, + {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316}, + {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308}, + {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300}, + {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292}, + {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284}, + {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276}, + {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268}, + {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260}, + 
{UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252}, + {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244}, + {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236}, + {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228}, + {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220}, + {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212}, + {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204}, + {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196}, + {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188}, + {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180}, + {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172}, + {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164}, + {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156}, + {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148}, + {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140}, + {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132}, + {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124}, + {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116}, + {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108}, + {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100}, + {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92}, + {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84}, + {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76}, + {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68}, + {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60}, + {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52}, + {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44}, + {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36}, + {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28}, + {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20}, + {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12}, + {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4}, + {UINT64_2PART_C(0x9c400000, 00000000), -50, 4}, + {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12}, + {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20}, + {UINT64_2PART_C(0x813f3978, f8940984), 30, 28}, + {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36}, + {UINT64_2PART_C(0x8f7e32ce, 
7bea5c70), 83, 44}, + {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52}, + {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60}, + {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68}, + {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76}, + {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84}, + {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92}, + {UINT64_2PART_C(0x924d692c, a61be758), 269, 100}, + {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108}, + {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116}, + {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124}, + {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132}, + {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140}, + {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148}, + {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156}, + {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164}, + {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172}, + {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180}, + {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188}, + {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196}, + {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204}, + {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212}, + {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220}, + {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228}, + {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236}, + {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244}, + {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252}, + {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260}, + {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268}, + {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276}, + {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284}, + {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292}, + {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300}, + {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308}, + {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316}, + {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324}, + {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332}, + {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340}, +}; + +static const int kCachedPowersOffset = 348; // -1 * 
the first decimal_exponent. +static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10) +// Difference between the decimal exponents in the table above. +const int PowersOfTenCache::kDecimalExponentDistance = 8; +const int PowersOfTenCache::kMinDecimalExponent = -348; +const int PowersOfTenCache::kMaxDecimalExponent = 340; + +void PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + int min_exponent, + int max_exponent, + DiyFp* power, + int* decimal_exponent) { + int kQ = DiyFp::kSignificandSize; + double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10); + int foo = kCachedPowersOffset; + int index = + (foo + static_cast(k) - 1) / kDecimalExponentDistance + 1; + ASSERT(0 <= index && index < static_cast(ARRAY_SIZE(kCachedPowers))); + CachedPower cached_power = kCachedPowers[index]; + ASSERT(min_exponent <= cached_power.binary_exponent); + (void) max_exponent; // Mark variable as used. + ASSERT(cached_power.binary_exponent <= max_exponent); + *decimal_exponent = cached_power.decimal_exponent; + *power = DiyFp(cached_power.significand, cached_power.binary_exponent); +} + + +void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent, + DiyFp* power, + int* found_exponent) { + ASSERT(kMinDecimalExponent <= requested_exponent); + ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance); + int index = + (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance; + CachedPower cached_power = kCachedPowers[index]; + *power = DiyFp(cached_power.significand, cached_power.binary_exponent); + *found_exponent = cached_power.decimal_exponent; + ASSERT(*found_exponent <= requested_exponent); + ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance); +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-cached-powers.h 
b/deps/icu-small/source/i18n/double-conversion-cached-powers.h new file mode 100644 index 00000000000000..438746b143293f --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-cached-powers.h @@ -0,0 +1,82 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_ +#define DOUBLE_CONVERSION_CACHED_POWERS_H_ + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-diy-fp.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +class PowersOfTenCache { + public: + + // Not all powers of ten are cached. The decimal exponent of two neighboring + // cached numbers will differ by kDecimalExponentDistance. + static const int kDecimalExponentDistance; + + static const int kMinDecimalExponent; + static const int kMaxDecimalExponent; + + // Returns a cached power-of-ten with a binary exponent in the range + // [min_exponent; max_exponent] (boundaries included). + static void GetCachedPowerForBinaryExponentRange(int min_exponent, + int max_exponent, + DiyFp* power, + int* decimal_exponent); + + // Returns a cached power of ten x ~= 10^k such that + // k <= decimal_exponent < k + kCachedPowersDecimalDistance. + // The given decimal_exponent must satisfy + // kMinDecimalExponent <= requested_exponent, and + // requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance. + static void GetCachedPowerForDecimalExponent(int requested_exponent, + DiyFp* power, + int* found_exponent); +}; + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_CACHED_POWERS_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-diy-fp.cpp b/deps/icu-small/source/i18n/double-conversion-diy-fp.cpp new file mode 100644 index 00000000000000..f38430c6c3c90f --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-diy-fp.cpp @@ -0,0 +1,74 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. 
Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +// ICU PATCH: Customize header file paths for ICU. 
+ +#include "double-conversion-diy-fp.h" +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +void DiyFp::Multiply(const DiyFp& other) { + // Simply "emulates" a 128 bit multiplication. + // However: the resulting number only contains 64 bits. The least + // significant 64 bits are only used for rounding the most significant 64 + // bits. + const uint64_t kM32 = 0xFFFFFFFFU; + uint64_t a = f_ >> 32; + uint64_t b = f_ & kM32; + uint64_t c = other.f_ >> 32; + uint64_t d = other.f_ & kM32; + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32); + // By adding 1U << 31 to tmp we round the final result. + // Halfway cases will be round up. + tmp += 1U << 31; + uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32); + e_ += other.e_ + 64; + f_ = result_f; +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-diy-fp.h b/deps/icu-small/source/i18n/double-conversion-diy-fp.h new file mode 100644 index 00000000000000..21896851d22d32 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-diy-fp.h @@ -0,0 +1,136 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_DIY_FP_H_ +#define DOUBLE_CONVERSION_DIY_FP_H_ + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +// This "Do It Yourself Floating Point" class implements a floating-point number +// with a uint64 significand and an int exponent. Normalized DiyFp numbers will +// have the most significant bit of the significand set. +// Multiplication and Subtraction do not normalize their results. +// DiyFp are not designed to contain special doubles (NaN and Infinity). 
+class DiyFp { + public: + static const int kSignificandSize = 64; + + DiyFp() : f_(0), e_(0) {} + DiyFp(uint64_t significand, int exponent) : f_(significand), e_(exponent) {} + + // this = this - other. + // The exponents of both numbers must be the same and the significand of this + // must be bigger than the significand of other. + // The result will not be normalized. + void Subtract(const DiyFp& other) { + ASSERT(e_ == other.e_); + ASSERT(f_ >= other.f_); + f_ -= other.f_; + } + + // Returns a - b. + // The exponents of both numbers must be the same and this must be bigger + // than other. The result will not be normalized. + static DiyFp Minus(const DiyFp& a, const DiyFp& b) { + DiyFp result = a; + result.Subtract(b); + return result; + } + + + // this = this * other. + void Multiply(const DiyFp& other); + + // returns a * b; + static DiyFp Times(const DiyFp& a, const DiyFp& b) { + DiyFp result = a; + result.Multiply(b); + return result; + } + + void Normalize() { + ASSERT(f_ != 0); + uint64_t significand = f_; + int exponent = e_; + + // This method is mainly called for normalizing boundaries. In general + // boundaries need to be shifted by 10 bits. We thus optimize for this case. 
+ const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000); + while ((significand & k10MSBits) == 0) { + significand <<= 10; + exponent -= 10; + } + while ((significand & kUint64MSB) == 0) { + significand <<= 1; + exponent--; + } + f_ = significand; + e_ = exponent; + } + + static DiyFp Normalize(const DiyFp& a) { + DiyFp result = a; + result.Normalize(); + return result; + } + + uint64_t f() const { return f_; } + int e() const { return e_; } + + void set_f(uint64_t new_value) { f_ = new_value; } + void set_e(int new_value) { e_ = new_value; } + + private: + static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000); + + uint64_t f_; + int e_; +}; + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_DIY_FP_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-fast-dtoa.cpp b/deps/icu-small/source/i18n/double-conversion-fast-dtoa.cpp new file mode 100644 index 00000000000000..8d1499a79b4a2e --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-fast-dtoa.cpp @@ -0,0 +1,683 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-fast-dtoa.h" + +#include "double-conversion-cached-powers.h" +#include "double-conversion-diy-fp.h" +#include "double-conversion-ieee.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +// The minimal and maximal target exponent define the range of w's binary +// exponent, where 'w' is the result of multiplying the input by a cached power +// of ten. +// +// A different range might be chosen on a different platform, to optimize digit +// generation, but a smaller range requires more powers of ten to be cached. +static const int kMinimalTargetExponent = -60; +static const int kMaximalTargetExponent = -32; + + +// Adjusts the last digit of the generated number, and screens out generated +// solutions that may be inaccurate. 
A solution may be inaccurate if it is +// outside the safe interval, or if we cannot prove that it is closer to the +// input than a neighboring representation of the same length. +// +// Input: * buffer containing the digits of too_high / 10^kappa +// * the buffer's length +// * distance_too_high_w == (too_high - w).f() * unit +// * unsafe_interval == (too_high - too_low).f() * unit +// * rest = (too_high - buffer * 10^kappa).f() * unit +// * ten_kappa = 10^kappa * unit +// * unit = the common multiplier +// Output: returns true if the buffer is guaranteed to contain the closest +// representable number to the input. +// Modifies the generated digits in the buffer to approach (round towards) w. +static bool RoundWeed(Vector buffer, + int length, + uint64_t distance_too_high_w, + uint64_t unsafe_interval, + uint64_t rest, + uint64_t ten_kappa, + uint64_t unit) { + uint64_t small_distance = distance_too_high_w - unit; + uint64_t big_distance = distance_too_high_w + unit; + // Let w_low = too_high - big_distance, and + // w_high = too_high - small_distance. + // Note: w_low < w < w_high + // + // The real w (* unit) must lie somewhere inside the interval + // ]w_low; w_high[ (often written as "(w_low; w_high)") + + // Basically the buffer currently contains a number in the unsafe interval + // ]too_low; too_high[ with too_low < w < too_high + // + // too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // ^v 1 unit ^ ^ ^ ^ + // boundary_high --------------------- . . . . + // ^v 1 unit . . . . + // - - - - - - - - - - - - - - - - - - - + - - + - - - - - - . . + // . . ^ . . + // . big_distance . . . + // . . . . rest + // small_distance . . . . + // v . . . . + // w_high - - - - - - - - - - - - - - - - - - . . . . + // ^v 1 unit . . . . + // w ---------------------------------------- . . . . + // ^v 1 unit v . . . + // w_low - - - - - - - - - - - - - - - - - - - - - . . . + // . . 
v + // buffer --------------------------------------------------+-------+-------- + // . . + // safe_interval . + // v . + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . + // ^v 1 unit . + // boundary_low ------------------------- unsafe_interval + // ^v 1 unit v + // too_low - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // + // + // Note that the value of buffer could lie anywhere inside the range too_low + // to too_high. + // + // boundary_low, boundary_high and w are approximations of the real boundaries + // and v (the input number). They are guaranteed to be precise up to one unit. + // In fact the error is guaranteed to be strictly less than one unit. + // + // Anything that lies outside the unsafe interval is guaranteed not to round + // to v when read again. + // Anything that lies inside the safe interval is guaranteed to round to v + // when read again. + // If the number inside the buffer lies inside the unsafe interval but not + // inside the safe interval then we simply do not know and bail out (returning + // false). + // + // Similarly we have to take into account the imprecision of 'w' when finding + // the closest representation of 'w'. If we have two potential + // representations, and one is closer to both w_low and w_high, then we know + // it is closer to the actual value v. + // + // By generating the digits of too_high we got the largest (closest to + // too_high) buffer that is still in the unsafe interval. In the case where + // w_high < buffer < too_high we try to decrement the buffer. + // This way the buffer approaches (rounds towards) w. + // There are 3 conditions that stop the decrementation process: + // 1) the buffer is already below w_high + // 2) decrementing the buffer would make it leave the unsafe interval + // 3) decrementing the buffer would yield a number below w_high and farther + // away than the current number. 
In other words: + // (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high + // Instead of using the buffer directly we use its distance to too_high. + // Conceptually rest ~= too_high - buffer + // We need to do the following tests in this order to avoid over- and + // underflows. + ASSERT(rest <= unsafe_interval); + while (rest < small_distance && // Negated condition 1 + unsafe_interval - rest >= ten_kappa && // Negated condition 2 + (rest + ten_kappa < small_distance || // buffer{-1} > w_high + small_distance - rest >= rest + ten_kappa - small_distance)) { + buffer[length - 1]--; + rest += ten_kappa; + } + + // We have approached w+ as much as possible. We now test if approaching w- + // would require changing the buffer. If yes, then we have two possible + // representations close to w, but we cannot decide which one is closer. + if (rest < big_distance && + unsafe_interval - rest >= ten_kappa && + (rest + ten_kappa < big_distance || + big_distance - rest > rest + ten_kappa - big_distance)) { + return false; + } + + // Weeding test. + // The safe interval is [too_low + 2 ulp; too_high - 2 ulp] + // Since too_low = too_high - unsafe_interval this is equivalent to + // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp] + // Conceptually we have: rest ~= too_high - buffer + return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit); +} + + +// Rounds the buffer upwards if the result is closer to v by possibly adding +// 1 to the buffer. If the precision of the calculation is not sufficient to +// round correctly, return false. +// The rounding might shift the whole buffer in which case the kappa is +// adjusted. For example "99", kappa = 3 might become "10", kappa = 4. +// +// If 2*rest > ten_kappa then the buffer needs to be round up. +// rest can have an error of +/- 1 unit. This function accounts for the +// imprecision and returns false, if the rounding direction cannot be +// unambiguously determined. 
+// +// Precondition: rest < ten_kappa. +static bool RoundWeedCounted(Vector buffer, + int length, + uint64_t rest, + uint64_t ten_kappa, + uint64_t unit, + int* kappa) { + ASSERT(rest < ten_kappa); + // The following tests are done in a specific order to avoid overflows. They + // will work correctly with any uint64 values of rest < ten_kappa and unit. + // + // If the unit is too big, then we don't know which way to round. For example + // a unit of 50 means that the real number lies within rest +/- 50. If + // 10^kappa == 40 then there is no way to tell which way to round. + if (unit >= ten_kappa) return false; + // Even if unit is just half the size of 10^kappa we are already completely + // lost. (And after the previous test we know that the expression will not + // over/underflow.) + if (ten_kappa - unit <= unit) return false; + // If 2 * (rest + unit) <= 10^kappa we can safely round down. + if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) { + return true; + } + // If 2 * (rest - unit) >= 10^kappa, then we can safely round up. + if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) { + // Increment the last digit recursively until we find a non '9' digit. + buffer[length - 1]++; + for (int i = length - 1; i > 0; --i) { + if (buffer[i] != '0' + 10) break; + buffer[i] = '0'; + buffer[i - 1]++; + } + // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the + // exception of the first digit all digits are now '0'. Simply switch the + // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and + // the power (the kappa) is increased. + if (buffer[0] == '0' + 10) { + buffer[0] = '1'; + (*kappa) += 1; + } + return true; + } + return false; +} + +// Returns the biggest power of ten that is less than or equal to the given +// number. We furthermore receive the maximum number of bits 'number' has. +// +// Returns power == 10^(exponent_plus_one-1) such that +// power <= number < power * 10. 
+// If number_bits == 0 then 0^(0-1) is returned. +// The number of bits must be <= 32. +// Precondition: number < (1 << (number_bits + 1)). + +// Inspired by the method for finding an integer log base 10 from here: +// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 +static unsigned int const kSmallPowersOfTen[] = + {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, + 1000000000}; + +static void BiggestPowerTen(uint32_t number, + int number_bits, + uint32_t* power, + int* exponent_plus_one) { + ASSERT(number < (1u << (number_bits + 1))); + // 1233/4096 is approximately 1/lg(10). + int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12); + // We increment to skip over the first entry in the kPowersOf10 table. + // Note: kPowersOf10[i] == 10^(i-1). + exponent_plus_one_guess++; + // We don't have any guarantees that 2^number_bits <= number. + if (number < kSmallPowersOfTen[exponent_plus_one_guess]) { + exponent_plus_one_guess--; + } + *power = kSmallPowersOfTen[exponent_plus_one_guess]; + *exponent_plus_one = exponent_plus_one_guess; +} + +// Generates the digits of input number w. +// w is a floating-point number (DiyFp), consisting of a significand and an +// exponent. Its exponent is bounded by kMinimalTargetExponent and +// kMaximalTargetExponent. +// Hence -60 <= w.e() <= -32. +// +// Returns false if it fails, in which case the generated digits in the buffer +// should not be used. +// Preconditions: +// * low, w and high are correct up to 1 ulp (unit in the last place). That +// is, their error must be less than a unit of their last digits. +// * low.e() == w.e() == high.e() +// * low < w < high, and taking into account their error: low~ <= high~ +// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent +// Postconditions: returns false if procedure fails. +// otherwise: +// * buffer is not null-terminated, but len contains the number of digits. 
+// * buffer contains the shortest possible decimal digit-sequence +// such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the +// correct values of low and high (without their error). +// * if more than one decimal representation gives the minimal number of +// decimal digits then the one closest to W (where W is the correct value +// of w) is chosen. +// Remark: this procedure takes into account the imprecision of its input +// numbers. If the precision is not enough to guarantee all the postconditions +// then false is returned. This usually happens rarely (~0.5%). +// +// Say, for the sake of example, that +// w.e() == -48, and w.f() == 0x1234567890abcdef +// w's value can be computed by w.f() * 2^w.e() +// We can obtain w's integral digits by simply shifting w.f() by -w.e(). +// -> w's integral part is 0x1234 +// w's fractional part is therefore 0x567890abcdef. +// Printing w's integral part is easy (simply print 0x1234 in decimal). +// In order to print its fraction we repeatedly multiply the fraction by 10 and +// get each digit. Example the first digit after the point would be computed by +// (0x567890abcdef * 10) >> 48. -> 3 +// The whole thing becomes slightly more complicated because we want to stop +// once we have enough digits. That is, once the digits inside the buffer +// represent 'w' we can stop. Everything inside the interval low - high +// represents w. However we have to pay attention to low, high and w's +// imprecision. +static bool DigitGen(DiyFp low, + DiyFp w, + DiyFp high, + Vector buffer, + int* length, + int* kappa) { + ASSERT(low.e() == w.e() && w.e() == high.e()); + ASSERT(low.f() + 1 <= high.f() - 1); + ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); + // low, w and high are imprecise, but by less than one ulp (unit in the last + // place). + // If we remove (resp. add) 1 ulp from low (resp. 
high) we are certain that + // the new numbers are outside of the interval we want the final + // representation to lie in. + // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield + // numbers that are certain to lie in the interval. We will use this fact + // later on. + // We will now start by generating the digits within the uncertain + // interval. Later we will weed out representations that lie outside the safe + // interval and thus _might_ lie outside the correct interval. + uint64_t unit = 1; + DiyFp too_low = DiyFp(low.f() - unit, low.e()); + DiyFp too_high = DiyFp(high.f() + unit, high.e()); + // too_low and too_high are guaranteed to lie outside the interval we want the + // generated number in. + DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low); + // We now cut the input number into two parts: the integral digits and the + // fractionals. We will not write any decimal separator though, but adapt + // kappa instead. + // Reminder: we are currently computing the digits (stored inside the buffer) + // such that: too_low < buffer * 10^kappa < too_high + // We use too_high for the digit_generation and stop as soon as possible. + // If we stop early we effectively round down. + DiyFp one = DiyFp(static_cast(1) << -w.e(), w.e()); + // Division by one is a shift. + uint32_t integrals = static_cast(too_high.f() >> -one.e()); + // Modulo by one is an and. + uint64_t fractionals = too_high.f() & (one.f() - 1); + uint32_t divisor; + int divisor_exponent_plus_one; + BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), + &divisor, &divisor_exponent_plus_one); + *kappa = divisor_exponent_plus_one; + *length = 0; + // Loop invariant: buffer = too_high / 10^kappa (integer division) + // The invariant holds for the first iteration: kappa has been initialized + // with the divisor exponent + 1. And the divisor is the biggest power of ten + // that is smaller than integrals. 
+ while (*kappa > 0) { + int digit = integrals / divisor; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + integrals %= divisor; + (*kappa)--; + // Note that kappa now equals the exponent of the divisor and that the + // invariant thus holds again. + uint64_t rest = + (static_cast(integrals) << -one.e()) + fractionals; + // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e()) + // Reminder: unsafe_interval.e() == one.e() + if (rest < unsafe_interval.f()) { + // Rounding down (by not emitting the remaining digits) yields a number + // that lies within the unsafe interval. + return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(), + unsafe_interval.f(), rest, + static_cast(divisor) << -one.e(), unit); + } + divisor /= 10; + } + + // The integrals have been generated. We are at the point of the decimal + // separator. In the following loop we simply multiply the remaining digits by + // 10 and divide by one. We just need to pay attention to multiply associated + // data (like the interval or 'unit'), too. + // Note that the multiplication by 10 does not overflow, because w.e >= -60 + // and thus one.e >= -60. + ASSERT(one.e() >= -60); + ASSERT(fractionals < one.f()); + ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); + for (;;) { + fractionals *= 10; + unit *= 10; + unsafe_interval.set_f(unsafe_interval.f() * 10); + // Integer division by one. + int digit = static_cast(fractionals >> -one.e()); + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + fractionals &= one.f() - 1; // Modulo by one. + (*kappa)--; + if (fractionals < unsafe_interval.f()) { + return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit, + unsafe_interval.f(), fractionals, one.f(), unit); + } + } +} + + + +// Generates (at most) requested_digits digits of input number w. +// w is a floating-point number (DiyFp), consisting of a significand and an +// exponent. 
Its exponent is bounded by kMinimalTargetExponent and +// kMaximalTargetExponent. +// Hence -60 <= w.e() <= -32. +// +// Returns false if it fails, in which case the generated digits in the buffer +// should not be used. +// Preconditions: +// * w is correct up to 1 ulp (unit in the last place). That +// is, its error must be strictly less than a unit of its last digit. +// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent +// +// Postconditions: returns false if procedure fails. +// otherwise: +// * buffer is not null-terminated, but length contains the number of +// digits. +// * the representation in buffer is the most precise representation of +// requested_digits digits. +// * buffer contains at most requested_digits digits of w. If there are less +// than requested_digits digits then some trailing '0's have been removed. +// * kappa is such that +// w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2. +// +// Remark: This procedure takes into account the imprecision of its input +// numbers. If the precision is not enough to guarantee all the postconditions +// then false is returned. This usually happens rarely, but the failure-rate +// increases with higher requested_digits. +static bool DigitGenCounted(DiyFp w, + int requested_digits, + Vector buffer, + int* length, + int* kappa) { + ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); + ASSERT(kMinimalTargetExponent >= -60); + ASSERT(kMaximalTargetExponent <= -32); + // w is assumed to have an error less than 1 unit. Whenever w is scaled we + // also scale its error. + uint64_t w_error = 1; + // We cut the input number into two parts: the integral digits and the + // fractional digits. We don't emit any decimal separator, but adapt kappa + // instead. Example: instead of writing "1.2" we put "12" into the buffer and + // increase kappa by 1. + DiyFp one = DiyFp(static_cast(1) << -w.e(), w.e()); + // Division by one is a shift. 
+ uint32_t integrals = static_cast(w.f() >> -one.e()); + // Modulo by one is an and. + uint64_t fractionals = w.f() & (one.f() - 1); + uint32_t divisor; + int divisor_exponent_plus_one; + BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), + &divisor, &divisor_exponent_plus_one); + *kappa = divisor_exponent_plus_one; + *length = 0; + + // Loop invariant: buffer = w / 10^kappa (integer division) + // The invariant holds for the first iteration: kappa has been initialized + // with the divisor exponent + 1. And the divisor is the biggest power of ten + // that is smaller than 'integrals'. + while (*kappa > 0) { + int digit = integrals / divisor; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + requested_digits--; + integrals %= divisor; + (*kappa)--; + // Note that kappa now equals the exponent of the divisor and that the + // invariant thus holds again. + if (requested_digits == 0) break; + divisor /= 10; + } + + if (requested_digits == 0) { + uint64_t rest = + (static_cast(integrals) << -one.e()) + fractionals; + return RoundWeedCounted(buffer, *length, rest, + static_cast(divisor) << -one.e(), w_error, + kappa); + } + + // The integrals have been generated. We are at the point of the decimal + // separator. In the following loop we simply multiply the remaining digits by + // 10 and divide by one. We just need to pay attention to multiply associated + // data (the 'unit'), too. + // Note that the multiplication by 10 does not overflow, because w.e >= -60 + // and thus one.e >= -60. + ASSERT(one.e() >= -60); + ASSERT(fractionals < one.f()); + ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); + while (requested_digits > 0 && fractionals > w_error) { + fractionals *= 10; + w_error *= 10; + // Integer division by one. 
+ int digit = static_cast(fractionals >> -one.e()); + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); + (*length)++; + requested_digits--; + fractionals &= one.f() - 1; // Modulo by one. + (*kappa)--; + } + if (requested_digits != 0) return false; + return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error, + kappa); +} + + +// Provides a decimal representation of v. +// Returns true if it succeeds, otherwise the result cannot be trusted. +// There will be *length digits inside the buffer (not null-terminated). +// If the function returns true then +// v == (double) (buffer * 10^decimal_exponent). +// The digits in the buffer are the shortest representation possible: no +// 0.09999999999999999 instead of 0.1. The shorter representation will be +// chosen even if the longer one would be closer to v. +// The last digit will be closest to the actual v. That is, even if several +// digits might correctly yield 'v' when read again, the closest will be +// computed. +static bool Grisu3(double v, + FastDtoaMode mode, + Vector buffer, + int* length, + int* decimal_exponent) { + DiyFp w = Double(v).AsNormalizedDiyFp(); + // boundary_minus and boundary_plus are the boundaries between v and its + // closest floating-point neighbors. Any number strictly between + // boundary_minus and boundary_plus will round to v when converted to a double. + // Grisu3 will never output representations that lie exactly on a boundary. 
+ DiyFp boundary_minus, boundary_plus; + if (mode == FAST_DTOA_SHORTEST) { + Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus); + } else { + ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE); + float single_v = static_cast(v); + Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus); + } + ASSERT(boundary_plus.e() == w.e()); + DiyFp ten_mk; // Cached power of ten: 10^-k + int mk; // -k + int ten_mk_minimal_binary_exponent = + kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); + int ten_mk_maximal_binary_exponent = + kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); + PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + ten_mk_minimal_binary_exponent, + ten_mk_maximal_binary_exponent, + &ten_mk, &mk); + ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + + DiyFp::kSignificandSize) && + (kMaximalTargetExponent >= w.e() + ten_mk.e() + + DiyFp::kSignificandSize)); + // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a + // 64 bit significand and ten_mk is thus only precise up to 64 bits. + + // The DiyFp::Times procedure rounds its result, and ten_mk is approximated + // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now + // off by a small amount. + // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. + // In other words: let f = scaled_w.f() and e = scaled_w.e(), then + // (f-1) * 2^e < w*10^k < (f+1) * 2^e + DiyFp scaled_w = DiyFp::Times(w, ten_mk); + ASSERT(scaled_w.e() == + boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize); + // In theory it would be possible to avoid some recomputations by computing + // the difference between w and boundary_minus/plus (a power of 2) and to + // compute scaled_boundary_minus/plus by subtracting/adding from + // scaled_w. However the code becomes much less readable and the speed + // enhancements are not terriffic. 
+ DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk); + DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk); + + // DigitGen will generate the digits of scaled_w. Therefore we have + // v == (double) (scaled_w * 10^-mk). + // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an + // integer then it will be updated. For instance if scaled_w == 1.23 then + // the buffer will be filled with "123" and the decimal_exponent will be + // decreased by 2. + int kappa; + bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus, + buffer, length, &kappa); + *decimal_exponent = -mk + kappa; + return result; +} + + +// The "counted" version of grisu3 (see above) only generates requested_digits +// number of digits. This version does not generate the shortest representation, +// and with enough requested digits 0.1 will at some point print as 0.9999999... +// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and +// therefore the rounding strategy for halfway cases is irrelevant. +static bool Grisu3Counted(double v, + int requested_digits, + Vector buffer, + int* length, + int* decimal_exponent) { + DiyFp w = Double(v).AsNormalizedDiyFp(); + DiyFp ten_mk; // Cached power of ten: 10^-k + int mk; // -k + int ten_mk_minimal_binary_exponent = + kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); + int ten_mk_maximal_binary_exponent = + kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); + PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + ten_mk_minimal_binary_exponent, + ten_mk_maximal_binary_exponent, + &ten_mk, &mk); + ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + + DiyFp::kSignificandSize) && + (kMaximalTargetExponent >= w.e() + ten_mk.e() + + DiyFp::kSignificandSize)); + // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a + // 64 bit significand and ten_mk is thus only precise up to 64 bits. 
+ + // The DiyFp::Times procedure rounds its result, and ten_mk is approximated + // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now + // off by a small amount. + // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. + // In other words: let f = scaled_w.f() and e = scaled_w.e(), then + // (f-1) * 2^e < w*10^k < (f+1) * 2^e + DiyFp scaled_w = DiyFp::Times(w, ten_mk); + + // We now have (double) (scaled_w * 10^-mk). + // DigitGen will generate the first requested_digits digits of scaled_w and + // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It + // will not always be exactly the same since DigitGenCounted only produces a + // limited number of digits.) + int kappa; + bool result = DigitGenCounted(scaled_w, requested_digits, + buffer, length, &kappa); + *decimal_exponent = -mk + kappa; + return result; +} + + +bool FastDtoa(double v, + FastDtoaMode mode, + int requested_digits, + Vector buffer, + int* length, + int* decimal_point) { + ASSERT(v > 0); + ASSERT(!Double(v).IsSpecial()); + + bool result = false; + int decimal_exponent = 0; + switch (mode) { + case FAST_DTOA_SHORTEST: + case FAST_DTOA_SHORTEST_SINGLE: + result = Grisu3(v, mode, buffer, length, &decimal_exponent); + break; + case FAST_DTOA_PRECISION: + result = Grisu3Counted(v, requested_digits, + buffer, length, &decimal_exponent); + break; + default: + UNREACHABLE(); + } + if (result) { + *decimal_point = *length + decimal_exponent; + buffer[*length] = '\0'; + } + return result; +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-fast-dtoa.h b/deps/icu-small/source/i18n/double-conversion-fast-dtoa.h new file mode 100644 index 00000000000000..58a6470052c63c --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-fast-dtoa.h @@ -0,0 +1,106 @@ +// © 2018 and later: Unicode, 
Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_ +#define DOUBLE_CONVERSION_FAST_DTOA_H_ + +// ICU PATCH: Customize header file paths for ICU. 
+ +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +enum FastDtoaMode { + // Computes the shortest representation of the given input. The returned + // result will be the most accurate number of this length. Longer + // representations might be more accurate. + FAST_DTOA_SHORTEST, + // Same as FAST_DTOA_SHORTEST but for single-precision floats. + FAST_DTOA_SHORTEST_SINGLE, + // Computes a representation where the precision (number of digits) is + // given as input. The precision is independent of the decimal point. + FAST_DTOA_PRECISION +}; + +// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not +// include the terminating '\0' character. +static const int kFastDtoaMaximalLength = 17; +// Same for single-precision numbers. +static const int kFastDtoaMaximalSingleLength = 9; + +// Provides a decimal representation of v. +// The result should be interpreted as buffer * 10^(point - length). +// +// Precondition: +// * v must be a strictly positive finite double. +// +// Returns true if it succeeds, otherwise the result can not be trusted. +// There will be *length digits inside the buffer followed by a null terminator. +// If the function returns true and mode equals +// - FAST_DTOA_SHORTEST, then +// the parameter requested_digits is ignored. +// The result satisfies +// v == (double) (buffer * 10^(point - length)). +// The digits in the buffer are the shortest representation possible. E.g. +// if 0.099999999999 and 0.1 represent the same double then "1" is returned +// with point = 0. +// The last digit will be closest to the actual v. That is, even if several +// digits might correctly yield 'v' when read again, the buffer will contain +// the one closest to v. +// - FAST_DTOA_PRECISION, then +// the buffer contains requested_digits digits. 
+// the difference v - (buffer * 10^(point-length)) is closest to zero for +// all possible representations of requested_digits digits. +// If there are two values that are equally close, then FastDtoa returns +// false. +// For both modes the buffer must be large enough to hold the result. +bool FastDtoa(double d, + FastDtoaMode mode, + int requested_digits, + Vector buffer, + int* length, + int* decimal_point); + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_FAST_DTOA_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-ieee.h b/deps/icu-small/source/i18n/double-conversion-ieee.h new file mode 100644 index 00000000000000..952bcea27f6615 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-ieee.h @@ -0,0 +1,420 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_DOUBLE_H_ +#define DOUBLE_CONVERSION_DOUBLE_H_ + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-diy-fp.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +// We assume that doubles and uint64_t have the same endianness. +static uint64_t double_to_uint64(double d) { return BitCast(d); } +static double uint64_to_double(uint64_t d64) { return BitCast(d64); } +static uint32_t float_to_uint32(float f) { return BitCast(f); } +static float uint32_to_float(uint32_t d32) { return BitCast(d32); } + +// Helper functions for doubles. +class Double { + public: + static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000); + static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000); + static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF); + static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000); + static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit. 
+ static const int kSignificandSize = 53; + + Double() : d64_(0) {} + explicit Double(double d) : d64_(double_to_uint64(d)) {} + explicit Double(uint64_t d64) : d64_(d64) {} + explicit Double(DiyFp diy_fp) + : d64_(DiyFpToUint64(diy_fp)) {} + + // The value encoded by this Double must be greater or equal to +0.0. + // It must not be special (infinity, or NaN). + DiyFp AsDiyFp() const { + ASSERT(Sign() > 0); + ASSERT(!IsSpecial()); + return DiyFp(Significand(), Exponent()); + } + + // The value encoded by this Double must be strictly greater than 0. + DiyFp AsNormalizedDiyFp() const { + ASSERT(value() > 0.0); + uint64_t f = Significand(); + int e = Exponent(); + + // The current double could be a denormal. + while ((f & kHiddenBit) == 0) { + f <<= 1; + e--; + } + // Do the final shifts in one go. + f <<= DiyFp::kSignificandSize - kSignificandSize; + e -= DiyFp::kSignificandSize - kSignificandSize; + return DiyFp(f, e); + } + + // Returns the double's bit as uint64. + uint64_t AsUint64() const { + return d64_; + } + + // Returns the next greater double. Returns +infinity on input +infinity. 
+ double NextDouble() const { + if (d64_ == kInfinity) return Double(kInfinity).value(); + if (Sign() < 0 && Significand() == 0) { + // -0.0 + return 0.0; + } + if (Sign() < 0) { + return Double(d64_ - 1).value(); + } else { + return Double(d64_ + 1).value(); + } + } + + double PreviousDouble() const { + if (d64_ == (kInfinity | kSignMask)) return -Infinity(); + if (Sign() < 0) { + return Double(d64_ + 1).value(); + } else { + if (Significand() == 0) return -0.0; + return Double(d64_ - 1).value(); + } + } + + int Exponent() const { + if (IsDenormal()) return kDenormalExponent; + + uint64_t d64 = AsUint64(); + int biased_e = + static_cast((d64 & kExponentMask) >> kPhysicalSignificandSize); + return biased_e - kExponentBias; + } + + uint64_t Significand() const { + uint64_t d64 = AsUint64(); + uint64_t significand = d64 & kSignificandMask; + if (!IsDenormal()) { + return significand + kHiddenBit; + } else { + return significand; + } + } + + // Returns true if the double is a denormal. + bool IsDenormal() const { + uint64_t d64 = AsUint64(); + return (d64 & kExponentMask) == 0; + } + + // We consider denormals not to be special. + // Hence only Infinity and NaN are special. + bool IsSpecial() const { + uint64_t d64 = AsUint64(); + return (d64 & kExponentMask) == kExponentMask; + } + + bool IsNan() const { + uint64_t d64 = AsUint64(); + return ((d64 & kExponentMask) == kExponentMask) && + ((d64 & kSignificandMask) != 0); + } + + bool IsInfinite() const { + uint64_t d64 = AsUint64(); + return ((d64 & kExponentMask) == kExponentMask) && + ((d64 & kSignificandMask) == 0); + } + + int Sign() const { + uint64_t d64 = AsUint64(); + return (d64 & kSignMask) == 0? 1: -1; + } + + // Precondition: the value encoded by this Double must be greater or equal + // than +0.0. + DiyFp UpperBoundary() const { + ASSERT(Sign() > 0); + return DiyFp(Significand() * 2 + 1, Exponent() - 1); + } + + // Computes the two boundaries of this. + // The bigger boundary (m_plus) is normalized. 
The lower boundary has the same + // exponent as m_plus. + // Precondition: the value encoded by this Double must be greater than 0. + void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const { + ASSERT(value() > 0.0); + DiyFp v = this->AsDiyFp(); + DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); + DiyFp m_minus; + if (LowerBoundaryIsCloser()) { + m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); + } else { + m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); + } + m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); + m_minus.set_e(m_plus.e()); + *out_m_plus = m_plus; + *out_m_minus = m_minus; + } + + bool LowerBoundaryIsCloser() const { + // The boundary is closer if the significand is of the form f == 2^p-1 then + // the lower boundary is closer. + // Think of v = 1000e10 and v- = 9999e9. + // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but + // at a distance of 1e8. + // The only exception is for the smallest normal: the largest denormal is + // at the same distance as its successor. + // Note: denormals have the same exponent as the smallest normals. + bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0); + return physical_significand_is_zero && (Exponent() != kDenormalExponent); + } + + double value() const { return uint64_to_double(d64_); } + + // Returns the significand size for a given order of magnitude. + // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude. + // This function returns the number of significant binary digits v will have + // once it's encoded into a double. In almost all cases this is equal to + // kSignificandSize. The only exceptions are denormals. They start with + // leading zeroes and their effective significand-size is hence smaller. 
+ static int SignificandSizeForOrderOfMagnitude(int order) { + if (order >= (kDenormalExponent + kSignificandSize)) { + return kSignificandSize; + } + if (order <= kDenormalExponent) return 0; + return order - kDenormalExponent; + } + + static double Infinity() { + return Double(kInfinity).value(); + } + + static double NaN() { + return Double(kNaN).value(); + } + + private: + static const int kExponentBias = 0x3FF + kPhysicalSignificandSize; + static const int kDenormalExponent = -kExponentBias + 1; + static const int kMaxExponent = 0x7FF - kExponentBias; + static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000); + static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000); + + const uint64_t d64_; + + static uint64_t DiyFpToUint64(DiyFp diy_fp) { + uint64_t significand = diy_fp.f(); + int exponent = diy_fp.e(); + while (significand > kHiddenBit + kSignificandMask) { + significand >>= 1; + exponent++; + } + if (exponent >= kMaxExponent) { + return kInfinity; + } + if (exponent < kDenormalExponent) { + return 0; + } + while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) { + significand <<= 1; + exponent--; + } + uint64_t biased_exponent; + if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) { + biased_exponent = 0; + } else { + biased_exponent = static_cast(exponent + kExponentBias); + } + return (significand & kSignificandMask) | + (biased_exponent << kPhysicalSignificandSize); + } + + DISALLOW_COPY_AND_ASSIGN(Double); +}; + +class Single { + public: + static const uint32_t kSignMask = 0x80000000; + static const uint32_t kExponentMask = 0x7F800000; + static const uint32_t kSignificandMask = 0x007FFFFF; + static const uint32_t kHiddenBit = 0x00800000; + static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit. 
+ static const int kSignificandSize = 24; + + Single() : d32_(0) {} + explicit Single(float f) : d32_(float_to_uint32(f)) {} + explicit Single(uint32_t d32) : d32_(d32) {} + + // The value encoded by this Single must be greater or equal to +0.0. + // It must not be special (infinity, or NaN). + DiyFp AsDiyFp() const { + ASSERT(Sign() > 0); + ASSERT(!IsSpecial()); + return DiyFp(Significand(), Exponent()); + } + + // Returns the single's bits as uint32. + uint32_t AsUint32() const { + return d32_; + } + + int Exponent() const { + if (IsDenormal()) return kDenormalExponent; + + uint32_t d32 = AsUint32(); + int biased_e = + static_cast((d32 & kExponentMask) >> kPhysicalSignificandSize); + return biased_e - kExponentBias; + } + + uint32_t Significand() const { + uint32_t d32 = AsUint32(); + uint32_t significand = d32 & kSignificandMask; + if (!IsDenormal()) { + return significand + kHiddenBit; + } else { + return significand; + } + } + + // Returns true if the single is a denormal. + bool IsDenormal() const { + uint32_t d32 = AsUint32(); + return (d32 & kExponentMask) == 0; + } + + // We consider denormals not to be special. + // Hence only Infinity and NaN are special. + bool IsSpecial() const { + uint32_t d32 = AsUint32(); + return (d32 & kExponentMask) == kExponentMask; + } + + bool IsNan() const { + uint32_t d32 = AsUint32(); + return ((d32 & kExponentMask) == kExponentMask) && + ((d32 & kSignificandMask) != 0); + } + + bool IsInfinite() const { + uint32_t d32 = AsUint32(); + return ((d32 & kExponentMask) == kExponentMask) && + ((d32 & kSignificandMask) == 0); + } + + int Sign() const { + uint32_t d32 = AsUint32(); + return (d32 & kSignMask) == 0? 1: -1; + } + + // Computes the two boundaries of this. + // The bigger boundary (m_plus) is normalized. The lower boundary has the same + // exponent as m_plus. + // Precondition: the value encoded by this Single must be greater than 0. 
+ void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const { + ASSERT(value() > 0.0); + DiyFp v = this->AsDiyFp(); + DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); + DiyFp m_minus; + if (LowerBoundaryIsCloser()) { + m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); + } else { + m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); + } + m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); + m_minus.set_e(m_plus.e()); + *out_m_plus = m_plus; + *out_m_minus = m_minus; + } + + // Precondition: the value encoded by this Single must be greater or equal + // than +0.0. + DiyFp UpperBoundary() const { + ASSERT(Sign() > 0); + return DiyFp(Significand() * 2 + 1, Exponent() - 1); + } + + bool LowerBoundaryIsCloser() const { + // The boundary is closer if the significand is of the form f == 2^p-1 then + // the lower boundary is closer. + // Think of v = 1000e10 and v- = 9999e9. + // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but + // at a distance of 1e8. + // The only exception is for the smallest normal: the largest denormal is + // at the same distance as its successor. + // Note: denormals have the same exponent as the smallest normals. 
+ bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0); + return physical_significand_is_zero && (Exponent() != kDenormalExponent); + } + + float value() const { return uint32_to_float(d32_); } + + static float Infinity() { + return Single(kInfinity).value(); + } + + static float NaN() { + return Single(kNaN).value(); + } + + private: + static const int kExponentBias = 0x7F + kPhysicalSignificandSize; + static const int kDenormalExponent = -kExponentBias + 1; + static const int kMaxExponent = 0xFF - kExponentBias; + static const uint32_t kInfinity = 0x7F800000; + static const uint32_t kNaN = 0x7FC00000; + + const uint32_t d32_; + + DISALLOW_COPY_AND_ASSIGN(Single); +}; + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_DOUBLE_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-utils.h b/deps/icu-small/source/i18n/double-conversion-utils.h new file mode 100644 index 00000000000000..02795b4bc565ae --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-utils.h @@ -0,0 +1,358 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_UTILS_H_ +#define DOUBLE_CONVERSION_UTILS_H_ + +#include +#include + +// ICU PATCH: Use U_ASSERT instead of +#include "uassert.h" +#define ASSERT U_ASSERT + +#ifndef UNIMPLEMENTED +#define UNIMPLEMENTED() (abort()) +#endif +#ifndef DOUBLE_CONVERSION_NO_RETURN +#ifdef _MSC_VER +#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn) +#else +#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn)) +#endif +#endif +#ifndef UNREACHABLE +#ifdef _MSC_VER +void DOUBLE_CONVERSION_NO_RETURN abort_noreturn(); +inline void abort_noreturn() { abort(); } +#define UNREACHABLE() (abort_noreturn()) +#else +#define UNREACHABLE() (abort()) +#endif +#endif + + +// Double operations detection based on target architecture. +// Linux uses a 80bit wide floating point stack on x86. This induces double +// rounding, which in turn leads to wrong results. 
+// An easy way to test if the floating-point operations are correct is to +// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then +// the result is equal to 89255e-22. +// The best way to test this, is to create a division-function and to compare +// the output of the division with the expected result. (Inlining must be +// disabled.) +// On Linux,x86 89255e-22 != Div_double(89255.0/1e22) +// ICU PATCH: Enable ARM builds for Windows with 'defined(_M_ARM)'. +#if defined(_M_X64) || defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || \ + defined(__hppa__) || defined(__ia64__) || \ + defined(__mips__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ + defined(__SH4__) || defined(__alpha__) || \ + defined(_MIPS_ARCH_MIPS32R2) || \ + defined(__AARCH64EL__) || defined(__aarch64__) || \ + defined(__riscv) +#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 +#elif defined(__mc68000__) +#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS +#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) +#if defined(_WIN32) +// Windows uses a 64bit wide floating point stack. +#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 +#else +#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS +#endif // _WIN32 +#else +#error Target architecture was not detected as supported by Double-Conversion. 
+#endif + +#if defined(__GNUC__) +#define DOUBLE_CONVERSION_UNUSED __attribute__((unused)) +#else +#define DOUBLE_CONVERSION_UNUSED +#endif + +#if defined(_WIN32) && !defined(__MINGW32__) + +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef short int16_t; // NOLINT +typedef unsigned short uint16_t; // NOLINT +typedef int int32_t; +typedef unsigned int uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +// intptr_t and friends are defined in crtdefs.h through stdio.h. + +#else + +#include + +#endif + +typedef uint16_t uc16; + +// The following macro works on both 32 and 64-bit platforms. +// Usage: instead of writing 0x1234567890123456 +// write UINT64_2PART_C(0x12345678,90123456); +#define UINT64_2PART_C(a, b) (((static_cast(a) << 32) + 0x##b##u)) + + +// The expression ARRAY_SIZE(a) is a compile-time constant of type +// size_t which represents the number of elements of the given +// array. You should only use ARRAY_SIZE on statically allocated +// arrays. +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) \ + ((sizeof(a) / sizeof(*(a))) / \ + static_cast(!(sizeof(a) % sizeof(*(a))))) +#endif + +// A macro to disallow the evil copy constructor and operator= functions +// This should be used in the private: declarations for a class +#ifndef DISALLOW_COPY_AND_ASSIGN +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) +#endif + +// A macro to disallow all the implicit constructors, namely the +// default constructor, copy constructor and operator= functions. +// +// This should be used in the private: declarations for a class +// that wants to prevent anyone from instantiating it. This is +// especially useful for classes containing only static methods. 
+#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS +#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ + TypeName(); \ + DISALLOW_COPY_AND_ASSIGN(TypeName) +#endif + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +static const int kCharSize = sizeof(char); + +// Returns the maximum of the two parameters. +template +static T Max(T a, T b) { + return a < b ? b : a; +} + + +// Returns the minimum of the two parameters. +template +static T Min(T a, T b) { + return a < b ? a : b; +} + + +inline int StrLength(const char* string) { + size_t length = strlen(string); + ASSERT(length == static_cast(static_cast(length))); + return static_cast(length); +} + +// This is a simplified version of V8's Vector class. +template +class Vector { + public: + Vector() : start_(NULL), length_(0) {} + Vector(T* data, int len) : start_(data), length_(len) { + ASSERT(len == 0 || (len > 0 && data != NULL)); + } + + // Returns a vector using the same backing storage as this one, + // spanning from and including 'from', to but not including 'to'. + Vector SubVector(int from, int to) { + ASSERT(to <= length_); + ASSERT(from < to); + ASSERT(0 <= from); + return Vector(start() + from, to - from); + } + + // Returns the length of the vector. + int length() const { return length_; } + + // Returns whether or not the vector is empty. + bool is_empty() const { return length_ == 0; } + + // Returns the pointer to the start of the data in the vector. + T* start() const { return start_; } + + // Access individual vector elements - checks bounds in debug mode. + T& operator[](int index) const { + ASSERT(0 <= index && index < length_); + return start_[index]; + } + + T& first() { return start_[0]; } + + T& last() { return start_[length_ - 1]; } + + private: + T* start_; + int length_; +}; + + +// Helper class for building result strings in a character buffer. 
The +// purpose of the class is to use safe operations that checks the +// buffer bounds on all operations in debug mode. +class StringBuilder { + public: + StringBuilder(char* buffer, int buffer_size) + : buffer_(buffer, buffer_size), position_(0) { } + + ~StringBuilder() { if (!is_finalized()) Finalize(); } + + int size() const { return buffer_.length(); } + + // Get the current position in the builder. + int position() const { + ASSERT(!is_finalized()); + return position_; + } + + // Reset the position. + void Reset() { position_ = 0; } + + // Add a single character to the builder. It is not allowed to add + // 0-characters; use the Finalize() method to terminate the string + // instead. + void AddCharacter(char c) { + ASSERT(c != '\0'); + ASSERT(!is_finalized() && position_ < buffer_.length()); + buffer_[position_++] = c; + } + + // Add an entire string to the builder. Uses strlen() internally to + // compute the length of the input string. + void AddString(const char* s) { + AddSubstring(s, StrLength(s)); + } + + // Add the first 'n' characters of the given string 's' to the + // builder. The input string must have enough characters. + void AddSubstring(const char* s, int n) { + ASSERT(!is_finalized() && position_ + n < buffer_.length()); + ASSERT(static_cast(n) <= strlen(s)); + memmove(&buffer_[position_], s, n * kCharSize); + position_ += n; + } + + + // Add character padding to the builder. If count is non-positive, + // nothing is added to the builder. + void AddPadding(char c, int count) { + for (int i = 0; i < count; i++) { + AddCharacter(c); + } + } + + // Finalize the string by 0-terminating it and returning the buffer. + char* Finalize() { + ASSERT(!is_finalized() && position_ < buffer_.length()); + buffer_[position_] = '\0'; + // Make sure nobody managed to add a 0-character to the + // buffer while building the string. 
+ ASSERT(strlen(buffer_.start()) == static_cast(position_)); + position_ = -1; + ASSERT(is_finalized()); + return buffer_.start(); + } + + private: + Vector buffer_; + int position_; + + bool is_finalized() const { return position_ < 0; } + + DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder); +}; + +// The type-based aliasing rule allows the compiler to assume that pointers of +// different types (for some definition of different) never alias each other. +// Thus the following code does not work: +// +// float f = foo(); +// int fbits = *(int*)(&f); +// +// The compiler 'knows' that the int pointer can't refer to f since the types +// don't match, so the compiler may cache f in a register, leaving random data +// in fbits. Using C++ style casts makes no difference, however a pointer to +// char data is assumed to alias any other pointer. This is the 'memcpy +// exception'. +// +// Bit_cast uses the memcpy exception to move the bits from a variable of one +// type of a variable of another type. Of course the end result is likely to +// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005) +// will completely optimize BitCast away. +// +// There is an additional use for BitCast. +// Recent gccs will warn when they see casts that may result in breakage due to +// the type-based aliasing rule. If you have checked that there is no breakage +// you can use BitCast to cast one pointer type to another. This confuses gcc +// enough that it can no longer see that you have cast one pointer type to +// another thus avoiding the warning. +template +inline Dest BitCast(const Source& source) { + // Compile time assertion: sizeof(Dest) == sizeof(Source) + // A compile error here means your Dest and Source have different sizes. + DOUBLE_CONVERSION_UNUSED + typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 
1 : -1]; + + Dest dest; + memmove(&dest, &source, sizeof(dest)); + return dest; +} + +template +inline Dest BitCast(Source* source) { + return BitCast(reinterpret_cast(source)); +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_UTILS_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion.cpp b/deps/icu-small/source/i18n/double-conversion.cpp new file mode 100644 index 00000000000000..8629284aa0e0f5 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion.cpp @@ -0,0 +1,1005 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#include +#include + +// ICU PATCH: Customize header file paths for ICU. +// The files fixed-dtoa.h and strtod.h are not needed. + +#include "double-conversion.h" + +#include "double-conversion-bignum-dtoa.h" +#include "double-conversion-fast-dtoa.h" +#include "double-conversion-ieee.h" +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +#if 0 // not needed for ICU +const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() { + int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN; + static DoubleToStringConverter converter(flags, + "Infinity", + "NaN", + 'e', + -6, 21, + 6, 0); + return converter; +} + + +bool DoubleToStringConverter::HandleSpecialValues( + double value, + StringBuilder* result_builder) const { + Double double_inspect(value); + if (double_inspect.IsInfinite()) { + if (infinity_symbol_ == NULL) return false; + if (value < 0) { + result_builder->AddCharacter('-'); + } + result_builder->AddString(infinity_symbol_); + return true; + } + if (double_inspect.IsNan()) { + if (nan_symbol_ == NULL) return false; + result_builder->AddString(nan_symbol_); + return true; + } + return false; +} + + +void DoubleToStringConverter::CreateExponentialRepresentation( + const char* decimal_digits, + int length, + int exponent, + 
StringBuilder* result_builder) const { + ASSERT(length != 0); + result_builder->AddCharacter(decimal_digits[0]); + if (length != 1) { + result_builder->AddCharacter('.'); + result_builder->AddSubstring(&decimal_digits[1], length-1); + } + result_builder->AddCharacter(exponent_character_); + if (exponent < 0) { + result_builder->AddCharacter('-'); + exponent = -exponent; + } else { + if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) { + result_builder->AddCharacter('+'); + } + } + if (exponent == 0) { + result_builder->AddCharacter('0'); + return; + } + ASSERT(exponent < 1e4); + const int kMaxExponentLength = 5; + char buffer[kMaxExponentLength + 1]; + buffer[kMaxExponentLength] = '\0'; + int first_char_pos = kMaxExponentLength; + while (exponent > 0) { + buffer[--first_char_pos] = '0' + (exponent % 10); + exponent /= 10; + } + result_builder->AddSubstring(&buffer[first_char_pos], + kMaxExponentLength - first_char_pos); +} + + +void DoubleToStringConverter::CreateDecimalRepresentation( + const char* decimal_digits, + int length, + int decimal_point, + int digits_after_point, + StringBuilder* result_builder) const { + // Create a representation that is padded with zeros if needed. + if (decimal_point <= 0) { + // "0.00000decimal_rep" or "0.000decimal_rep00". + result_builder->AddCharacter('0'); + if (digits_after_point > 0) { + result_builder->AddCharacter('.'); + result_builder->AddPadding('0', -decimal_point); + ASSERT(length <= digits_after_point - (-decimal_point)); + result_builder->AddSubstring(decimal_digits, length); + int remaining_digits = digits_after_point - (-decimal_point) - length; + result_builder->AddPadding('0', remaining_digits); + } + } else if (decimal_point >= length) { + // "decimal_rep0000.00000" or "decimal_rep.0000". 
+ result_builder->AddSubstring(decimal_digits, length); + result_builder->AddPadding('0', decimal_point - length); + if (digits_after_point > 0) { + result_builder->AddCharacter('.'); + result_builder->AddPadding('0', digits_after_point); + } + } else { + // "decima.l_rep000". + ASSERT(digits_after_point > 0); + result_builder->AddSubstring(decimal_digits, decimal_point); + result_builder->AddCharacter('.'); + ASSERT(length - decimal_point <= digits_after_point); + result_builder->AddSubstring(&decimal_digits[decimal_point], + length - decimal_point); + int remaining_digits = digits_after_point - (length - decimal_point); + result_builder->AddPadding('0', remaining_digits); + } + if (digits_after_point == 0) { + if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) { + result_builder->AddCharacter('.'); + } + if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) { + result_builder->AddCharacter('0'); + } + } +} + + +bool DoubleToStringConverter::ToShortestIeeeNumber( + double value, + StringBuilder* result_builder, + DoubleToStringConverter::DtoaMode mode) const { + ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE); + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + int decimal_point; + bool sign; + const int kDecimalRepCapacity = kBase10MaximalLength + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + + bool unique_zero = (flags_ & UNIQUE_ZERO) != 0; + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + int exponent = decimal_point - 1; + if ((decimal_in_shortest_low_ <= exponent) && + (exponent < decimal_in_shortest_high_)) { + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, + decimal_point, + Max(0, decimal_rep_length - decimal_point), + result_builder); + } else { + CreateExponentialRepresentation(decimal_rep, decimal_rep_length, 
exponent, + result_builder); + } + return true; +} + + +bool DoubleToStringConverter::ToFixed(double value, + int requested_digits, + StringBuilder* result_builder) const { + ASSERT(kMaxFixedDigitsBeforePoint == 60); + const double kFirstNonFixed = 1e60; + + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (requested_digits > kMaxFixedDigitsAfterPoint) return false; + if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false; + + // Find a sufficiently precise decimal representation of n. + int decimal_point; + bool sign; + // Add space for the '\0' byte. + const int kDecimalRepCapacity = + kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + DoubleToAscii(value, FIXED, requested_digits, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, + requested_digits, result_builder); + return true; +} + + +bool DoubleToStringConverter::ToExponential( + double value, + int requested_digits, + StringBuilder* result_builder) const { + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (requested_digits < -1) return false; + if (requested_digits > kMaxExponentialDigits) return false; + + int decimal_point; + bool sign; + // Add space for digit before the decimal point and the '\0' character. 
+ const int kDecimalRepCapacity = kMaxExponentialDigits + 2; + ASSERT(kDecimalRepCapacity > kBase10MaximalLength); + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + if (requested_digits == -1) { + DoubleToAscii(value, SHORTEST, 0, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + } else { + DoubleToAscii(value, PRECISION, requested_digits + 1, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + ASSERT(decimal_rep_length <= requested_digits + 1); + + for (int i = decimal_rep_length; i < requested_digits + 1; ++i) { + decimal_rep[i] = '0'; + } + decimal_rep_length = requested_digits + 1; + } + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + int exponent = decimal_point - 1; + CreateExponentialRepresentation(decimal_rep, + decimal_rep_length, + exponent, + result_builder); + return true; +} + + +bool DoubleToStringConverter::ToPrecision(double value, + int precision, + StringBuilder* result_builder) const { + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) { + return false; + } + + // Find a sufficiently precise decimal representation of n. + int decimal_point; + bool sign; + // Add one for the terminating null character. + const int kDecimalRepCapacity = kMaxPrecisionDigits + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + DoubleToAscii(value, PRECISION, precision, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + ASSERT(decimal_rep_length <= precision); + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + // The exponent if we print the number as x.xxeyyy. That is with the + // decimal point after the first digit. 
+ int exponent = decimal_point - 1; + + int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0; + if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || + (decimal_point - precision + extra_zero > + max_trailing_padding_zeroes_in_precision_mode_)) { + // Fill buffer to contain 'precision' digits. + // Usually the buffer is already at the correct length, but 'DoubleToAscii' + // is allowed to return less characters. + for (int i = decimal_rep_length; i < precision; ++i) { + decimal_rep[i] = '0'; + } + + CreateExponentialRepresentation(decimal_rep, + precision, + exponent, + result_builder); + } else { + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, + Max(0, precision - decimal_point), + result_builder); + } + return true; +} +#endif // not needed for ICU + + +static BignumDtoaMode DtoaToBignumDtoaMode( + DoubleToStringConverter::DtoaMode dtoa_mode) { + switch (dtoa_mode) { + case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST; + case DoubleToStringConverter::SHORTEST_SINGLE: + return BIGNUM_DTOA_SHORTEST_SINGLE; + case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED; + case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION; + default: + UNREACHABLE(); + } +} + + +void DoubleToStringConverter::DoubleToAscii(double v, + DtoaMode mode, + int requested_digits, + char* buffer, + int buffer_length, + bool* sign, + int* length, + int* point) { + Vector vector(buffer, buffer_length); + ASSERT(!Double(v).IsSpecial()); + ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0); + + if (Double(v).Sign() < 0) { + *sign = true; + v = -v; + } else { + *sign = false; + } + + if (mode == PRECISION && requested_digits == 0) { + vector[0] = '\0'; + *length = 0; + return; + } + + if (v == 0) { + vector[0] = '0'; + vector[1] = '\0'; + *length = 1; + *point = 1; + return; + } + + bool fast_worked; + switch (mode) { + case SHORTEST: + fast_worked = FastDtoa(v, 
FAST_DTOA_SHORTEST, 0, vector, length, point); + break; +#if 0 // not needed for ICU + case SHORTEST_SINGLE: + fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0, + vector, length, point); + break; + case FIXED: + fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point); + break; + case PRECISION: + fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits, + vector, length, point); + break; +#endif // not needed for ICU + default: + fast_worked = false; + UNREACHABLE(); + } + if (fast_worked) return; + + // If the fast dtoa didn't succeed use the slower bignum version. + BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode); + BignumDtoa(v, bignum_mode, requested_digits, vector, length, point); + vector[*length] = '\0'; +} + + +#if 0 // not needed for ICU +// Consumes the given substring from the iterator. +// Returns false, if the substring does not match. +template +static bool ConsumeSubString(Iterator* current, + Iterator end, + const char* substring) { + ASSERT(**current == *substring); + for (substring++; *substring != '\0'; substring++) { + ++*current; + if (*current == end || **current != *substring) return false; + } + ++*current; + return true; +} + + +// Maximum number of significant digits in decimal representation. +// The longest possible double in decimal representation is +// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074 +// (768 digits). If we parse a number whose first digits are equal to a +// mean of 2 adjacent doubles (that could have up to 769 digits) the result +// must be rounded to the bigger one unless the tail consists of zeros, so +// we don't need to preserve all the digits. 
+const int kMaxSignificantDigits = 772; + + +static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 }; +static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7); + + +static const uc16 kWhitespaceTable16[] = { + 160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195, + 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279 +}; +static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16); + + +static bool isWhitespace(int x) { + if (x < 128) { + for (int i = 0; i < kWhitespaceTable7Length; i++) { + if (kWhitespaceTable7[i] == x) return true; + } + } else { + for (int i = 0; i < kWhitespaceTable16Length; i++) { + if (kWhitespaceTable16[i] == x) return true; + } + } + return false; +} + + +// Returns true if a nonspace found and false if the end has reached. +template +static inline bool AdvanceToNonspace(Iterator* current, Iterator end) { + while (*current != end) { + if (!isWhitespace(**current)) return true; + ++*current; + } + return false; +} + + +static bool isDigit(int x, int radix) { + return (x >= '0' && x <= '9' && x < '0' + radix) + || (radix > 10 && x >= 'a' && x < 'a' + radix - 10) + || (radix > 10 && x >= 'A' && x < 'A' + radix - 10); +} + + +static double SignedZero(bool sign) { + return sign ? -0.0 : 0.0; +} + + +// Returns true if 'c' is a decimal digit that is valid for the given radix. +// +// The function is small and could be inlined, but VS2012 emitted a warning +// because it constant-propagated the radix and concluded that the last +// condition was always true. By moving it into a separate function the +// compiler wouldn't warn anymore. 
+#if _MSC_VER +#pragma optimize("",off) +static bool IsDecimalDigitForRadix(int c, int radix) { + return '0' <= c && c <= '9' && (c - '0') < radix; +} +#pragma optimize("",on) +#else +static bool inline IsDecimalDigitForRadix(int c, int radix) { + return '0' <= c && c <= '9' && (c - '0') < radix; +} +#endif +// Returns true if 'c' is a character digit that is valid for the given radix. +// The 'a_character' should be 'a' or 'A'. +// +// The function is small and could be inlined, but VS2012 emitted a warning +// because it constant-propagated the radix and concluded that the first +// condition was always false. By moving it into a separate function the +// compiler wouldn't warn anymore. +static bool IsCharacterDigitForRadix(int c, int radix, char a_character) { + return radix > 10 && c >= a_character && c < a_character + radix - 10; +} + + +// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end. +template +static double RadixStringToIeee(Iterator* current, + Iterator end, + bool sign, + bool allow_trailing_junk, + double junk_string_value, + bool read_as_double, + bool* result_is_junk) { + ASSERT(*current != end); + + const int kDoubleSize = Double::kSignificandSize; + const int kSingleSize = Single::kSignificandSize; + const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize; + + *result_is_junk = true; + + // Skip leading 0s. 
+ while (**current == '0') { + ++(*current); + if (*current == end) { + *result_is_junk = false; + return SignedZero(sign); + } + } + + int64_t number = 0; + int exponent = 0; + const int radix = (1 << radix_log_2); + + do { + int digit; + if (IsDecimalDigitForRadix(**current, radix)) { + digit = static_cast(**current) - '0'; + } else if (IsCharacterDigitForRadix(**current, radix, 'a')) { + digit = static_cast(**current) - 'a' + 10; + } else if (IsCharacterDigitForRadix(**current, radix, 'A')) { + digit = static_cast(**current) - 'A' + 10; + } else { + if (allow_trailing_junk || !AdvanceToNonspace(current, end)) { + break; + } else { + return junk_string_value; + } + } + + number = number * radix + digit; + int overflow = static_cast(number >> kSignificandSize); + if (overflow != 0) { + // Overflow occurred. Need to determine which direction to round the + // result. + int overflow_bits_count = 1; + while (overflow > 1) { + overflow_bits_count++; + overflow >>= 1; + } + + int dropped_bits_mask = ((1 << overflow_bits_count) - 1); + int dropped_bits = static_cast(number) & dropped_bits_mask; + number >>= overflow_bits_count; + exponent = overflow_bits_count; + + bool zero_tail = true; + for (;;) { + ++(*current); + if (*current == end || !isDigit(**current, radix)) break; + zero_tail = zero_tail && **current == '0'; + exponent += radix_log_2; + } + + if (!allow_trailing_junk && AdvanceToNonspace(current, end)) { + return junk_string_value; + } + + int middle_value = (1 << (overflow_bits_count - 1)); + if (dropped_bits > middle_value) { + number++; // Rounding up. + } else if (dropped_bits == middle_value) { + // Rounding to even to consistency with decimals: half-way case rounds + // up if significant part is odd and down otherwise. + if ((number & 1) != 0 || !zero_tail) { + number++; // Rounding up. + } + } + + // Rounding up may cause overflow. 
+ if ((number & ((int64_t)1 << kSignificandSize)) != 0) { + exponent++; + number >>= 1; + } + break; + } + ++(*current); + } while (*current != end); + + ASSERT(number < ((int64_t)1 << kSignificandSize)); + ASSERT(static_cast(static_cast(number)) == number); + + *result_is_junk = false; + + if (exponent == 0) { + if (sign) { + if (number == 0) return -0.0; + number = -number; + } + return static_cast(number); + } + + ASSERT(number != 0); + return Double(DiyFp(number, exponent)).value(); +} + + +template +double StringToDoubleConverter::StringToIeee( + Iterator input, + int length, + bool read_as_double, + int* processed_characters_count) const { + Iterator current = input; + Iterator end = input + length; + + *processed_characters_count = 0; + + const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0; + const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0; + const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0; + const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0; + + // To make sure that iterator dereferencing is valid the following + // convention is used: + // 1. Each '++current' statement is followed by check for equality to 'end'. + // 2. If AdvanceToNonspace returned false then current == end. + // 3. If 'current' becomes equal to 'end' the function returns or goes to + // 'parsing_done'. + // 4. 'current' is not dereferenced after the 'parsing_done' label. + // 5. Code before 'parsing_done' may rely on 'current != end'. + if (current == end) return empty_string_value_; + + if (allow_leading_spaces || allow_trailing_spaces) { + if (!AdvanceToNonspace(¤t, end)) { + *processed_characters_count = static_cast(current - input); + return empty_string_value_; + } + if (!allow_leading_spaces && (input != current)) { + // No leading spaces allowed, but AdvanceToNonspace moved forward. + return junk_string_value_; + } + } + + // The longest form of simplified number is: "-.1eXXX\0". 
+ const int kBufferSize = kMaxSignificantDigits + 10; + char buffer[kBufferSize]; // NOLINT: size is known at compile time. + int buffer_pos = 0; + + // Exponent will be adjusted if insignificant digits of the integer part + // or insignificant leading zeros of the fractional part are dropped. + int exponent = 0; + int significant_digits = 0; + int insignificant_digits = 0; + bool nonzero_digit_dropped = false; + + bool sign = false; + + if (*current == '+' || *current == '-') { + sign = (*current == '-'); + ++current; + Iterator next_non_space = current; + // Skip following spaces (if allowed). + if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_; + if (!allow_spaces_after_sign && (current != next_non_space)) { + return junk_string_value_; + } + current = next_non_space; + } + + if (infinity_symbol_ != NULL) { + if (*current == infinity_symbol_[0]) { + if (!ConsumeSubString(¤t, end, infinity_symbol_)) { + return junk_string_value_; + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + return junk_string_value_; + } + + ASSERT(buffer_pos == 0); + *processed_characters_count = static_cast(current - input); + return sign ? -Double::Infinity() : Double::Infinity(); + } + } + + if (nan_symbol_ != NULL) { + if (*current == nan_symbol_[0]) { + if (!ConsumeSubString(¤t, end, nan_symbol_)) { + return junk_string_value_; + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + return junk_string_value_; + } + + ASSERT(buffer_pos == 0); + *processed_characters_count = static_cast(current - input); + return sign ? 
-Double::NaN() : Double::NaN(); + } + } + + bool leading_zero = false; + if (*current == '0') { + ++current; + if (current == end) { + *processed_characters_count = static_cast(current - input); + return SignedZero(sign); + } + + leading_zero = true; + + // It could be hexadecimal value. + if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) { + ++current; + if (current == end || !isDigit(*current, 16)) { + return junk_string_value_; // "0x". + } + + bool result_is_junk; + double result = RadixStringToIeee<4>(¤t, + end, + sign, + allow_trailing_junk, + junk_string_value_, + read_as_double, + &result_is_junk); + if (!result_is_junk) { + if (allow_trailing_spaces) AdvanceToNonspace(¤t, end); + *processed_characters_count = static_cast(current - input); + } + return result; + } + + // Ignore leading zeros in the integer part. + while (*current == '0') { + ++current; + if (current == end) { + *processed_characters_count = static_cast(current - input); + return SignedZero(sign); + } + } + } + + bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0; + + // Copy significant digits of the integer part (if any) to the buffer. + while (*current >= '0' && *current <= '9') { + if (significant_digits < kMaxSignificantDigits) { + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos++] = static_cast(*current); + significant_digits++; + // Will later check if it's an octal in the buffer. + } else { + insignificant_digits++; // Move the digit into the exponential part. 
+ nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; + } + octal = octal && *current < '8'; + ++current; + if (current == end) goto parsing_done; + } + + if (significant_digits == 0) { + octal = false; + } + + if (*current == '.') { + if (octal && !allow_trailing_junk) return junk_string_value_; + if (octal) goto parsing_done; + + ++current; + if (current == end) { + if (significant_digits == 0 && !leading_zero) { + return junk_string_value_; + } else { + goto parsing_done; + } + } + + if (significant_digits == 0) { + // octal = false; + // Integer part consists of 0 or is absent. Significant digits start after + // leading zeros (if any). + while (*current == '0') { + ++current; + if (current == end) { + *processed_characters_count = static_cast(current - input); + return SignedZero(sign); + } + exponent--; // Move this 0 into the exponent. + } + } + + // There is a fractional part. + // We don't emit a '.', but adjust the exponent instead. + while (*current >= '0' && *current <= '9') { + if (significant_digits < kMaxSignificantDigits) { + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos++] = static_cast(*current); + significant_digits++; + exponent--; + } else { + // Ignore insignificant digits in the fractional part. + nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; + } + ++current; + if (current == end) goto parsing_done; + } + } + + if (!leading_zero && exponent == 0 && significant_digits == 0) { + // If leading_zeros is true then the string contains zeros. + // If exponent < 0 then string was [+-]\.0*... + // If significant_digits != 0 the string is not equal to 0. + // Otherwise there are no digits in the string. + return junk_string_value_; + } + + // Parse exponential part. 
+ if (*current == 'e' || *current == 'E') { + if (octal && !allow_trailing_junk) return junk_string_value_; + if (octal) goto parsing_done; + ++current; + if (current == end) { + if (allow_trailing_junk) { + goto parsing_done; + } else { + return junk_string_value_; + } + } + char exponen_sign = '+'; + if (*current == '+' || *current == '-') { + exponen_sign = static_cast(*current); + ++current; + if (current == end) { + if (allow_trailing_junk) { + goto parsing_done; + } else { + return junk_string_value_; + } + } + } + + if (current == end || *current < '0' || *current > '9') { + if (allow_trailing_junk) { + goto parsing_done; + } else { + return junk_string_value_; + } + } + + const int max_exponent = INT_MAX / 2; + ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2); + int num = 0; + do { + // Check overflow. + int digit = *current - '0'; + if (num >= max_exponent / 10 + && !(num == max_exponent / 10 && digit <= max_exponent % 10)) { + num = max_exponent; + } else { + num = num * 10 + digit; + } + ++current; + } while (current != end && *current >= '0' && *current <= '9'); + + exponent += (exponen_sign == '-' ? 
-num : num); + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + return junk_string_value_; + } + if (allow_trailing_spaces) { + AdvanceToNonspace(¤t, end); + } + + parsing_done: + exponent += insignificant_digits; + + if (octal) { + double result; + bool result_is_junk; + char* start = buffer; + result = RadixStringToIeee<3>(&start, + buffer + buffer_pos, + sign, + allow_trailing_junk, + junk_string_value_, + read_as_double, + &result_is_junk); + ASSERT(!result_is_junk); + *processed_characters_count = static_cast(current - input); + return result; + } + + if (nonzero_digit_dropped) { + buffer[buffer_pos++] = '1'; + exponent--; + } + + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos] = '\0'; + + double converted; + if (read_as_double) { + converted = Strtod(Vector(buffer, buffer_pos), exponent); + } else { + converted = Strtof(Vector(buffer, buffer_pos), exponent); + } + *processed_characters_count = static_cast(current - input); + return sign? 
-converted: converted; +} + + +double StringToDoubleConverter::StringToDouble( + const char* buffer, + int length, + int* processed_characters_count) const { + return StringToIeee(buffer, length, true, processed_characters_count); +} + + +double StringToDoubleConverter::StringToDouble( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return StringToIeee(buffer, length, true, processed_characters_count); +} + + +float StringToDoubleConverter::StringToFloat( + const char* buffer, + int length, + int* processed_characters_count) const { + return static_cast(StringToIeee(buffer, length, false, + processed_characters_count)); +} + + +float StringToDoubleConverter::StringToFloat( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return static_cast(StringToIeee(buffer, length, false, + processed_characters_count)); +} +#endif // not needed for ICU + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion.h b/deps/icu-small/source/i18n/double-conversion.h new file mode 100644 index 00000000000000..0939412734a6bb --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion.h @@ -0,0 +1,566 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ +#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +class DoubleToStringConverter { + public: +#if 0 // not needed for ICU + // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint + // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the + // function returns false. 
+ static const int kMaxFixedDigitsBeforePoint = 60; + static const int kMaxFixedDigitsAfterPoint = 60; + + // When calling ToExponential with a requested_digits + // parameter > kMaxExponentialDigits then the function returns false. + static const int kMaxExponentialDigits = 120; + + // When calling ToPrecision with a requested_digits + // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits + // then the function returns false. + static const int kMinPrecisionDigits = 1; + static const int kMaxPrecisionDigits = 120; + + enum Flags { + NO_FLAGS = 0, + EMIT_POSITIVE_EXPONENT_SIGN = 1, + EMIT_TRAILING_DECIMAL_POINT = 2, + EMIT_TRAILING_ZERO_AFTER_POINT = 4, + UNIQUE_ZERO = 8 + }; + + // Flags should be a bit-or combination of the possible Flags-enum. + // - NO_FLAGS: no special flags. + // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent + // form, emits a '+' for positive exponents. Example: 1.2e+2. + // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is + // converted into decimal format then a trailing decimal point is appended. + // Example: 2345.0 is converted to "2345.". + // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point + // emits a trailing '0'-character. This flag requires the + // EXMIT_TRAILING_DECIMAL_POINT flag. + // Example: 2345.0 is converted to "2345.0". + // - UNIQUE_ZERO: "-0.0" is converted to "0.0". + // + // Infinity symbol and nan_symbol provide the string representation for these + // special values. If the string is NULL and the special value is encountered + // then the conversion functions return false. + // + // The exponent_character is used in exponential representations. It is + // usually 'e' or 'E'. 
+ // + // When converting to the shortest representation the converter will + // represent input numbers in decimal format if they are in the interval + // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[ + // (lower boundary included, greater boundary excluded). + // Example: with decimal_in_shortest_low = -6 and + // decimal_in_shortest_high = 21: + // ToShortest(0.000001) -> "0.000001" + // ToShortest(0.0000001) -> "1e-7" + // ToShortest(111111111111111111111.0) -> "111111111111111110000" + // ToShortest(100000000000000000000.0) -> "100000000000000000000" + // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" + // + // When converting to precision mode the converter may add + // max_leading_padding_zeroes before returning the number in exponential + // format. + // Example with max_leading_padding_zeroes_in_precision_mode = 6. + // ToPrecision(0.0000012345, 2) -> "0.0000012" + // ToPrecision(0.00000012345, 2) -> "1.2e-7" + // Similarily the converter may add up to + // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid + // returning an exponential representation. A zero added by the + // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: + // ToPrecision(230.0, 2) -> "230" + // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. + // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. 
+ DoubleToStringConverter(int flags, + const char* infinity_symbol, + const char* nan_symbol, + char exponent_character, + int decimal_in_shortest_low, + int decimal_in_shortest_high, + int max_leading_padding_zeroes_in_precision_mode, + int max_trailing_padding_zeroes_in_precision_mode) + : flags_(flags), + infinity_symbol_(infinity_symbol), + nan_symbol_(nan_symbol), + exponent_character_(exponent_character), + decimal_in_shortest_low_(decimal_in_shortest_low), + decimal_in_shortest_high_(decimal_in_shortest_high), + max_leading_padding_zeroes_in_precision_mode_( + max_leading_padding_zeroes_in_precision_mode), + max_trailing_padding_zeroes_in_precision_mode_( + max_trailing_padding_zeroes_in_precision_mode) { + // When 'trailing zero after the point' is set, then 'trailing point' + // must be set too. + ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) || + !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0)); + } + + // Returns a converter following the EcmaScript specification. + static const DoubleToStringConverter& EcmaScriptConverter(); + + // Computes the shortest string of digits that correctly represent the input + // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high + // (see constructor) it then either returns a decimal representation, or an + // exponential representation. + // Example with decimal_in_shortest_low = -6, + // decimal_in_shortest_high = 21, + // EMIT_POSITIVE_EXPONENT_SIGN activated, and + // EMIT_TRAILING_DECIMAL_POINT deactived: + // ToShortest(0.000001) -> "0.000001" + // ToShortest(0.0000001) -> "1e-7" + // ToShortest(111111111111111111111.0) -> "111111111111111110000" + // ToShortest(100000000000000000000.0) -> "100000000000000000000" + // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" + // + // Note: the conversion may round the output if the returned string + // is accurate enough to uniquely identify the input-number. 
+ // For example the most precise representation of the double 9e59 equals + // "899999999999999918767229449717619953810131273674690656206848", but + // the converter will return the shorter (but still correct) "9e59". + // + // Returns true if the conversion succeeds. The conversion always succeeds + // except when the input value is special and no infinity_symbol or + // nan_symbol has been given to the constructor. + bool ToShortest(double value, StringBuilder* result_builder) const { + return ToShortestIeeeNumber(value, result_builder, SHORTEST); + } + + // Same as ToShortest, but for single-precision floats. + bool ToShortestSingle(float value, StringBuilder* result_builder) const { + return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE); + } + + + // Computes a decimal representation with a fixed number of digits after the + // decimal point. The last emitted digit is rounded. + // + // Examples: + // ToFixed(3.12, 1) -> "3.1" + // ToFixed(3.1415, 3) -> "3.142" + // ToFixed(1234.56789, 4) -> "1234.5679" + // ToFixed(1.23, 5) -> "1.23000" + // ToFixed(0.1, 4) -> "0.1000" + // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00" + // ToFixed(0.1, 30) -> "0.100000000000000005551115123126" + // ToFixed(0.1, 17) -> "0.10000000000000001" + // + // If requested_digits equals 0, then the tail of the result depends on + // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT. + // Examples, for requested_digits == 0, + // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be + // - false and false: then 123.45 -> 123 + // 0.678 -> 1 + // - true and false: then 123.45 -> 123. + // 0.678 -> 1. + // - true and true: then 123.45 -> 123.0 + // 0.678 -> 1.0 + // + // Returns true if the conversion succeeds. 
The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - 'value' > 10^kMaxFixedDigitsBeforePoint, or + // - 'requested_digits' > kMaxFixedDigitsAfterPoint. + // The last two conditions imply that the result will never contain more than + // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters + // (one additional character for the sign, and one for the decimal point). + bool ToFixed(double value, + int requested_digits, + StringBuilder* result_builder) const; + + // Computes a representation in exponential format with requested_digits + // after the decimal point. The last emitted digit is rounded. + // If requested_digits equals -1, then the shortest exponential representation + // is computed. + // + // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and + // exponent_character set to 'e'. + // ToExponential(3.12, 1) -> "3.1e0" + // ToExponential(5.0, 3) -> "5.000e0" + // ToExponential(0.001, 2) -> "1.00e-3" + // ToExponential(3.1415, -1) -> "3.1415e0" + // ToExponential(3.1415, 4) -> "3.1415e0" + // ToExponential(3.1415, 3) -> "3.142e0" + // ToExponential(123456789000000, 3) -> "1.235e14" + // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30" + // ToExponential(1000000000000000019884624838656.0, 32) -> + // "1.00000000000000001988462483865600e30" + // ToExponential(1234, 0) -> "1e3" + // + // Returns true if the conversion succeeds. The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - 'requested_digits' > kMaxExponentialDigits. 
+ // The last condition implies that the result will never contain more than + // kMaxExponentialDigits + 8 characters (the sign, the digit before the + // decimal point, the decimal point, the exponent character, the + // exponent's sign, and at most 3 exponent digits). + bool ToExponential(double value, + int requested_digits, + StringBuilder* result_builder) const; + + // Computes 'precision' leading digits of the given 'value' and returns them + // either in exponential or decimal format, depending on + // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the + // constructor). + // The last computed digit is rounded. + // + // Example with max_leading_padding_zeroes_in_precision_mode = 6. + // ToPrecision(0.0000012345, 2) -> "0.0000012" + // ToPrecision(0.00000012345, 2) -> "1.2e-7" + // Similarily the converter may add up to + // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid + // returning an exponential representation. A zero added by the + // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: + // ToPrecision(230.0, 2) -> "230" + // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. + // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no + // EMIT_TRAILING_ZERO_AFTER_POINT: + // ToPrecision(123450.0, 6) -> "123450" + // ToPrecision(123450.0, 5) -> "123450" + // ToPrecision(123450.0, 4) -> "123500" + // ToPrecision(123450.0, 3) -> "123000" + // ToPrecision(123450.0, 2) -> "1.2e5" + // + // Returns true if the conversion succeeds. 
The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - precision < kMinPericisionDigits + // - precision > kMaxPrecisionDigits + // The last condition implies that the result will never contain more than + // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the + // exponent character, the exponent's sign, and at most 3 exponent digits). + bool ToPrecision(double value, + int precision, + StringBuilder* result_builder) const; +#endif // not needed for ICU + + enum DtoaMode { + // Produce the shortest correct representation. + // For example the output of 0.299999999999999988897 is (the less accurate + // but correct) 0.3. + SHORTEST, + // Same as SHORTEST, but for single-precision floats. + SHORTEST_SINGLE, + // Produce a fixed number of digits after the decimal point. + // For instance fixed(0.1, 4) becomes 0.1000 + // If the input number is big, the output will be big. + FIXED, + // Fixed number of digits (independent of the decimal point). + PRECISION + }; + + // The maximal number of digits that are needed to emit a double in base 10. + // A higher precision can be achieved by using more digits, but the shortest + // accurate representation of any double will never use more digits than + // kBase10MaximalLength. + // Note that DoubleToAscii null-terminates its input. So the given buffer + // should be at least kBase10MaximalLength + 1 characters long. + static const int kBase10MaximalLength = 17; + + // Converts the given double 'v' to ascii. 'v' must not be NaN, +Infinity, or + // -Infinity. In SHORTEST_SINGLE-mode this restriction also applies to 'v' + // after it has been casted to a single-precision float. That is, in this + // mode static_cast(v) must not be NaN, +Infinity or -Infinity. + // + // The result should be interpreted as buffer * 10^(point-length). 
+ // + // The output depends on the given mode: + // - SHORTEST: produce the least amount of digits for which the internal + // identity requirement is still satisfied. If the digits are printed + // (together with the correct exponent) then reading this number will give + // 'v' again. The buffer will choose the representation that is closest to + // 'v'. If there are two at the same distance, than the one farther away + // from 0 is chosen (halfway cases - ending with 5 - are rounded up). + // In this mode the 'requested_digits' parameter is ignored. + // - SHORTEST_SINGLE: same as SHORTEST but with single-precision. + // - FIXED: produces digits necessary to print a given number with + // 'requested_digits' digits after the decimal point. The produced digits + // might be too short in which case the caller has to fill the remainder + // with '0's. + // Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. + // Halfway cases are rounded towards +/-Infinity (away from 0). The call + // toFixed(0.15, 2) thus returns buffer="2", point=0. + // The returned buffer may contain digits that would be truncated from the + // shortest representation of the input. + // - PRECISION: produces 'requested_digits' where the first digit is not '0'. + // Even though the length of produced digits usually equals + // 'requested_digits', the function is allowed to return fewer digits, in + // which case the caller has to fill the missing digits with '0's. + // Halfway cases are again rounded away from 0. + // DoubleToAscii expects the given buffer to be big enough to hold all + // digits and a terminating null-character. In SHORTEST-mode it expects a + // buffer of at least kBase10MaximalLength + 1. In all other modes the + // requested_digits parameter and the padding-zeroes limit the size of the + // output. Don't forget the decimal point, the exponent character and the + // terminating null-character when computing the maximal output size. 
+ // The given length is only used in debug mode to ensure the buffer is big + // enough. + // ICU PATCH: Export this as U_I18N_API for unit tests. + static void U_I18N_API DoubleToAscii(double v, + DtoaMode mode, + int requested_digits, + char* buffer, + int buffer_length, + bool* sign, + int* length, + int* point); + +#if 0 // not needed for ICU + private: + // Implementation for ToShortest and ToShortestSingle. + bool ToShortestIeeeNumber(double value, + StringBuilder* result_builder, + DtoaMode mode) const; + + // If the value is a special value (NaN or Infinity) constructs the + // corresponding string using the configured infinity/nan-symbol. + // If either of them is NULL or the value is not special then the + // function returns false. + bool HandleSpecialValues(double value, StringBuilder* result_builder) const; + // Constructs an exponential representation (i.e. 1.234e56). + // The given exponent assumes a decimal point after the first decimal digit. + void CreateExponentialRepresentation(const char* decimal_digits, + int length, + int exponent, + StringBuilder* result_builder) const; + // Creates a decimal representation (i.e 1234.5678). + void CreateDecimalRepresentation(const char* decimal_digits, + int length, + int decimal_point, + int digits_after_point, + StringBuilder* result_builder) const; + + const int flags_; + const char* const infinity_symbol_; + const char* const nan_symbol_; + const char exponent_character_; + const int decimal_in_shortest_low_; + const int decimal_in_shortest_high_; + const int max_leading_padding_zeroes_in_precision_mode_; + const int max_trailing_padding_zeroes_in_precision_mode_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter); +}; + + +class StringToDoubleConverter { + public: + // Enumeration for allowing octals and ignoring junk when converting + // strings to numbers. 
+ enum Flags { + NO_FLAGS = 0, + ALLOW_HEX = 1, + ALLOW_OCTALS = 2, + ALLOW_TRAILING_JUNK = 4, + ALLOW_LEADING_SPACES = 8, + ALLOW_TRAILING_SPACES = 16, + ALLOW_SPACES_AFTER_SIGN = 32 + }; + + // Flags should be a bit-or combination of the possible Flags-enum. + // - NO_FLAGS: no special flags. + // - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers. + // Ex: StringToDouble("0x1234") -> 4660.0 + // In StringToDouble("0x1234.56") the characters ".56" are trailing + // junk. The result of the call is hence dependent on + // the ALLOW_TRAILING_JUNK flag and/or the junk value. + // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK, + // the string will not be parsed as "0" followed by junk. + // + // - ALLOW_OCTALS: recognizes the prefix "0" for octals: + // If a sequence of octal digits starts with '0', then the number is + // read as octal integer. Octal numbers may only be integers. + // Ex: StringToDouble("01234") -> 668.0 + // StringToDouble("012349") -> 12349.0 // Not a sequence of octal + // // digits. + // In StringToDouble("01234.56") the characters ".56" are trailing + // junk. The result of the call is hence dependent on + // the ALLOW_TRAILING_JUNK flag and/or the junk value. + // In StringToDouble("01234e56") the characters "e56" are trailing + // junk, too. + // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of + // a double literal. + // - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces, + // new-lines, and tabs. + // - ALLOW_TRAILING_SPACES: ignore trailing whitespace. + // - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign. + // Ex: StringToDouble("- 123.2") -> -123.2. + // StringToDouble("+ 123.2") -> 123.2 + // + // empty_string_value is returned when an empty string is given as input. + // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string + // containing only spaces is converted to the 'empty_string_value', too. 
+ // + // junk_string_value is returned when + // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not + // part of a double-literal) is found. + // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a + // double literal. + // + // infinity_symbol and nan_symbol are strings that are used to detect + // inputs that represent infinity and NaN. They can be null, in which case + // they are ignored. + // The conversion routine first reads any possible signs. Then it compares the + // following character of the input-string with the first character of + // the infinity, and nan-symbol. If either matches, the function assumes, that + // a match has been found, and expects the following input characters to match + // the remaining characters of the special-value symbol. + // This means that the following restrictions apply to special-value symbols: + // - they must not start with signs ('+', or '-'), + // - they must not have the same first character. + // - they must not start with digits. + // + // Examples: + // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK, + // empty_string_value = 0.0, + // junk_string_value = NaN, + // infinity_symbol = "infinity", + // nan_symbol = "nan": + // StringToDouble("0x1234") -> 4660.0. + // StringToDouble("0x1234K") -> 4660.0. + // StringToDouble("") -> 0.0 // empty_string_value. + // StringToDouble(" ") -> NaN // junk_string_value. + // StringToDouble(" 1") -> NaN // junk_string_value. + // StringToDouble("0x") -> NaN // junk_string_value. + // StringToDouble("-123.45") -> -123.45. + // StringToDouble("--123.45") -> NaN // junk_string_value. + // StringToDouble("123e45") -> 123e45. + // StringToDouble("123E45") -> 123e45. + // StringToDouble("123e+45") -> 123e45. + // StringToDouble("123E-45") -> 123e-45. + // StringToDouble("123e") -> 123.0 // trailing junk ignored. + // StringToDouble("123e-") -> 123.0 // trailing junk ignored. + // StringToDouble("+NaN") -> NaN // NaN string literal. 
+ // StringToDouble("-infinity") -> -inf. // infinity literal. + // StringToDouble("Infinity") -> NaN // junk_string_value. + // + // flags = ALLOW_OCTAL | ALLOW_LEADING_SPACES, + // empty_string_value = 0.0, + // junk_string_value = NaN, + // infinity_symbol = NULL, + // nan_symbol = NULL: + // StringToDouble("0x1234") -> NaN // junk_string_value. + // StringToDouble("01234") -> 668.0. + // StringToDouble("") -> 0.0 // empty_string_value. + // StringToDouble(" ") -> 0.0 // empty_string_value. + // StringToDouble(" 1") -> 1.0 + // StringToDouble("0x") -> NaN // junk_string_value. + // StringToDouble("0123e45") -> NaN // junk_string_value. + // StringToDouble("01239E45") -> 1239e45. + // StringToDouble("-infinity") -> NaN // junk_string_value. + // StringToDouble("NaN") -> NaN // junk_string_value. + StringToDoubleConverter(int flags, + double empty_string_value, + double junk_string_value, + const char* infinity_symbol, + const char* nan_symbol) + : flags_(flags), + empty_string_value_(empty_string_value), + junk_string_value_(junk_string_value), + infinity_symbol_(infinity_symbol), + nan_symbol_(nan_symbol) { + } + + // Performs the conversion. + // The output parameter 'processed_characters_count' is set to the number + // of characters that have been processed to read the number. + // Spaces than are processed with ALLOW_{LEADING|TRAILING}_SPACES are included + // in the 'processed_characters_count'. Trailing junk is never included. + double StringToDouble(const char* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToDouble above but for 16 bit characters. + double StringToDouble(const uc16* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToDouble but reads a float. + // Note that this is not equivalent to static_cast(StringToDouble(...)) + // due to potential double-rounding. 
+ float StringToFloat(const char* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToFloat above but for 16 bit characters. + float StringToFloat(const uc16* buffer, + int length, + int* processed_characters_count) const; + + private: + const int flags_; + const double empty_string_value_; + const double junk_string_value_; + const char* const infinity_symbol_; + const char* const nan_symbol_; + + template + double StringToIeee(Iterator start_pointer, + int length, + bool read_as_double, + int* processed_characters_count) const; + + DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter); +#endif // not needed for ICU +}; + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/dtptngen.cpp b/deps/icu-small/source/i18n/dtptngen.cpp index 187342e4af2efc..aefd70464eb2d8 100644 --- a/deps/icu-small/source/i18n/dtptngen.cpp +++ b/deps/icu-small/source/i18n/dtptngen.cpp @@ -261,12 +261,21 @@ static const char* const CLDR_FIELD_APPEND[] = { "Hour", "Minute", "Second", "*", "Timezone" }; -static const char* const CLDR_FIELD_NAME[] = { +static const char* const CLDR_FIELD_NAME[UDATPG_FIELD_COUNT] = { "era", "year", "quarter", "month", "week", "weekOfMonth", "weekday", "dayOfYear", "weekdayOfMonth", "day", "dayperiod", // The UDATPG_x_FIELD constants and these fields have a different order than in ICU4J "hour", "minute", "second", "*", "zone" }; +static const char* const CLDR_FIELD_WIDTH[] = { // [UDATPG_WIDTH_COUNT] + "", "-short", "-narrow" +}; + +// TODO(ticket:13619): remove when definition uncommented in dtptngen.h. 
+static const int32_t UDATPG_WIDTH_COUNT = UDATPG_NARROW + 1; +static constexpr UDateTimePGDisplayWidth UDATPG_WIDTH_APPENDITEM = UDATPG_WIDE; +static constexpr int32_t UDATPG_FIELD_KEY_MAX = 24; // max length of CLDR field tag (type + width) + // For appendItems static const UChar UDATPG_ItemFormat[]= {0x7B, 0x30, 0x7D, 0x20, 0x251C, 0x7B, 0x32, 0x7D, 0x3A, 0x20, 0x7B, 0x31, 0x7D, 0x2524, 0}; // {0} \u251C{2}: {1}\u2524 @@ -379,10 +388,11 @@ DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) { } for (int32_t i=0; i< UDATPG_FIELD_COUNT; ++i ) { appendItemFormats[i] = other.appendItemFormats[i]; - appendItemNames[i] = other.appendItemNames[i]; - // NUL-terminate for the C API. - appendItemFormats[i].getTerminatedBuffer(); - appendItemNames[i].getTerminatedBuffer(); + appendItemFormats[i].getTerminatedBuffer(); // NUL-terminate for the C API. + for (int32_t j=0; j< UDATPG_WIDTH_COUNT; ++j ) { + fieldDisplayNames[i][j] = other.fieldDisplayNames[i][j]; + fieldDisplayNames[i][j].getTerminatedBuffer(); // NUL-terminate for the C API. + } } UErrorCode status = U_ZERO_ERROR; patternMap->copyFrom(*other.patternMap, status); @@ -399,10 +409,14 @@ DateTimePatternGenerator::operator==(const DateTimePatternGenerator& other) cons if ((pLocale==other.pLocale) && (patternMap->equals(*other.patternMap)) && (dateTimeFormat==other.dateTimeFormat) && (decimal==other.decimal)) { for ( int32_t i=0 ; i0; --i) { + if (uprv_strcmp(CLDR_FIELD_WIDTH[i], hyphenPtr)==0) { + *widthP=(UDateTimePGDisplayWidth)i; + break; + } + } + *hyphenPtr = 0; // now delete width portion of key + } for (int32_t i=0; i> i) & 1) == 0) { + // Mark this rule as being executed so that we don't try to execute it again. 
+ nonNumericalExecutedRuleMask |= 1 << i; + Formattable tempResult; - UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); + UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult); if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { result = tempResult; highWaterMark = workingPos; @@ -748,7 +751,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun continue; } Formattable tempResult; - UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); + UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult); if (success && workingPos.getIndex() > highWaterMark.getIndex()) { result = tempResult; highWaterMark = workingPos; diff --git a/deps/icu-small/source/i18n/nfrs.h b/deps/icu-small/source/i18n/nfrs.h index 34846ed297b77d..c56fc0707851a7 100644 --- a/deps/icu-small/source/i18n/nfrs.h +++ b/deps/icu-small/source/i18n/nfrs.h @@ -55,7 +55,7 @@ class NFRuleSet : public UMemory { void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; void format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; - UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const; + UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; void appendRules(UnicodeString& result) const; // toString diff --git a/deps/icu-small/source/i18n/nfrule.cpp b/deps/icu-small/source/i18n/nfrule.cpp index 2c26aff2d11fde..9f5deb31683291 100644 --- a/deps/icu-small/source/i18n/nfrule.cpp +++ b/deps/icu-small/source/i18n/nfrule.cpp @@ -900,6 +900,7 @@ NFRule::doParse(const UnicodeString& text, ParsePosition& 
parsePosition, UBool isFractionRule, double upperBound, + uint32_t nonNumericalExecutedRuleMask, Formattable& resVal) const { // internally we operate on a copy of the string being parsed @@ -1002,6 +1003,7 @@ NFRule::doParse(const UnicodeString& text, temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos); double partialResult = matchToDelimiter(workText, start, tempBaseValue, temp, pp, sub1, + nonNumericalExecutedRuleMask, upperBound); // if we got a successful match (or were trying to match a @@ -1022,6 +1024,7 @@ NFRule::doParse(const UnicodeString& text, temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos); partialResult = matchToDelimiter(workText2, 0, partialResult, temp, pp2, sub2, + nonNumericalExecutedRuleMask, upperBound); // if we got a successful match on this second @@ -1158,6 +1161,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, + uint32_t nonNumericalExecutedRuleMask, double upperBound) const { UErrorCode status = U_ZERO_ERROR; @@ -1191,6 +1195,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, #else formatter->isLenient(), #endif + nonNumericalExecutedRuleMask, result); // if the substitution could match all the text up to @@ -1244,6 +1249,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, #else formatter->isLenient(), #endif + nonNumericalExecutedRuleMask, result); if (success && (tempPP.getIndex() != 0)) { // if there's a successful match (or it's a null diff --git a/deps/icu-small/source/i18n/nfrule.h b/deps/icu-small/source/i18n/nfrule.h index 21cdd24fbd0573..843a4a0762bb01 100644 --- a/deps/icu-small/source/i18n/nfrule.h +++ b/deps/icu-small/source/i18n/nfrule.h @@ -74,6 +74,7 @@ class NFRule : public UMemory { ParsePosition& pos, UBool isFractional, double upperBound, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; UBool shouldRollBack(int64_t number) const; @@ -94,6 +95,7 @@ class NFRule : public UMemory { int32_t 
indexOfAnyRulePrefix() const; double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue, const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, + uint32_t nonNumericalExecutedRuleMask, double upperBound) const; void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const; diff --git a/deps/icu-small/source/i18n/nfsubs.cpp b/deps/icu-small/source/i18n/nfsubs.cpp index 1a0914152deaa4..ea817453d87c18 100644 --- a/deps/icu-small/source/i18n/nfsubs.cpp +++ b/deps/icu-small/source/i18n/nfsubs.cpp @@ -155,6 +155,7 @@ class ModulusSubstitution : public NFSubstitution { double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { @@ -221,6 +222,7 @@ class FractionalPartSubstitution : public NFSubstitution { double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; } @@ -292,6 +294,7 @@ class NumeratorSubstitution : public NFSubstitution { double baseValue, double upperBound, UBool /*lenientParse*/, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; } @@ -689,6 +692,7 @@ NFSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const { #ifdef RBNF_DEBUG @@ -709,7 +713,7 @@ NFSubstitution::doParse(const UnicodeString& text, // on), then also try parsing the text using a default- // constructed NumberFormat if (ruleSet != NULL) { - ruleSet->parse(text, parsePosition, upperBound, result); + ruleSet->parse(text, parsePosition, 
upperBound, nonNumericalExecutedRuleMask, result); if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) { UErrorCode status = U_ZERO_ERROR; NumberFormat* fmt = NumberFormat::createInstance(status); @@ -931,18 +935,19 @@ ModulusSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const { // if this isn't a >>> substitution, we can just use the // inherited parse() routine to do the parsing if (ruleToUse == NULL) { - return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result); + return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result); // but if it IS a >>> substitution, we have to do it here: we // use the specific rule's doParse() method, and then we have to // do some of the other work of NFRuleSet.parse() } else { - ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result); + ruleToUse->doParse(text, parsePosition, FALSE, upperBound, nonNumericalExecutedRuleMask, result); if (parsePosition.getIndex() != 0) { UErrorCode status = U_ZERO_ERROR; @@ -1118,12 +1123,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text, double baseValue, double /*upperBound*/, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& resVal) const { // if we're not in byDigits mode, we can just use the inherited // doParse() if (!byDigits) { - return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal); + return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal); // if we ARE in byDigits mode, parse the text one digit at a time // using this substitution's owning rule set (we do this by setting @@ -1141,7 +1147,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text, while (workText.length() > 0 && 
workPos.getIndex() != 0) { workPos.setIndex(0); Formattable temp; - getRuleSet()->parse(workText, workPos, 10, temp); + getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp); UErrorCode status = U_ZERO_ERROR; digit = temp.getLong(status); // digit = temp.getType() == Formattable::kLong ? @@ -1249,6 +1255,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool /*lenientParse*/, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const { // we don't have to do anything special to do the parsing here, @@ -1267,7 +1274,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, while (workText.length() > 0 && workPos.getIndex() != 0) { workPos.setIndex(0); - getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all + getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all if (workPos.getIndex() == 0) { // we failed, either there were no more zeros, or the number was formatted with digits // either way, we're done @@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, } // we've parsed off the zeros, now let's parse the rest from our current position - NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result); + NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, nonNumericalExecutedRuleMask, result); if (withZeros) { // any base value will do in this case. 
is there a way to diff --git a/deps/icu-small/source/i18n/nfsubs.h b/deps/icu-small/source/i18n/nfsubs.h index e77f7ada8c7fe3..08de06f2a38e1e 100644 --- a/deps/icu-small/source/i18n/nfsubs.h +++ b/deps/icu-small/source/i18n/nfsubs.h @@ -191,6 +191,7 @@ class NFSubstitution : public UObject { double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; /** diff --git a/deps/icu-small/source/i18n/number_affixutils.cpp b/deps/icu-small/source/i18n/number_affixutils.cpp index 4dfdbc7ab708fc..df4b267af5a004 100644 --- a/deps/icu-small/source/i18n/number_affixutils.cpp +++ b/deps/icu-small/source/i18n/number_affixutils.cpp @@ -70,6 +70,7 @@ int32_t AffixUtils::estimateLength(const CharSequence &patternString, UErrorCode case STATE_FIRST_QUOTE: case STATE_INSIDE_QUOTE: status = U_ILLEGAL_ARGUMENT_ERROR; + break; default: break; } diff --git a/deps/icu-small/source/i18n/number_compact.cpp b/deps/icu-small/source/i18n/number_compact.cpp index 8ceee1378b24cb..cc0d8fd2a20cce 100644 --- a/deps/icu-small/source/i18n/number_compact.cpp +++ b/deps/icu-small/source/i18n/number_compact.cpp @@ -262,7 +262,6 @@ void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReferen buildReference.setPatternInfo(&patternInfo); info.mod = buildReference.createImmutable(status); if (U_FAILURE(status)) { return; } - info.numDigits = patternInfo.positive.integerTotal; info.patternString = patternString; } } @@ -286,7 +285,6 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr StandardPlural::Form plural = quantity.getStandardPlural(rules); const UChar *patternString = data.getPattern(magnitude, plural); - int numDigits = -1; if (patternString == nullptr) { // Use the default (non-compact) modifier. // No need to take any action. 
@@ -299,7 +297,6 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr const CompactModInfo &info = precomputedMods[i]; if (u_strcmp(patternString, info.patternString) == 0) { info.mod->applyToMicros(micros, quantity); - numDigits = info.numDigits; break; } } @@ -313,12 +310,8 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status); static_cast(const_cast(micros.modMiddle)) ->setPatternInfo(&patternInfo); - numDigits = patternInfo.positive.integerTotal; } - // FIXME: Deal with numDigits == 0 (Awaiting a test case) - (void)numDigits; - // We already performed rounding. Do not perform it again. micros.rounding = Rounder::constructPassThrough(); } diff --git a/deps/icu-small/source/i18n/number_compact.h b/deps/icu-small/source/i18n/number_compact.h index 2344abf535a962..f7adf36416e92f 100644 --- a/deps/icu-small/source/i18n/number_compact.h +++ b/deps/icu-small/source/i18n/number_compact.h @@ -52,7 +52,6 @@ class CompactData : public MultiplierProducer { struct CompactModInfo { const ImmutablePatternModifier *mod; const UChar* patternString; - int32_t numDigits; }; class CompactHandler : public MicroPropsGenerator, public UMemory { diff --git a/deps/icu-small/source/i18n/number_decimalquantity.cpp b/deps/icu-small/source/i18n/number_decimalquantity.cpp index 72463576666bb1..b68df26ba26167 100644 --- a/deps/icu-small/source/i18n/number_decimalquantity.cpp +++ b/deps/icu-small/source/i18n/number_decimalquantity.cpp @@ -14,12 +14,15 @@ #include "decContext.h" #include "decNumber.h" #include "number_roundingutils.h" +#include "double-conversion.h" #include "unicode/plurrule.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; +using icu::double_conversion::DoubleToStringConverter; + namespace { int8_t NEGATIVE_FLAG = 1; @@ -265,6 +268,10 @@ bool DecimalQuantity::isNegative() const { return (flags 
& NEGATIVE_FLAG) != 0; } +int8_t DecimalQuantity::signum() const { + return isNegative() ? -1 : isZero() ? 0 : 1; +} + bool DecimalQuantity::isInfinite() const { return (flags & INFINITY_FLAG) != 0; } @@ -392,31 +399,27 @@ void DecimalQuantity::_setToDoubleFast(double n) { } void DecimalQuantity::convertToAccurateDouble() { - double n = origDouble; - U_ASSERT(n != 0); + U_ASSERT(origDouble != 0); int32_t delta = origDelta; - setBcdToZero(); - // Call the slow oracle function (Double.toString in Java, sprintf in C++). - // The constant DBL_DIG defines a platform-specific number of digits in a double. - // However, this tends to be too low (see #11318). Instead, we always use 14 decimal places. - static constexpr size_t CAP = 1 + 14 + 8; // Extra space for '+', '.', e+NNN, and '\0' - char dstr[CAP]; - snprintf(dstr, CAP, "%+1.14e", n); - - // uprv_decNumberFromString() will parse the string expecting '.' as a - // decimal separator, however sprintf() can use ',' in certain locales. - // Overwrite a ',' with '.' here before proceeding. - char *decimalSeparator = strchr(dstr, ','); - if (decimalSeparator != nullptr) { - *decimalSeparator = '.'; - } - - StringPiece sp(dstr); - DecNumberWithStorage dn; - stringToDecNumber(dstr, dn); - _setToDecNumber(dn.getAlias()); + // Call the slow oracle function (Double.toString in Java, DoubleToAscii in C++). 
+ char buffer[DoubleToStringConverter::kBase10MaximalLength + 1]; + bool sign; // unused; always positive + int32_t length; + int32_t point; + DoubleToStringConverter::DoubleToAscii( + origDouble, + DoubleToStringConverter::DtoaMode::SHORTEST, + 0, + buffer, + sizeof(buffer), + &sign, + &length, + &point + ); + setBcdToZero(); + readDoubleConversionToBcd(buffer, length, point); scale += delta; explicitExactDouble = true; } @@ -833,6 +836,26 @@ void DecimalQuantity::readDecNumberToBcd(decNumber *dn) { precision = dn->digits; } +void DecimalQuantity::readDoubleConversionToBcd( + const char* buffer, int32_t length, int32_t point) { + // NOTE: Despite the fact that double-conversion's API is called + // "DoubleToAscii", they actually use '0' (as opposed to u8'0'). + if (length > 16) { + ensureCapacity(length); + for (int32_t i = 0; i < length; i++) { + fBCD.bcdBytes.ptr[i] = buffer[length-i-1] - '0'; + } + } else { + uint64_t result = 0L; + for (int32_t i = 0; i < length; i++) { + result |= static_cast(buffer[length-i-1] - '0') << (4 * i); + } + fBCD.bcdLong = result; + } + scale = point - length; + precision = length; +} + void DecimalQuantity::compact() { if (usingBytes) { int32_t delta = 0; diff --git a/deps/icu-small/source/i18n/number_decimalquantity.h b/deps/icu-small/source/i18n/number_decimalquantity.h index ccb832623cb7bb..4309c3c6380ac4 100644 --- a/deps/icu-small/source/i18n/number_decimalquantity.h +++ b/deps/icu-small/source/i18n/number_decimalquantity.h @@ -115,6 +115,9 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { /** @return Whether the value represented by this {@link DecimalQuantity} is less than zero. */ bool isNegative() const; + /** @return -1 if the value is negative; 1 if positive; or 0 if zero. */ + int8_t signum() const; + /** @return Whether the value represented by this {@link DecimalQuantity} is infinite. 
*/ bool isInfinite() const U_OVERRIDE; @@ -395,6 +398,8 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { void readDecNumberToBcd(decNumber *dn); + void readDoubleConversionToBcd(const char* buffer, int32_t length, int32_t point); + void copyBcdFrom(const DecimalQuantity &other); /** diff --git a/deps/icu-small/source/i18n/number_decimfmtprops.h b/deps/icu-small/source/i18n/number_decimfmtprops.h index 3e25966b6f5612..96356cad45321d 100644 --- a/deps/icu-small/source/i18n/number_decimfmtprops.h +++ b/deps/icu-small/source/i18n/number_decimfmtprops.h @@ -19,8 +19,8 @@ U_NAMESPACE_BEGIN // Export an explicit template instantiation of the LocalPointer that is used as a // data member of CurrencyPluralInfoWrapper. -// (MSVC requires this, even though it should not be necessary.) -#if defined (_MSC_VER) +// (When building DLLs for Windows this is required.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= #pragma warning(suppress: 4661) template class U_I18N_API LocalPointerBase; diff --git a/deps/icu-small/source/i18n/number_fluent.cpp b/deps/icu-small/source/i18n/number_fluent.cpp index 76c3a7ce5c5d16..27113106c50451 100644 --- a/deps/icu-small/source/i18n/number_fluent.cpp +++ b/deps/icu-small/source/i18n/number_fluent.cpp @@ -33,18 +33,39 @@ Derived NumberFormatterSettings::unit(const icu::MeasureUnit &unit) con } template -Derived NumberFormatterSettings::adoptUnit(const icu::MeasureUnit *unit) const { +Derived NumberFormatterSettings::adoptUnit(icu::MeasureUnit *unit) const { Derived copy(*this); // Just copy the unit into the MacroProps by value, and delete it since we have ownership. // NOTE: Slicing occurs here. However, CurrencyUnit can be restored from MeasureUnit. // TimeUnit may be affected, but TimeUnit is not as relevant to number formatting. if (unit != nullptr) { + // TODO: On nullptr, reset to default value? 
copy.fMacros.unit = *unit; delete unit; } return copy; } +template +Derived NumberFormatterSettings::perUnit(const icu::MeasureUnit &perUnit) const { + Derived copy(*this); + // See comments above about slicing. + copy.fMacros.perUnit = perUnit; + return copy; +} + +template +Derived NumberFormatterSettings::adoptPerUnit(icu::MeasureUnit *perUnit) const { + Derived copy(*this); + // See comments above about slicing and ownership. + if (perUnit != nullptr) { + // TODO: On nullptr, reset to default value? + copy.fMacros.perUnit = *perUnit; + delete perUnit; + } + return copy; +} + template Derived NumberFormatterSettings::rounding(const Rounder &rounder) const { Derived copy(*this); @@ -54,9 +75,11 @@ Derived NumberFormatterSettings::rounding(const Rounder &rounder) const } template -Derived NumberFormatterSettings::grouping(const Grouper &grouper) const { +Derived NumberFormatterSettings::grouping(const UGroupingStrategy &strategy) const { Derived copy(*this); - copy.fMacros.grouper = grouper; + // NOTE: This is slightly different than how the setting is stored in Java + // because we want to put it on the stack. 
+ copy.fMacros.grouper = Grouper::forStrategy(strategy); return copy; } @@ -75,7 +98,7 @@ Derived NumberFormatterSettings::symbols(const DecimalFormatSymbols &sy } template -Derived NumberFormatterSettings::adoptSymbols(const NumberingSystem *ns) const { +Derived NumberFormatterSettings::adoptSymbols(NumberingSystem *ns) const { Derived copy(*this); copy.fMacros.symbols.setTo(ns); return copy; diff --git a/deps/icu-small/source/i18n/number_formatimpl.cpp b/deps/icu-small/source/i18n/number_formatimpl.cpp index 9986ce6d8c606b..bc96cb15dabf90 100644 --- a/deps/icu-small/source/i18n/number_formatimpl.cpp +++ b/deps/icu-small/source/i18n/number_formatimpl.cpp @@ -17,6 +17,8 @@ #include "unicode/dcfmtsym.h" #include "number_scientific.h" #include "number_compact.h" +#include "uresimp.h" +#include "ureslocs.h" using namespace icu; using namespace icu::number; @@ -88,6 +90,37 @@ const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, Cld return pattern; } +struct CurrencyFormatInfoResult { + bool exists; + const char16_t* pattern; + const char16_t* decimalSeparator; + const char16_t* groupingSeparator; +}; +CurrencyFormatInfoResult getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) { + // TODO: Load this data in a centralized location like ICU4J? + // TODO: Parts of this same data are loaded in dcfmtsym.cpp; should clean up. 
+ CurrencyFormatInfoResult result = { false, nullptr, nullptr, nullptr }; + if (U_FAILURE(status)) return result; + CharString key; + key.append("Currencies/", status); + key.append(isoCode, status); + UErrorCode localStatus = status; + LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_CURR, locale.getName(), &localStatus)); + ures_getByKeyWithFallback(bundle.getAlias(), key.data(), bundle.getAlias(), &localStatus); + if (U_SUCCESS(localStatus) && ures_getSize(bundle.getAlias())>2) { // the length is 3 if more data is present + ures_getByIndex(bundle.getAlias(), 2, bundle.getAlias(), &localStatus); + int32_t dummy; + result.exists = true; + result.pattern = ures_getStringByIndex(bundle.getAlias(), 0, &dummy, &localStatus); + result.decimalSeparator = ures_getStringByIndex(bundle.getAlias(), 1, &dummy, &localStatus); + result.groupingSeparator = ures_getStringByIndex(bundle.getAlias(), 2, &dummy, &localStatus); + status = localStatus; + } else if (localStatus != U_MISSING_RESOURCE_ERROR) { + status = localStatus; + } + return result; +} + inline bool unitIsCurrency(const MeasureUnit &unit) { return uprv_strcmp("currency", unit.getType()) == 0; } @@ -161,8 +194,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, bool isPercent = isNoUnit && unitIsPercent(macros.unit); bool isPermille = isNoUnit && unitIsPermille(macros.unit); bool isCldrUnit = !isCurrency && !isNoUnit; - bool isAccounting = - macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS; + bool isAccounting = macros.sign == UNUM_SIGN_ACCOUNTING + || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS + || macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; CurrencyUnit currency(kDefaultCurrency, status); if (isCurrency) { currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit @@ -185,20 +219,51 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, } const char *nsName = U_SUCCESS(status) ? 
ns->getName() : "latn"; - // Load and parse the pattern string. It is used for grouping sizes and affixes only. - CldrPatternStyle patternStyle; - if (isPercent || isPermille) { - patternStyle = CLDR_PATTERN_STYLE_PERCENT; - } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { - patternStyle = CLDR_PATTERN_STYLE_DECIMAL; - } else if (isAccounting) { - // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now, - // the API contract allows us to add support to other units in the future. - patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING; + // Resolve the symbols. Do this here because currency may need to customize them. + if (macros.symbols.isDecimalFormatSymbols()) { + fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); } else { - patternStyle = CLDR_PATTERN_STYLE_CURRENCY; + fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status); + // Give ownership to the NumberFormatterImpl. + fSymbols.adoptInstead(fMicros.symbols); + } + + // Load and parse the pattern string. It is used for grouping sizes and affixes only. + // If we are formatting currency, check for a currency-specific pattern. + const char16_t* pattern = nullptr; + if (isCurrency) { + CurrencyFormatInfoResult info = getCurrencyFormatInfo(macros.locale, currency.getSubtype(), status); + if (info.exists) { + pattern = info.pattern; + // It's clunky to clone an object here, but this code is not frequently executed. 
+ DecimalFormatSymbols* symbols = new DecimalFormatSymbols(*fMicros.symbols); + fMicros.symbols = symbols; + fSymbols.adoptInstead(symbols); + symbols->setSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol, + UnicodeString(info.decimalSeparator), + FALSE); + symbols->setSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol, + UnicodeString(info.groupingSeparator), + FALSE); + } + } + if (pattern == nullptr) { + CldrPatternStyle patternStyle; + if (isPercent || isPermille) { + patternStyle = CLDR_PATTERN_STYLE_PERCENT; + } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { + patternStyle = CLDR_PATTERN_STYLE_DECIMAL; + } else if (isAccounting) { + // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now, + // the API contract allows us to add support to other units in the future. + patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING; + } else { + patternStyle = CLDR_PATTERN_STYLE_CURRENCY; + } + pattern = getPatternForStyle(macros.locale, nsName, patternStyle, status); } - const char16_t *pattern = getPatternForStyle(macros.locale, nsName, patternStyle, status); auto patternInfo = new ParsedPatternInfo(); fPatternInfo.adoptInstead(patternInfo); PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status); @@ -207,15 +272,6 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR /// ///////////////////////////////////////////////////////////////////////////////////// - // Symbols - if (macros.symbols.isDecimalFormatSymbols()) { - fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); - } else { - fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status); - // Give ownership to the NumberFormatterImpl. 
- fSymbols.adoptInstead(fMicros.symbols); - } - // Rounding strategy if (!macros.rounder.isBogus()) { fMicros.rounding = macros.rounder; @@ -233,11 +289,11 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, fMicros.grouping = macros.grouper; } else if (macros.notation.fType == Notation::NTN_COMPACT) { // Compact notation uses minGrouping by default since ICU 59 - fMicros.grouping = Grouper::minTwoDigits(); + fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_MIN2); } else { - fMicros.grouping = Grouper::defaults(); + fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_AUTO); } - fMicros.grouping.setLocaleData(*fPatternInfo); + fMicros.grouping.setLocaleData(*fPatternInfo, macros.locale); // Padding strategy if (!macros.padder.isBogus()) { @@ -308,6 +364,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, LongNameHandler::forMeasureUnit( macros.locale, macros.unit, + macros.perUnit, unitWidth, resolvePluralRules(macros.rules, macros.locale, status), chain, diff --git a/deps/icu-small/source/i18n/number_grouping.cpp b/deps/icu-small/source/i18n/number_grouping.cpp index 15362825cc68ce..a2b1bbd6b3388e 100644 --- a/deps/icu-small/source/i18n/number_grouping.cpp +++ b/deps/icu-small/source/i18n/number_grouping.cpp @@ -7,36 +7,70 @@ #include "unicode/numberformatter.h" #include "number_patternstring.h" +#include "uresimp.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; -Grouper Grouper::defaults() { - return {-2, -2, false}; +namespace { + +int16_t getMinGroupingForLocale(const Locale& locale) { + // TODO: Cache this? + UErrorCode localStatus = U_ZERO_ERROR; + LocalUResourceBundlePointer bundle(ures_open(NULL, locale.getName(), &localStatus)); + int32_t resultLen = 0; + const char16_t* result = ures_getStringByKeyWithFallback( + bundle.getAlias(), + "NumberElements/minimumGroupingDigits", + &resultLen, + &localStatus); + // TODO: Is it safe to assume resultLen == 1? 
Would locales set minGrouping >= 10? + if (U_FAILURE(localStatus) || resultLen != 1) { + return 1; + } + return result[0] - u'0'; } -Grouper Grouper::minTwoDigits() { - return {-2, -2, true}; } -Grouper Grouper::none() { - return {-1, -1, false}; +Grouper Grouper::forStrategy(UGroupingStrategy grouping) { + switch (grouping) { + case UNUM_GROUPING_OFF: + return {-1, -1, -2}; + case UNUM_GROUPING_AUTO: + return {-2, -2, -2}; + case UNUM_GROUPING_MIN2: + return {-2, -2, -3}; + case UNUM_GROUPING_ON_ALIGNED: + return {-4, -4, 1}; + case UNUM_GROUPING_THOUSANDS: + return {3, 3, 1}; + default: + U_ASSERT(FALSE); + } } -void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo) { - if (fGrouping1 != -2) { +void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo, const Locale& locale) { + if (fGrouping1 != -2 && fGrouping2 != -4) { return; } - auto grouping1 = static_cast (patternInfo.positive.groupingSizes & 0xffff); - auto grouping2 = static_cast ((patternInfo.positive.groupingSizes >> 16) & 0xffff); - auto grouping3 = static_cast ((patternInfo.positive.groupingSizes >> 32) & 0xffff); + auto grouping1 = static_cast (patternInfo.positive.groupingSizes & 0xffff); + auto grouping2 = static_cast ((patternInfo.positive.groupingSizes >> 16) & 0xffff); + auto grouping3 = static_cast ((patternInfo.positive.groupingSizes >> 32) & 0xffff); if (grouping2 == -1) { - grouping1 = -1; + grouping1 = fGrouping1 == -4 ? 
(short) 3 : (short) -1; } if (grouping3 == -1) { grouping2 = grouping1; } + if (fMinGrouping == -2) { + fMinGrouping = getMinGroupingForLocale(locale); + } else if (fMinGrouping == -3) { + fMinGrouping = uprv_max(2, getMinGroupingForLocale(locale)); + } else { + // leave fMinGrouping alone + } fGrouping1 = grouping1; fGrouping2 = grouping2; } @@ -49,7 +83,7 @@ bool Grouper::groupAtPosition(int32_t position, const impl::DecimalQuantity &val } position -= fGrouping1; return position >= 0 && (position % fGrouping2) == 0 - && value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= (fMin2 ? 2 : 1); + && value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= fMinGrouping; } #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/number_integerwidth.cpp b/deps/icu-small/source/i18n/number_integerwidth.cpp index 10dacfc4acb96f..4a612273f5e530 100644 --- a/deps/icu-small/source/i18n/number_integerwidth.cpp +++ b/deps/icu-small/source/i18n/number_integerwidth.cpp @@ -13,25 +13,28 @@ using namespace icu; using namespace icu::number; using namespace icu::number::impl; -IntegerWidth::IntegerWidth(int8_t minInt, int8_t maxInt) { +IntegerWidth::IntegerWidth(digits_t minInt, digits_t maxInt) { fUnion.minMaxInt.fMinInt = minInt; fUnion.minMaxInt.fMaxInt = maxInt; } IntegerWidth IntegerWidth::zeroFillTo(int32_t minInt) { if (minInt >= 0 && minInt <= kMaxIntFracSig) { - return {static_cast(minInt), -1}; + return {static_cast(minInt), -1}; } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) { if (fHasError) { return *this; } // No-op on error - if (maxInt >= 0 && maxInt <= kMaxIntFracSig) { - return {fUnion.minMaxInt.fMinInt, static_cast(maxInt)}; + digits_t minInt = fUnion.minMaxInt.fMinInt; + if (maxInt >= 0 && maxInt <= kMaxIntFracSig && minInt <= maxInt) { + return {minInt, static_cast(maxInt)}; + } else if (maxInt == -1) { + return {minInt, -1}; } else { 
- return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } diff --git a/deps/icu-small/source/i18n/number_longnames.cpp b/deps/icu-small/source/i18n/number_longnames.cpp index 88b3413585a0f7..5c363442e7c033 100644 --- a/deps/icu-small/source/i18n/number_longnames.cpp +++ b/deps/icu-small/source/i18n/number_longnames.cpp @@ -5,6 +5,7 @@ #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT +#include "unicode/simpleformatter.h" #include "unicode/ures.h" #include "ureslocs.h" #include "charstr.h" @@ -19,6 +20,37 @@ using namespace icu::number::impl; namespace { +constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; +constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; +constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 2; + +static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { + // pluralKeyword can also be "dnam" or "per" + if (uprv_strcmp(pluralKeyword, "dnam") == 0) { + return DNAM_INDEX; + } else if (uprv_strcmp(pluralKeyword, "per") == 0) { + return PER_INDEX; + } else { + StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); + return plural; + } +} + +static UnicodeString getWithPlural( + const UnicodeString* strings, + int32_t plural, + UErrorCode& status) { + UnicodeString result = strings[plural]; + if (result.isBogus()) { + result = strings[StandardPlural::Form::OTHER]; + } + if (result.isBogus()) { + // There should always be data in the "other" plural variant. + status = U_INTERNAL_PROGRAM_ERROR; + } + return result; +} + ////////////////////////// /// BEGIN DATA LOADING /// @@ -28,7 +60,7 @@ class PluralTableSink : public ResourceSink { public: explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { // Initialize the array to bogus strings. 
- for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + for (int32_t i = 0; i < ARRAY_LENGTH; i++) { outArray[i].setToBogus(); } } @@ -36,17 +68,13 @@ class PluralTableSink : public ResourceSink { void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { ResourceTable pluralsTable = value.getTable(status); if (U_FAILURE(status)) { return; } - for (int i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { - // In MeasureUnit data, ignore dnam and per units for now. - if (uprv_strcmp(key, "dnam") == 0 || uprv_strcmp(key, "per") == 0) { - continue; - } - StandardPlural::Form plural = StandardPlural::fromString(key, status); + for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { + int32_t index = getIndex(key, status); if (U_FAILURE(status)) { return; } - if (!outArray[plural].isBogus()) { + if (!outArray[index].isBogus()) { continue; } - outArray[plural] = value.getUnicodeString(status); + outArray[index] = value.getUnicodeString(status); if (U_FAILURE(status)) { return; } } } @@ -105,6 +133,22 @@ void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, } } +UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &width, UErrorCode& status) { + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return {}; } + CharString key; + key.append("units", status); + if (width == UNUM_UNIT_WIDTH_NARROW) { + key.append("Narrow", status); + } else if (width == UNUM_UNIT_WIDTH_SHORT) { + key.append("Short", status); + } + key.append("/compound/per", status); + int32_t len = 0; + const UChar* ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); + return UnicodeString(ptr, len); +} + //////////////////////// /// END DATA LOADING /// //////////////////////// @@ -112,11 +156,24 @@ void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, } // 
namespace LongNameHandler -LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const UNumberUnitWidth &width, - const PluralRules *rules, const MicroPropsGenerator *parent, - UErrorCode &status) { +LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const MeasureUnit &perUnit, + const UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status) { + MeasureUnit unit = unitRef; + if (uprv_strcmp(perUnit.getType(), "none") != 0) { + // Compound unit: first try to simplify (e.g., meters per second is its own unit). + bool isResolved = false; + MeasureUnit resolved = MeasureUnit::resolveUnitPerUnit(unit, perUnit, &isResolved); + if (isResolved) { + unit = resolved; + } else { + // No simplified form is available. + return forCompoundUnit(loc, unit, perUnit, width, rules, parent, status); + } + } + LongNameHandler result(rules, parent); - UnicodeString simpleFormats[StandardPlural::Form::COUNT]; + UnicodeString simpleFormats[ARRAY_LENGTH]; getMeasureData(loc, unit, width, simpleFormats, status); if (U_FAILURE(status)) { return result; } // TODO: What field to use for units? 
@@ -124,12 +181,47 @@ LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unit, cons return result; } +LongNameHandler +LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, + const UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status) { + LongNameHandler result(rules, parent); + UnicodeString primaryData[ARRAY_LENGTH]; + getMeasureData(loc, unit, width, primaryData, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryData[ARRAY_LENGTH]; + getMeasureData(loc, perUnit, width, secondaryData, status); + if (U_FAILURE(status)) { return result; } + + UnicodeString perUnitFormat; + if (!secondaryData[PER_INDEX].isBogus()) { + perUnitFormat = secondaryData[PER_INDEX]; + } else { + UnicodeString rawPerUnitFormat = getPerUnitFormat(loc, width, status); + if (U_FAILURE(status)) { return result; } + // rawPerUnitFormat is something like "{0}/{1}"; we need to substitute in the secondary unit. + SimpleFormatter compiled(rawPerUnitFormat, 2, 2, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryFormat = getWithPlural(secondaryData, StandardPlural::Form::ONE, status); + if (U_FAILURE(status)) { return result; } + SimpleFormatter secondaryCompiled(secondaryFormat, 1, 1, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim(); + // TODO: Why does UnicodeString need to be explicit in the following line? + compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status); + if (U_FAILURE(status)) { return result; } + } + // TODO: What field to use for units? 
+ multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_FIELD_COUNT, result.fModifiers, status); + return result; +} + LongNameHandler LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { LongNameHandler result(rules, parent); - UnicodeString simpleFormats[StandardPlural::Form::COUNT]; + UnicodeString simpleFormats[ARRAY_LENGTH]; getCurrencyLongNameData(loc, currency, simpleFormats, status); if (U_FAILURE(status)) { return result; } simpleFormatsToModifiers(simpleFormats, UNUM_CURRENCY_FIELD, result.fModifiers, status); @@ -139,20 +231,30 @@ LongNameHandler LongNameHandler::forCurrencyLongNames(const Locale &loc, const C void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, SimpleModifier *output, UErrorCode &status) { for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { - UnicodeString simpleFormat = simpleFormats[i]; - if (simpleFormat.isBogus()) { - simpleFormat = simpleFormats[StandardPlural::Form::OTHER]; - } - if (simpleFormat.isBogus()) { - // There should always be data in the "other" plural variant. 
- status = U_INTERNAL_PROGRAM_ERROR; - return; - } - SimpleFormatter compiledFormatter(simpleFormat, 1, 1, status); + UnicodeString simpleFormat = getWithPlural(simpleFormats, i, status); + if (U_FAILURE(status)) { return; } + SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); + if (U_FAILURE(status)) { return; } output[i] = SimpleModifier(compiledFormatter, field, false); } } +void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, + Field field, SimpleModifier *output, UErrorCode &status) { + SimpleFormatter trailCompiled(trailFormat, 1, 1, status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + UnicodeString leadFormat = getWithPlural(leadFormats, i, status); + if (U_FAILURE(status)) { return; } + UnicodeString compoundFormat; + trailCompiled.format(leadFormat, compoundFormat, status); + if (U_FAILURE(status)) { return; } + SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); + if (U_FAILURE(status)) { return; } + output[i] = SimpleModifier(compoundCompiled, field, false); + } +} + void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const { parent->processQuantity(quantity, micros, status); diff --git a/deps/icu-small/source/i18n/number_longnames.h b/deps/icu-small/source/i18n/number_longnames.h index 22ecbac30e1ebc..8738bb99e7d2e6 100644 --- a/deps/icu-small/source/i18n/number_longnames.h +++ b/deps/icu-small/source/i18n/number_longnames.h @@ -21,8 +21,9 @@ class LongNameHandler : public MicroPropsGenerator, public UMemory { const MicroPropsGenerator *parent, UErrorCode &status); static LongNameHandler - forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const UNumberUnitWidth &width, - const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status); + forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, + const 
UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status); void processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const U_OVERRIDE; @@ -35,8 +36,15 @@ class LongNameHandler : public MicroPropsGenerator, public UMemory { LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) : rules(rules), parent(parent) {} + static LongNameHandler + forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, + const UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status); + static void simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, SimpleModifier *output, UErrorCode &status); + static void multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, + Field field, SimpleModifier *output, UErrorCode &status); }; } // namespace impl diff --git a/deps/icu-small/source/i18n/number_modifiers.cpp b/deps/icu-small/source/i18n/number_modifiers.cpp index a19b12d11ed7a2..872b97010d74b7 100644 --- a/deps/icu-small/source/i18n/number_modifiers.cpp +++ b/deps/icu-small/source/i18n/number_modifiers.cpp @@ -74,19 +74,29 @@ bool ConstantAffixModifier::isStrong() const { SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong) : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong) { - U_ASSERT(1 == - SimpleFormatter::getArgumentLimit(fCompiledPattern.getBuffer(), fCompiledPattern.length())); - if (fCompiledPattern.charAt(1) != 0) { + int32_t argLimit = SimpleFormatter::getArgumentLimit( + fCompiledPattern.getBuffer(), fCompiledPattern.length()); + if (argLimit == 0) { + // No arguments in compiled pattern fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; - fSuffixOffset = 3 + fPrefixLength; - } else { - fPrefixLength = 0; - fSuffixOffset = 2; - } - if (3 + fPrefixLength < 
fCompiledPattern.length()) { - fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; - } else { + U_ASSERT(2 + fPrefixLength == fCompiledPattern.length()); + // Set suffixOffset = -1 to indicate no arguments in compiled pattern. + fSuffixOffset = -1; fSuffixLength = 0; + } else { + U_ASSERT(argLimit == 1); + if (fCompiledPattern.charAt(1) != 0) { + fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; + fSuffixOffset = 3 + fPrefixLength; + } else { + fPrefixLength = 0; + fSuffixOffset = 2; + } + if (3 + fPrefixLength < fCompiledPattern.length()) { + fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; + } else { + fSuffixLength = 0; + } } } @@ -123,26 +133,37 @@ bool SimpleModifier::isStrong() const { int32_t SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex, Field field, UErrorCode &status) const { - if (fPrefixLength > 0) { - result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); - } - if (fSuffixLength > 0) { - result.insert( - endIndex + fPrefixLength, - fCompiledPattern, - 1 + fSuffixOffset, - 1 + fSuffixOffset + fSuffixLength, - field, - status); + if (fSuffixOffset == -1) { + // There is no argument for the inner number; overwrite the entire segment with our string. 
+ return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); + } else { + if (fPrefixLength > 0) { + result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); + } + if (fSuffixLength > 0) { + result.insert( + endIndex + fPrefixLength, + fCompiledPattern, + 1 + fSuffixOffset, + 1 + fSuffixOffset + fSuffixLength, + field, + status); + } + return fPrefixLength + fSuffixLength; } - return fPrefixLength + fSuffixLength; } int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const { - // Insert the suffix first since inserting the prefix will change the rightIndex - int32_t length = output.insert(rightIndex, fSuffix, status); - length += output.insert(leftIndex, fPrefix, status); + int32_t length = output.insert(leftIndex, fPrefix, status); + if (fOverwrite) { + length += output.splice( + leftIndex + length, + rightIndex + length, + UnicodeString(), 0, 0, + UNUM_FIELD_COUNT, status); + } + length += output.insert(rightIndex + length, fSuffix, status); return length; } @@ -162,10 +183,11 @@ bool ConstantMultiFieldModifier::isStrong() const { CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix, + bool overwrite, bool strong, const DecimalFormatSymbols &symbols, UErrorCode &status) - : ConstantMultiFieldModifier(prefix, suffix, strong) { + : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) { // Check for currency spacing. Do not build the UnicodeSets unless there is // a currency code point at a boundary. 
if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) { diff --git a/deps/icu-small/source/i18n/number_modifiers.h b/deps/icu-small/source/i18n/number_modifiers.h index 6a88828a44dd71..4762a6f6d37a2d 100644 --- a/deps/icu-small/source/i18n/number_modifiers.h +++ b/deps/icu-small/source/i18n/number_modifiers.h @@ -103,8 +103,15 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory { */ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { public: - ConstantMultiFieldModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix, - bool strong) : fPrefix(prefix), fSuffix(suffix), fStrong(strong) {} + ConstantMultiFieldModifier( + const NumberStringBuilder &prefix, + const NumberStringBuilder &suffix, + bool overwrite, + bool strong) + : fPrefix(prefix), + fSuffix(suffix), + fOverwrite(overwrite), + fStrong(strong) {} int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; @@ -120,6 +127,7 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { // value and is treated internally as immutable. 
NumberStringBuilder fPrefix; NumberStringBuilder fSuffix; + bool fOverwrite; bool fStrong; }; @@ -127,8 +135,13 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { class U_I18N_API CurrencySpacingEnabledModifier : public ConstantMultiFieldModifier { public: /** Safe code path */ - CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix, - bool strong, const DecimalFormatSymbols &symbols, UErrorCode &status); + CurrencySpacingEnabledModifier( + const NumberStringBuilder &prefix, + const NumberStringBuilder &suffix, + bool overwrite, + bool strong, + const DecimalFormatSymbols &symbols, + UErrorCode &status); int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; @@ -216,31 +229,33 @@ class U_I18N_API ParameterizedModifier : public UMemory { } } - void adoptPositiveNegativeModifiers(const Modifier *positive, const Modifier *negative) { - mods[0] = positive; - mods[1] = negative; + void adoptPositiveNegativeModifiers( + const Modifier *positive, const Modifier *zero, const Modifier *negative) { + mods[2] = positive; + mods[1] = zero; + mods[0] = negative; } /** The modifier is ADOPTED. */ - void adoptSignPluralModifier(bool isNegative, StandardPlural::Form plural, const Modifier *mod) { - mods[getModIndex(isNegative, plural)] = mod; + void adoptSignPluralModifier(int8_t signum, StandardPlural::Form plural, const Modifier *mod) { + mods[getModIndex(signum, plural)] = mod; } /** Returns a reference to the modifier; no ownership change. */ - const Modifier *getModifier(bool isNegative) const { - return mods[isNegative ? 1 : 0]; + const Modifier *getModifier(int8_t signum) const { + return mods[signum + 1]; } /** Returns a reference to the modifier; no ownership change. 
*/ - const Modifier *getModifier(bool isNegative, StandardPlural::Form plural) const { - return mods[getModIndex(isNegative, plural)]; + const Modifier *getModifier(int8_t signum, StandardPlural::Form plural) const { + return mods[getModIndex(signum, plural)]; } private: - const Modifier *mods[2 * StandardPlural::COUNT]; + const Modifier *mods[3 * StandardPlural::COUNT]; - inline static int32_t getModIndex(bool isNegative, StandardPlural::Form plural) { - return static_cast<int32_t>(plural) * 2 + (isNegative ? 1 : 0); + inline static int32_t getModIndex(int8_t signum, StandardPlural::Form plural) { + return static_cast<int32_t>(plural) * 3 + (signum + 1); } }; diff --git a/deps/icu-small/source/i18n/number_notation.cpp b/deps/icu-small/source/i18n/number_notation.cpp index ff0cd9505de299..f4ad333354d0c7 100644 --- a/deps/icu-small/source/i18n/number_notation.cpp +++ b/deps/icu-small/source/i18n/number_notation.cpp @@ -54,13 +54,13 @@ Notation Notation::simple() { ScientificNotation ScientificNotation::withMinExponentDigits(int32_t minExponentDigits) const { - if (minExponentDigits >= 0 && minExponentDigits < kMaxIntFracSig) { + if (minExponentDigits >= 1 && minExponentDigits <= kMaxIntFracSig) { ScientificSettings settings = fUnion.scientific; - settings.fMinExponentDigits = (int8_t) minExponentDigits; + settings.fMinExponentDigits = static_cast<int8_t>(minExponentDigits); NotationUnion union_ = {settings}; return {NTN_SCIENTIFIC, union_}; } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } diff --git a/deps/icu-small/source/i18n/number_padding.cpp b/deps/icu-small/source/i18n/number_padding.cpp index a478af60541dde..b1db3490cd4489 100644 --- a/deps/icu-small/source/i18n/number_padding.cpp +++ b/deps/icu-small/source/i18n/number_padding.cpp @@ -43,7 +43,7 @@ Padder Padder::codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosit if (targetWidth >= 0) { return {cp, targetWidth, position}; } else { - return 
{U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } diff --git a/deps/icu-small/source/i18n/number_patternmodifier.cpp b/deps/icu-small/source/i18n/number_patternmodifier.cpp index 0599f92a4f343b..e182104c9116e7 100644 --- a/deps/icu-small/source/i18n/number_patternmodifier.cpp +++ b/deps/icu-small/source/i18n/number_patternmodifier.cpp @@ -38,8 +38,8 @@ MutablePatternModifier::setSymbols(const DecimalFormatSymbols *symbols, const Cu this->rules = rules; } -void MutablePatternModifier::setNumberProperties(bool isNegative, StandardPlural::Form plural) { - this->isNegative = isNegative; +void MutablePatternModifier::setNumberProperties(int8_t signum, StandardPlural::Form plural) { + this->signum = signum; this->plural = plural; } @@ -74,10 +74,12 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren if (needsPlurals()) { // Slower path when we require the plural keyword. for (StandardPlural::Form plural : STANDARD_PLURAL_VALUES) { - setNumberProperties(false, plural); - pm->adoptSignPluralModifier(false, plural, createConstantModifier(status)); - setNumberProperties(true, plural); - pm->adoptSignPluralModifier(true, plural, createConstantModifier(status)); + setNumberProperties(1, plural); + pm->adoptSignPluralModifier(1, plural, createConstantModifier(status)); + setNumberProperties(0, plural); + pm->adoptSignPluralModifier(0, plural, createConstantModifier(status)); + setNumberProperties(-1, plural); + pm->adoptSignPluralModifier(-1, plural, createConstantModifier(status)); } if (U_FAILURE(status)) { delete pm; @@ -86,11 +88,13 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren return new ImmutablePatternModifier(pm, rules, parent); // adopts pm } else { // Faster path when plural keyword is not needed. 
- setNumberProperties(false, StandardPlural::Form::COUNT); + setNumberProperties(1, StandardPlural::Form::COUNT); Modifier *positive = createConstantModifier(status); - setNumberProperties(true, StandardPlural::Form::COUNT); + setNumberProperties(0, StandardPlural::Form::COUNT); + Modifier *zero = createConstantModifier(status); + setNumberProperties(-1, StandardPlural::Form::COUNT); Modifier *negative = createConstantModifier(status); - pm->adoptPositiveNegativeModifiers(positive, negative); + pm->adoptPositiveNegativeModifiers(positive, zero, negative); if (U_FAILURE(status)) { delete pm; return nullptr; @@ -105,9 +109,9 @@ ConstantMultiFieldModifier *MutablePatternModifier::createConstantModifier(UErro insertPrefix(a, 0, status); insertSuffix(b, 0, status); if (patternInfo->hasCurrencySign()) { - return new CurrencySpacingEnabledModifier(a, b, fStrong, *symbols, status); + return new CurrencySpacingEnabledModifier(a, b, !patternInfo->hasBody(), fStrong, *symbols, status); } else { - return new ConstantMultiFieldModifier(a, b, fStrong); + return new ConstantMultiFieldModifier(a, b, !patternInfo->hasBody(), fStrong); } } @@ -123,13 +127,13 @@ void ImmutablePatternModifier::processQuantity(DecimalQuantity &quantity, MicroP void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const { if (rules == nullptr) { - micros.modMiddle = pm->getModifier(quantity.isNegative()); + micros.modMiddle = pm->getModifier(quantity.signum()); } else { // TODO: Fix this. Avoid the copy. DecimalQuantity copy(quantity); copy.roundToInfinity(); StandardPlural::Form plural = copy.getStandardPlural(rules); - micros.modMiddle = pm->getModifier(quantity.isNegative(), plural); + micros.modMiddle = pm->getModifier(quantity.signum(), plural); } } @@ -149,9 +153,9 @@ void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &mi // TODO: Fix this. Avoid the copy. 
DecimalQuantity copy(fq); micros.rounding.apply(copy, status); - nonConstThis->setNumberProperties(fq.isNegative(), copy.getStandardPlural(rules)); + nonConstThis->setNumberProperties(fq.signum(), copy.getStandardPlural(rules)); } else { - nonConstThis->setNumberProperties(fq.isNegative(), StandardPlural::Form::COUNT); + nonConstThis->setNumberProperties(fq.signum(), StandardPlural::Form::COUNT); } micros.modMiddle = this; } @@ -163,9 +167,23 @@ int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftI auto nonConstThis = const_cast<MutablePatternModifier *>(this); int32_t prefixLen = nonConstThis->insertPrefix(output, leftIndex, status); int32_t suffixLen = nonConstThis->insertSuffix(output, rightIndex + prefixLen, status); + // If the pattern had no decimal stem body (like #,##0.00), overwrite the value. + int32_t overwriteLen = 0; + if (!patternInfo->hasBody()) { + overwriteLen = output.splice( + leftIndex + prefixLen, rightIndex + prefixLen, + UnicodeString(), 0, 0, UNUM_FIELD_COUNT, + status); + } CurrencySpacingEnabledModifier::applyCurrencySpacing( - output, leftIndex, prefixLen, rightIndex + prefixLen, suffixLen, *symbols, status); - return prefixLen + suffixLen; + output, + leftIndex, + prefixLen, + rightIndex + overwriteLen + prefixLen, + suffixLen, + *symbols, + status); + return prefixLen + overwriteLen + suffixLen; } int32_t MutablePatternModifier::getPrefixLength(UErrorCode &status) const { @@ -230,13 +248,16 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { } else if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_HIDDEN) { return UnicodeString(); } else { + UCurrNameStyle selector = (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW) + ? 
UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME + : UCurrNameStyle::UCURR_SYMBOL_NAME; UErrorCode status = U_ZERO_ERROR; UBool isChoiceFormat = FALSE; int32_t symbolLen = 0; const char16_t *symbol = ucurr_getName( currencyCode, symbols->getLocale().getName(), - UCurrNameStyle::UCURR_SYMBOL_NAME, + selector, &isChoiceFormat, &symbolLen, &status); @@ -278,14 +299,17 @@ void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) { inCharSequenceMode = true; // Should the output render '+' where '-' would normally appear in the pattern? - plusReplacesMinusSign = !isNegative && ( - signDisplay == UNUM_SIGN_ALWAYS || - signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS) && - patternInfo->positiveHasPlusSign() == false; + plusReplacesMinusSign = signum != -1 + && (signDisplay == UNUM_SIGN_ALWAYS + || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS + || (signum == 1 + && (signDisplay == UNUM_SIGN_EXCEPT_ZERO + || signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) + && patternInfo->positiveHasPlusSign() == false; // Should we use the affix from the negative subpattern? (If not, we will use the positive subpattern.) bool useNegativeAffixPattern = patternInfo->hasNegativeSubpattern() && ( - isNegative || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign)); + signum == -1 || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign)); // Resolve the flags for the affix pattern. fFlags = 0; @@ -303,7 +327,7 @@ void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) { // Should we prepend a sign to the pattern? 
if (!isPrefix || useNegativeAffixPattern) { prependSign = false; - } else if (isNegative) { + } else if (signum == -1) { prependSign = signDisplay != UNUM_SIGN_NEVER; } else { prependSign = plusReplacesMinusSign; diff --git a/deps/icu-small/source/i18n/number_patternmodifier.h b/deps/icu-small/source/i18n/number_patternmodifier.h index 705037f0ba7173..9c8b95f7764436 100644 --- a/deps/icu-small/source/i18n/number_patternmodifier.h +++ b/deps/icu-small/source/i18n/number_patternmodifier.h @@ -18,8 +18,8 @@ U_NAMESPACE_BEGIN // Export an explicit template instantiation of the LocalPointer that is used as a // data member of ParameterizedModifier. -// (MSVC requires this, even though it should not be necessary.) -#if defined (_MSC_VER) +// (When building DLLs for Windows this is required.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= #pragma warning(suppress: 4661) template class U_I18N_API LocalPointerBase; @@ -125,13 +125,13 @@ class U_I18N_API MutablePatternModifier /** * Sets attributes of the current number being processed. * - * @param isNegative - * Whether the number is negative. + * @param signum + * -1 if negative; +1 if positive; or 0 if zero. * @param plural - * The plural form of the number, required only if the pattern contains the triple currency sign, "¤¤¤" - * (and as indicated by {@link #needsPlurals()}). + * The plural form of the number, required only if the pattern contains the triple + * currency sign, "¤¤¤" (and as indicated by {@link #needsPlurals()}). */ - void setNumberProperties(bool isNegative, StandardPlural::Form plural); + void setNumberProperties(int8_t signum, StandardPlural::Form plural); /** * Returns true if the pattern represented by this MurkyModifier requires a plural keyword in order to localize. 
@@ -211,7 +211,7 @@ class U_I18N_API MutablePatternModifier const PluralRules *rules; // Number details (initialized in setNumberProperties) - bool isNegative; + int8_t signum; StandardPlural::Form plural; // QuantityChain details (initialized in addToChain) diff --git a/deps/icu-small/source/i18n/number_patternstring.cpp b/deps/icu-small/source/i18n/number_patternstring.cpp index c67e3541816547..20178824b0e20a 100644 --- a/deps/icu-small/source/i18n/number_patternstring.cpp +++ b/deps/icu-small/source/i18n/number_patternstring.cpp @@ -95,6 +95,10 @@ bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &st return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status); } +bool ParsedPatternInfo::hasBody() const { + return positive.integerTotal > 0; +} + ///////////////////////////////////////////////////// /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// ///////////////////////////////////////////////////// diff --git a/deps/icu-small/source/i18n/number_patternstring.h b/deps/icu-small/source/i18n/number_patternstring.h index 6e1bb7f44ddc02..ec44290d66397c 100644 --- a/deps/icu-small/source/i18n/number_patternstring.h +++ b/deps/icu-small/source/i18n/number_patternstring.h @@ -84,6 +84,8 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor bool containsSymbolType(AffixPatternType type, UErrorCode &status) const U_OVERRIDE; + bool hasBody() const U_OVERRIDE; + private: struct U_I18N_API ParserState { const UnicodeString &pattern; // reference to the parent diff --git a/deps/icu-small/source/i18n/number_rounding.cpp b/deps/icu-small/source/i18n/number_rounding.cpp index 5c494f09544425..fd4dafdf983b61 100644 --- a/deps/icu-small/source/i18n/number_rounding.cpp +++ b/deps/icu-small/source/i18n/number_rounding.cpp @@ -58,7 +58,7 @@ FractionRounder Rounder::fixedFraction(int32_t minMaxFractionPlaces) { if (minMaxFractionPlaces >= 0 && minMaxFractionPlaces <= kMaxIntFracSig) { return 
constructFraction(minMaxFractionPlaces, minMaxFractionPlaces); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -66,7 +66,7 @@ FractionRounder Rounder::minFraction(int32_t minFractionPlaces) { if (minFractionPlaces >= 0 && minFractionPlaces <= kMaxIntFracSig) { return constructFraction(minFractionPlaces, -1); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -74,7 +74,7 @@ FractionRounder Rounder::maxFraction(int32_t maxFractionPlaces) { if (maxFractionPlaces >= 0 && maxFractionPlaces <= kMaxIntFracSig) { return constructFraction(0, maxFractionPlaces); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -83,40 +83,40 @@ FractionRounder Rounder::minMaxFraction(int32_t minFractionPlaces, int32_t maxFr minFractionPlaces <= maxFractionPlaces) { return constructFraction(minFractionPlaces, maxFractionPlaces); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder Rounder::fixedDigits(int32_t minMaxSignificantDigits) { - if (minMaxSignificantDigits >= 0 && minMaxSignificantDigits <= kMaxIntFracSig) { + if (minMaxSignificantDigits >= 1 && minMaxSignificantDigits <= kMaxIntFracSig) { return constructSignificant(minMaxSignificantDigits, minMaxSignificantDigits); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder Rounder::minDigits(int32_t minSignificantDigits) { - if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) { + if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) { return constructSignificant(minSignificantDigits, -1); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder Rounder::maxDigits(int32_t maxSignificantDigits) { - if (maxSignificantDigits >= 0 && 
maxSignificantDigits <= kMaxIntFracSig) { - return constructSignificant(0, maxSignificantDigits); + if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) { + return constructSignificant(1, maxSignificantDigits); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder Rounder::minMaxDigits(int32_t minSignificantDigits, int32_t maxSignificantDigits) { - if (minSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig && + if (minSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig && minSignificantDigits <= maxSignificantDigits) { return constructSignificant(minSignificantDigits, maxSignificantDigits); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -124,7 +124,7 @@ IncrementRounder Rounder::increment(double roundingIncrement) { if (roundingIncrement > 0.0) { return constructIncrement(roundingIncrement, 0); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -139,19 +139,19 @@ Rounder Rounder::withMode(RoundingMode roundingMode) const { Rounder FractionRounder::withMinDigits(int32_t minSignificantDigits) const { if (fType == RND_ERROR) { return *this; } // no-op in error state - if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) { + if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) { return constructFractionSignificant(*this, minSignificantDigits, -1); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder FractionRounder::withMaxDigits(int32_t maxSignificantDigits) const { if (fType == RND_ERROR) { return *this; } // no-op in error state - if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) { + if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) { return constructFractionSignificant(*this, -1, 
maxSignificantDigits); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -185,14 +185,14 @@ Rounder IncrementRounder::withMinFraction(int32_t minFrac) const { if (minFrac >= 0 && minFrac <= kMaxIntFracSig) { return constructIncrement(fUnion.increment.fIncrement, minFrac); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } FractionRounder Rounder::constructFraction(int32_t minFrac, int32_t maxFrac) { FractionSignificantSettings settings; - settings.fMinFrac = static_cast (minFrac); - settings.fMaxFrac = static_cast (maxFrac); + settings.fMinFrac = static_cast(minFrac); + settings.fMaxFrac = static_cast(maxFrac); settings.fMinSig = -1; settings.fMaxSig = -1; RounderUnion union_; @@ -204,8 +204,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) { FractionSignificantSettings settings; settings.fMinFrac = -1; settings.fMaxFrac = -1; - settings.fMinSig = static_cast(minSig); - settings.fMaxSig = static_cast(maxSig); + settings.fMinSig = static_cast(minSig); + settings.fMaxSig = static_cast(maxSig); RounderUnion union_; union_.fracSig = settings; return {RND_SIGNIFICANT, union_, kDefaultMode}; @@ -214,8 +214,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) { Rounder Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSig, int32_t maxSig) { FractionSignificantSettings settings = base.fUnion.fracSig; - settings.fMinSig = static_cast(minSig); - settings.fMaxSig = static_cast(maxSig); + settings.fMinSig = static_cast(minSig); + settings.fMaxSig = static_cast(maxSig); RounderUnion union_; union_.fracSig = settings; return {RND_FRACTION_SIGNIFICANT, union_, kDefaultMode}; @@ -224,7 +224,7 @@ Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSi IncrementRounder Rounder::constructIncrement(double increment, int32_t minFrac) { IncrementSettings settings; 
settings.fIncrement = increment; - settings.fMinFrac = minFrac; + settings.fMinFrac = static_cast(minFrac); RounderUnion union_; union_.increment = settings; return {RND_INCREMENT, union_, kDefaultMode}; @@ -251,28 +251,39 @@ void Rounder::setLocaleData(const CurrencyUnit ¤cy, UErrorCode &status) { int32_t Rounder::chooseMultiplierAndApply(impl::DecimalQuantity &input, const impl::MultiplierProducer &producer, UErrorCode &status) { - // TODO: Make a better and more efficient implementation. - // TODO: Avoid the object creation here. - DecimalQuantity copy(input); - + // Do not call this method with zero. U_ASSERT(!input.isZero()); - int32_t magnitude = input.getMagnitude(); - int32_t multiplier = producer.getMultiplier(magnitude); + + // Perform the first attempt at rounding. + int magnitude = input.getMagnitude(); + int multiplier = producer.getMultiplier(magnitude); input.adjustMagnitude(multiplier); apply(input, status); - // If the number turned to zero when rounding, do not re-attempt the rounding. - if (!input.isZero() && input.getMagnitude() == magnitude + multiplier + 1) { - magnitude += 1; - input = copy; - multiplier = producer.getMultiplier(magnitude); - input.adjustMagnitude(multiplier); - U_ASSERT(input.getMagnitude() == magnitude + multiplier - 1); - apply(input, status); - U_ASSERT(input.getMagnitude() == magnitude + multiplier); + // If the number rounded to zero, exit. + if (input.isZero() || U_FAILURE(status)) { + return multiplier; + } + + // If the new magnitude after rounding is the same as it was before rounding, then we are done. + // This case applies to most numbers. + if (input.getMagnitude() == magnitude + multiplier) { + return multiplier; } - return multiplier; + // If the above case DIDN'T apply, then we have a case like 99.9 -> 100 or 999.9 -> 1000: + // The number rounded up to the next magnitude. Check if the multiplier changes; if it doesn't, + // we do not need to make any more adjustments. 
+ int _multiplier = producer.getMultiplier(magnitude + 1); + if (multiplier == _multiplier) { + return multiplier; + } + + // We have a case like 999.9 -> 1000, where the correct output is "1K", not "1000". + // Fix the magnitude and re-apply the rounding strategy. + input.adjustMagnitude(_multiplier - multiplier); + apply(input, status); + return _multiplier; } /** This is the method that contains the actual rounding logic. */ @@ -331,6 +342,7 @@ void Rounder::apply(impl::DecimalQuantity &value, UErrorCode& status) const { case RND_CURRENCY: // Call .withCurrency() before .apply()! U_ASSERT(false); + break; case RND_PASS_THROUGH: break; diff --git a/deps/icu-small/source/i18n/number_stringbuilder.cpp b/deps/icu-small/source/i18n/number_stringbuilder.cpp index e6e86bd4291d6e..37159d7e53a60a 100644 --- a/deps/icu-small/source/i18n/number_stringbuilder.cpp +++ b/deps/icu-small/source/i18n/number_stringbuilder.cpp @@ -191,6 +191,30 @@ NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t return count; } +int32_t +NumberStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, + int32_t startOther, int32_t endOther, Field field, UErrorCode& status) { + int32_t thisLength = endThis - startThis; + int32_t otherLength = endOther - startOther; + int32_t count = otherLength - thisLength; + int32_t position; + if (count > 0) { + // Overall, chars need to be added. + position = prepareForInsert(startThis, count, status); + } else { + // Overall, chars need to be removed or kept the same. 
+ position = remove(startThis, -count); + } + if (U_FAILURE(status)) { + return count; + } + for (int32_t i = 0; i < otherLength; i++) { + getCharPtr()[position + i] = unistr.charAt(startOther + i); + getFieldPtr()[position + i] = field; + } + return count; +} + int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) { return insert(fLength, other, status); } @@ -296,6 +320,19 @@ int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count return fZero + index; } +int32_t NumberStringBuilder::remove(int32_t index, int32_t count) { + // TODO: Reset the heap here? (If the string after removal can fit on stack?) + int32_t position = index + fZero; + uprv_memmove2(getCharPtr() + position, + getCharPtr() + position + count, + sizeof(char16_t) * (fLength - index - count)); + uprv_memmove2(getFieldPtr() + position, + getFieldPtr() + position + count, + sizeof(Field) * (fLength - index - count)); + fLength -= count; + return position; +} + UnicodeString NumberStringBuilder::toUnicodeString() const { return UnicodeString(getCharPtr() + fZero, fLength); } diff --git a/deps/icu-small/source/i18n/number_stringbuilder.h b/deps/icu-small/source/i18n/number_stringbuilder.h index f08dcb1d1bed6a..a97cc9ca026ad0 100644 --- a/deps/icu-small/source/i18n/number_stringbuilder.h +++ b/deps/icu-small/source/i18n/number_stringbuilder.h @@ -77,6 +77,9 @@ class U_I18N_API NumberStringBuilder : public UMemory { int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field, UErrorCode &status); + int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, + int32_t startOther, int32_t endOther, Field field, UErrorCode& status); + int32_t append(const NumberStringBuilder &other, UErrorCode &status); int32_t insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status); @@ -123,6 +126,8 @@ class U_I18N_API NumberStringBuilder : public UMemory { int32_t 
prepareForInsert(int32_t index, int32_t count, UErrorCode &status); int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status); + + int32_t remove(int32_t index, int32_t count); }; } // namespace impl diff --git a/deps/icu-small/source/i18n/number_types.h b/deps/icu-small/source/i18n/number_types.h index 2bc21bd40dcb18..c01765e2cea6c6 100644 --- a/deps/icu-small/source/i18n/number_types.h +++ b/deps/icu-small/source/i18n/number_types.h @@ -31,7 +31,7 @@ typedef UNumberFormatPadPosition PadPosition; typedef UNumberCompactStyle CompactStyle; // ICU4J Equivalent: RoundingUtils.MAX_INT_FRAC_SIG -static constexpr int32_t kMaxIntFracSig = 100; +static constexpr int32_t kMaxIntFracSig = 999; // ICU4J Equivalent: RoundingUtils.DEFAULT_ROUNDING_MODE static constexpr RoundingMode kDefaultMode = RoundingMode::UNUM_FOUND_HALFEVEN; @@ -42,10 +42,6 @@ static constexpr char16_t kFallbackPaddingString[] = u" "; // ICU4J Equivalent: NumberFormatterImpl.DEFAULT_CURRENCY static constexpr char16_t kDefaultCurrency[] = u"XXX"; -// FIXME: New error codes: -static constexpr UErrorCode U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR; -static constexpr UErrorCode U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR; - // Forward declarations: class Modifier; @@ -142,6 +138,13 @@ class U_I18N_API AffixPatternProvider { virtual bool negativeHasMinusSign() const = 0; virtual bool containsSymbolType(AffixPatternType, UErrorCode &) const = 0; + + /** + * True if the pattern has a number placeholder like "0" or "#,##0.00"; false if the pattern does not + * have one. This is used in cases like compact notation, where the pattern replaces the entire + * number instead of rendering the number. 
+ */ + virtual bool hasBody() const = 0; }; /** @@ -230,10 +233,21 @@ class U_I18N_API MicroPropsGenerator { virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0; }; +/** + * An interface used by compact notation and scientific notation to choose a multiplier while rounding. + */ class MultiplierProducer { public: virtual ~MultiplierProducer() = default; + /** + * Maps a magnitude to a multiplier in powers of ten. For example, in compact notation in English, a magnitude of 5 + * (e.g., 100,000) should return a multiplier of -3, since the number is displayed in thousands. + * + * @param magnitude + * The power of ten of the input number. + * @return The shift in powers of ten. + */ virtual int32_t getMultiplier(int32_t magnitude) const = 0; }; diff --git a/deps/icu-small/source/i18n/rbnf.cpp b/deps/icu-small/source/i18n/rbnf.cpp index 5b54e303f3ab20..3385f300b11afe 100644 --- a/deps/icu-small/source/i18n/rbnf.cpp +++ b/deps/icu-small/source/i18n/rbnf.cpp @@ -1371,7 +1371,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text, ParsePosition working_pp(0); Formattable working_result; - rp->parse(workingText, working_pp, kMaxDouble, working_result); + rp->parse(workingText, working_pp, kMaxDouble, 0, working_result); if (working_pp.getIndex() > high_pp.getIndex()) { high_pp = working_pp; high_result = working_result; diff --git a/deps/icu-small/source/i18n/regexcmp.cpp b/deps/icu-small/source/i18n/regexcmp.cpp index 6cfa61f187e01e..410ff9513bb82a 100644 --- a/deps/icu-small/source/i18n/regexcmp.cpp +++ b/deps/icu-small/source/i18n/regexcmp.cpp @@ -4450,11 +4450,9 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB // See if the property looks like a Java "InBlockName", which // we will recast as "Block=BlockName" // - static const UChar IN[] = {0x49, 0x6E, 0}; // "In" - static const UChar BLOCK[] = {0x42, 0x6C, 0x6f, 0x63, 0x6b, 0x3d, 00}; // "Block=" - if 
(mPropName.startsWith(IN, 2) && propName.length()>=3) { + if (mPropName.startsWith(u"In", 2) && propName.length()>=3) { setExpr.truncate(4); // Leaves "[\p{", or "[\P{" - setExpr.append(BLOCK, -1); + setExpr.append(u"Block=", -1); setExpr.append(UnicodeString(mPropName, 2)); // Property with the leading "In" removed. setExpr.append(chRBrace); setExpr.append(chRBracket); diff --git a/deps/icu-small/source/i18n/rematch.cpp b/deps/icu-small/source/i18n/rematch.cpp index 1bdad187762a25..f2521822078169 100644 --- a/deps/icu-small/source/i18n/rematch.cpp +++ b/deps/icu-small/source/i18n/rematch.cpp @@ -5469,7 +5469,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; - if (lbStartIdx > 0) { + if (lbStartIdx > 0 && lbStartIdx < fInputLength) { U16_SET_CP_START(inputBuf, 0, lbStartIdx); } } else { @@ -5546,7 +5546,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu if (lbStartIdx < 0) { // First time through loop. 
lbStartIdx = fp->fInputIdx - minML; - if (lbStartIdx > 0) { + if (lbStartIdx > 0 && lbStartIdx < fInputLength) { U16_SET_CP_START(inputBuf, 0, lbStartIdx); } } else { diff --git a/deps/icu-small/source/i18n/simpletz.cpp b/deps/icu-small/source/i18n/simpletz.cpp index 557b02620bec00..9bce8ed55705ee 100644 --- a/deps/icu-small/source/i18n/simpletz.cpp +++ b/deps/icu-small/source/i18n/simpletz.cpp @@ -177,7 +177,7 @@ void SimpleTimeZone::construct(int32_t rawOffsetGMT, decodeRules(status); - if (savingsDST <= 0) { + if (savingsDST == 0) { status = U_ILLEGAL_ARGUMENT_ERROR; } } @@ -686,7 +686,7 @@ SimpleTimeZone::setRawOffset(int32_t offsetMillis) void SimpleTimeZone::setDSTSavings(int32_t millisSavedDuringDST, UErrorCode& status) { - if (millisSavedDuringDST <= 0) { + if (millisSavedDuringDST == 0) { status = U_ILLEGAL_ARGUMENT_ERROR; } else { diff --git a/deps/icu-small/source/i18n/ucol.cpp b/deps/icu-small/source/i18n/ucol.cpp index 34a394682f1c16..7d3392da223503 100644 --- a/deps/icu-small/source/i18n/ucol.cpp +++ b/deps/icu-small/source/i18n/ucol.cpp @@ -95,6 +95,7 @@ ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferS Collator *newColl = Collator::fromUCollator(coll)->clone(); if (newColl == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } else { *status = U_SAFECLONE_ALLOCATED_WARNING; } diff --git a/deps/icu-small/source/i18n/ucol_res.cpp b/deps/icu-small/source/i18n/ucol_res.cpp index 0f1d6d23b132ae..76975ecc01de63 100644 --- a/deps/icu-small/source/i18n/ucol_res.cpp +++ b/deps/icu-small/source/i18n/ucol_res.cpp @@ -451,6 +451,7 @@ CollationLoader::loadFromData(UErrorCode &errorCode) { const CollationCacheEntry *entry = new CollationCacheEntry(validLocale, t.getAlias()); if(entry == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } else { t.orphan(); } diff --git a/deps/icu-small/source/i18n/udatpg.cpp b/deps/icu-small/source/i18n/udatpg.cpp index 9ba82b529c507b..febf73b3ce499f 100644 --- 
a/deps/icu-small/source/i18n/udatpg.cpp +++ b/deps/icu-small/source/i18n/udatpg.cpp @@ -181,6 +181,25 @@ udatpg_getAppendItemName(const UDateTimePatternGenerator *dtpg, return result.getBuffer(); } +U_CAPI int32_t U_EXPORT2 +udatpg_getFieldDisplayName(const UDateTimePatternGenerator *dtpg, + UDateTimePatternField field, + UDateTimePGDisplayWidth width, + UChar *fieldName, int32_t capacity, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) + return -1; + if (fieldName == NULL ? capacity != 0 : capacity < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + UnicodeString result = ((const DateTimePatternGenerator *)dtpg)->getFieldDisplayName(field,width); + if (fieldName == NULL) { + return result.length(); + } + return result.extract(fieldName, capacity, *pErrorCode); +} + U_CAPI void U_EXPORT2 udatpg_setDateTimeFormat(const UDateTimePatternGenerator *dtpg, const UChar *dtFormat, int32_t length) { diff --git a/deps/icu-small/source/i18n/unicode/compactdecimalformat.h b/deps/icu-small/source/i18n/unicode/compactdecimalformat.h index 3fbe5da9cee4b2..d682d2d0e74278 100644 --- a/deps/icu-small/source/i18n/unicode/compactdecimalformat.h +++ b/deps/icu-small/source/i18n/unicode/compactdecimalformat.h @@ -16,7 +16,7 @@ #include "unicode/utypes.h" /** * \file - * \brief C++ API: Formats decimal numbers in compact form. + * \brief C++ API: Compatibility APIs for compact decimal number formatting. */ #if !UCONFIG_NO_FORMATTING @@ -30,6 +30,11 @@ U_NAMESPACE_BEGIN class PluralRules; /** + *

IMPORTANT: New users are strongly encouraged to see if + * numberformatter.h fits their use case. Although not deprecated, this header + * is provided for backwards compatibility only. + *


+ * * The CompactDecimalFormat produces abbreviated numbers, suitable for display in * environments will limited real estate. For example, 'Hits: 1.2B' instead of * 'Hits: 1,200,000,000'. The format will be appropriate for the given language, @@ -56,6 +61,9 @@ class U_I18N_API CompactDecimalFormat : public DecimalFormat { /** * Returns a compact decimal instance for specified locale. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @param style whether to use short or long style. * @param status error code returned here. diff --git a/deps/icu-small/source/i18n/unicode/datefmt.h b/deps/icu-small/source/i18n/unicode/datefmt.h index d70d8d1dd5cd99..c895183931546e 100644 --- a/deps/icu-small/source/i18n/unicode/datefmt.h +++ b/deps/icu-small/source/i18n/unicode/datefmt.h @@ -44,7 +44,8 @@ class TimeZone; class DateTimePatternGenerator; // explicit template instantiation. see digitlst.h -#if defined (_MSC_VER) +// (When building DLLs for Windows this is required.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN template class U_I18N_API EnumSet; diff --git a/deps/icu-small/source/i18n/unicode/dcfmtsym.h b/deps/icu-small/source/i18n/unicode/dcfmtsym.h index 4dc6f950f294ca..e58befa31bda88 100644 --- a/deps/icu-small/source/i18n/unicode/dcfmtsym.h +++ b/deps/icu-small/source/i18n/unicode/dcfmtsym.h @@ -80,10 +80,6 @@ U_NAMESPACE_BEGIN * If you supply a pattern with multiple grouping characters, the interval * between the last one and the end of the integer is the one that is * used. So "#,##,###,####" == "######,####" == "##,####,####". - *

- * This class only handles localized digits where the 10 digits are - * contiguous in Unicode, from 0 to 9. Other digits sets (such as - * superscripts) would need a different subclass. */ class U_I18N_API DecimalFormatSymbols : public UObject { public: @@ -396,6 +392,13 @@ class U_I18N_API DecimalFormatSymbols : public UObject { inline UBool isCustomIntlCurrencySymbol() const { return fIsCustomIntlCurrencySymbol; } + + /** + * @internal For ICU use only + */ + inline UChar32 getCodePointZero() const { + return fCodePointZero; + } #endif /* U_HIDE_INTERNAL_API */ /** @@ -410,9 +413,22 @@ class U_I18N_API DecimalFormatSymbols : public UObject { * @return the format symbol by the param 'symbol' * @internal */ - inline const UnicodeString &getConstSymbol(ENumberFormatSymbol symbol) const; + inline const UnicodeString& getConstSymbol(ENumberFormatSymbol symbol) const; #ifndef U_HIDE_INTERNAL_API + /** + * Returns the const UnicodeString reference, like getConstSymbol, + * corresponding to the digit with the given value. This is equivalent + * to accessing the symbol from getConstSymbol with the corresponding + * key, such as kZeroDigitSymbol or kOneDigitSymbol. + * + * @param digit The digit, an integer between 0 and 9 inclusive. + * If outside the range 0 to 9, the zero digit is returned. + * @return the format symbol for the given digit. + * @internal This API is currently for ICU use only. + */ + inline const UnicodeString& getConstDigitSymbol(int32_t digit) const; + /** * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API. * @internal @@ -444,6 +460,22 @@ class U_I18N_API DecimalFormatSymbols : public UObject { */ UnicodeString fNoSymbol; + /** + * Dealing with code points is faster than dealing with strings when formatting. Because of + * this, we maintain a value containing the zero code point that is used whenever digitStrings + * represents a sequence of ten code points in order. + * + *

If the value stored here is positive, it means that the code point stored in this value + * corresponds to the digitStrings array, and codePointZero can be used instead of the + * digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does + * *not* contain a sequence of code points, and it must be used directly. + * + *

It is assumed that codePointZero always shadows the value in digitStrings. codePointZero + * should never be set directly; rather, it should be updated only when digitStrings mutates. + * That is, the flow of information is digitStrings -> codePointZero, not the other way. + */ + UChar32 fCodePointZero; + Locale locale; char actualLocale[ULOC_FULLNAME_CAPACITY]; @@ -469,7 +501,7 @@ DecimalFormatSymbols::getSymbol(ENumberFormatSymbol symbol) const { return *strPtr; } -// See comments above for this function. Not hidden with #ifndef U_HIDE_INTERNAL_API +// See comments above for this function. Not hidden with #ifdef U_HIDE_INTERNAL_API inline const UnicodeString & DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const { const UnicodeString *strPtr; @@ -481,6 +513,19 @@ DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const { return *strPtr; } +#ifndef U_HIDE_INTERNAL_API +inline const UnicodeString& DecimalFormatSymbols::getConstDigitSymbol(int32_t digit) const { + if (digit < 0 || digit > 9) { + digit = 0; + } + if (digit == 0) { + return fSymbols[kZeroDigitSymbol]; + } + ENumberFormatSymbol key = static_cast(kOneDigitSymbol + digit - 1); + return fSymbols[key]; +} +#endif + // ------------------------------------- inline void @@ -497,14 +542,20 @@ DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString // If the zero digit is being set to a known zero digit according to Unicode, // then we automatically set the corresponding 1-9 digits - if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) { + // Also record updates to fCodePointZero. Be conservative if in doubt. 
+ if (symbol == kZeroDigitSymbol) { UChar32 sym = value.char32At(0); - if ( u_charDigitValue(sym) == 0 ) { + if ( propogateDigits && u_charDigitValue(sym) == 0 && value.countChar32() == 1 ) { + fCodePointZero = sym; for ( int8_t i = 1 ; i<= 9 ; i++ ) { sym++; fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym); } + } else { + fCodePointZero = -1; } + } else if (symbol >= kOneDigitSymbol && symbol <= kNineDigitSymbol) { + fCodePointZero = -1; } } diff --git a/deps/icu-small/source/i18n/unicode/decimfmt.h b/deps/icu-small/source/i18n/unicode/decimfmt.h index 790053636d5957..b062208d9b1f93 100644 --- a/deps/icu-small/source/i18n/unicode/decimfmt.h +++ b/deps/icu-small/source/i18n/unicode/decimfmt.h @@ -30,7 +30,7 @@ #include "unicode/utypes.h" /** * \file - * \brief C++ API: Formats decimal numbers. + * \brief C++ API: Compatibility APIs for decimal formatting. */ #if !UCONFIG_NO_FORMATTING @@ -67,13 +67,19 @@ class PluralRules; class VisibleDigitsWithExponent; // explicit template instantiation. see digitlst.h -#if defined (_MSC_VER) +// (When building DLLs for Windows this is required.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN template class U_I18N_API EnumSet; #endif /** + *

IMPORTANT: New users are strongly encouraged to see if + * numberformatter.h fits their use case. Although not deprecated, this header + * is provided for backwards compatibility only. + *


+ * * DecimalFormat is a concrete subclass of NumberFormat that formats decimal * numbers. It has a variety of features designed to make it possible to parse * and format numbers in any locale, including support for Western, Arabic, or @@ -688,6 +694,9 @@ class U_I18N_API DecimalFormat: public NumberFormat { * on NumberFormat such as createInstance. These factories will * return the most appropriate sub-class of NumberFormat for a given * locale. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * @param status Output param set to success/failure code. If the * pattern is invalid this will be set to a failure code. * @stable ICU 2.0 @@ -703,6 +712,9 @@ class U_I18N_API DecimalFormat: public NumberFormat { * on NumberFormat such as createInstance. These factories will * return the most appropriate sub-class of NumberFormat for a given * locale. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * @param pattern A non-localized pattern string. * @param status Output param set to success/failure code. If the * pattern is invalid this will be set to a failure code. @@ -721,6 +733,9 @@ class U_I18N_API DecimalFormat: public NumberFormat { * createInstance or createCurrencyInstance. If you need only minor adjustments * to a standard format, you can modify the format returned by * a NumberFormat factory method. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * * @param pattern a non-localized pattern string * @param symbolsToAdopt the set of symbols to be used. The caller should not @@ -826,6 +841,9 @@ class U_I18N_API DecimalFormat: public NumberFormat { * createInstance or createCurrencyInstance. If you need only minor adjustments * to a standard format, you can modify the format returned by * a NumberFormat factory method. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * * @param pattern a non-localized pattern string * @param symbolsToAdopt the set of symbols to be used. The caller should not @@ -849,6 +867,9 @@ class U_I18N_API DecimalFormat: public NumberFormat { * createInstance or createCurrencyInstance. If you need only minor adjustments * to a standard format, you can modify the format returned by * a NumberFormat factory method. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * * @param pattern a non-localized pattern string * @param symbols the set of symbols to be used @@ -1964,12 +1985,14 @@ class U_I18N_API DecimalFormat: public NumberFormat { UCurrencyUsage getCurrencyUsage() const; +#ifndef U_HIDE_DEPRECATED_API /** * The resource tags we use to retrieve decimal format data from * locale resource bundles. * @deprecated ICU 3.4. This string has no public purpose. Please don't use it. */ static const char fgNumberPatterns[]; +#endif // U_HIDE_DEPRECATED_API #ifndef U_HIDE_INTERNAL_API /** diff --git a/deps/icu-small/source/i18n/unicode/dtptngen.h b/deps/icu-small/source/i18n/unicode/dtptngen.h index 5712edbb9ff607..feb465e7997401 100644 --- a/deps/icu-small/source/i18n/unicode/dtptngen.h +++ b/deps/icu-small/source/i18n/unicode/dtptngen.h @@ -263,14 +263,29 @@ class U_I18N_API DateTimePatternGenerator : public UObject { /** * Getter corresponding to setAppendItemNames. Values below 0 or at or above - * UDATPG_FIELD_COUNT are illegal arguments. + * UDATPG_FIELD_COUNT are illegal arguments. Note: The more general method + * for getting date/time field display names is getFieldDisplayName. * * @param field such as UDATPG_ERA_FIELD. * @return name for field + * @see getFieldDisplayName * @stable ICU 3.8 */ const UnicodeString& getAppendItemName(UDateTimePatternField field) const; +#ifndef U_HIDE_DRAFT_API + /** + * The general interface to get a display name for a particular date/time field, + * in one of several possible display widths. + * + * @param field The desired UDateTimePatternField, such as UDATPG_ERA_FIELD. + * @param width The desired UDateTimePGDisplayWidth, such as UDATPG_ABBREVIATED. + * @return. 
The display name for field + * @draft ICU 61 + */ + UnicodeString getFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width) const; +#endif // U_HIDE_DRAFT_API + /** * The DateTimeFormat is a message format pattern used to compose date and * time patterns. The default pattern in the root locale is "{1} {0}", where @@ -507,13 +522,17 @@ class U_I18N_API DateTimePatternGenerator : public UObject { */ DateTimePatternGenerator& operator=(const DateTimePatternGenerator& other); + // TODO(ticket:13619): re-enable when UDATPG_NARROW no longer in draft mode. + // static const int32_t UDATPG_WIDTH_COUNT = UDATPG_NARROW + 1; + Locale pLocale; // pattern locale FormatParser *fp; DateTimeMatcher* dtMatcher; DistanceInfo *distanceInfo; PatternMap *patternMap; UnicodeString appendItemFormats[UDATPG_FIELD_COUNT]; - UnicodeString appendItemNames[UDATPG_FIELD_COUNT]; + // TODO(ticket:13619): [3] -> UDATPG_WIDTH_COUNT + UnicodeString fieldDisplayNames[UDATPG_FIELD_COUNT][3]; UnicodeString dateTimeFormat; UnicodeString decimal; DateTimeMatcher *skipMatcher; @@ -543,8 +562,11 @@ class U_I18N_API DateTimePatternGenerator : public UObject { void setDateTimeFromCalendar(const Locale& locale, UErrorCode& status); void setDecimalSymbols(const Locale& locale, UErrorCode& status); UDateTimePatternField getAppendFormatNumber(const char* field) const; - UDateTimePatternField getAppendNameNumber(const char* field) const; - UnicodeString& getMutableAppendItemName(UDateTimePatternField field); +#ifndef U_HIDE_DRAFT_API + UDateTimePatternField getFieldAndWidthIndices(const char* key, UDateTimePGDisplayWidth* widthP) const; + void setFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width, const UnicodeString& value); + UnicodeString& getMutableFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width); +#endif // U_HIDE_DRAFT_API void getAppendName(UDateTimePatternField field, UnicodeString& value); UnicodeString 
mapSkeletonMetacharacters(const UnicodeString& patternForm, int32_t* flags, UErrorCode& status); int32_t getCanonicalIndex(const UnicodeString& field); diff --git a/deps/icu-small/source/i18n/unicode/measfmt.h b/deps/icu-small/source/i18n/unicode/measfmt.h index 251fd213b50da7..14399dd59a700a 100644 --- a/deps/icu-small/source/i18n/unicode/measfmt.h +++ b/deps/icu-small/source/i18n/unicode/measfmt.h @@ -22,7 +22,7 @@ /** * \file - * \brief C++ API: Formatter for measure objects. + * \brief C++ API: Compatibility APIs for measure formatting. */ /** @@ -87,8 +87,9 @@ class ListFormatter; class DateFormat; /** - * - * A formatter for measure objects. + *

IMPORTANT: New users are strongly encouraged to see if + * numberformatter.h fits their use case. Although not deprecated, this header + * is provided for backwards compatibility only. * * @see Format * @author Alan Liu @@ -101,6 +102,9 @@ class U_I18N_API MeasureFormat : public Format { /** * Constructor. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of MeasureFormat. * @stable ICU 53 */ MeasureFormat( @@ -108,6 +112,9 @@ class U_I18N_API MeasureFormat : public Format { /** * Constructor. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of MeasureFormat. * @stable ICU 53 */ MeasureFormat( @@ -227,6 +234,9 @@ class U_I18N_API MeasureFormat : public Format { /** * Return a formatter for CurrencyAmount objects in the given * locale. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of MeasureFormat. * @param locale desired locale * @param ec input-output error code * @return a formatter object, or NULL upon error @@ -238,6 +248,9 @@ class U_I18N_API MeasureFormat : public Format { /** * Return a formatter for CurrencyAmount objects in the default * locale. + *

+ * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param ec input-output error code * @return a formatter object, or NULL upon error * @stable ICU 3.0 diff --git a/deps/icu-small/source/i18n/unicode/measunit.h b/deps/icu-small/source/i18n/unicode/measunit.h index 08c8d6f588dac7..f7ddb4e20c559f 100644 --- a/deps/icu-small/source/i18n/unicode/measunit.h +++ b/deps/icu-small/source/i18n/unicode/measunit.h @@ -196,8 +196,8 @@ class U_I18N_API MeasureUnit: public UObject { * ICU use only. * @internal */ - static MeasureUnit *resolveUnitPerUnit( - const MeasureUnit &unit, const MeasureUnit &perUnit); + static MeasureUnit resolveUnitPerUnit( + const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved); #endif /* U_HIDE_INTERNAL_API */ // All code between the "Start generated createXXX methods" comment and @@ -832,15 +832,13 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createPicometer(UErrorCode &status); -#ifndef U_HIDE_DRAFT_API /** * Returns unit of length: point. * Caller owns returned value and must free it. * @param status ICU error code. - * @draft ICU 59 + * @stable ICU 59 */ static MeasureUnit *createPoint(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ /** * Returns unit of length: yard. 
diff --git a/deps/icu-small/source/i18n/unicode/nounit.h b/deps/icu-small/source/i18n/unicode/nounit.h index 04fc84b33aa338..290e77e8806040 100644 --- a/deps/icu-small/source/i18n/unicode/nounit.h +++ b/deps/icu-small/source/i18n/unicode/nounit.h @@ -10,17 +10,17 @@ #ifndef __NOUNIT_H__ #define __NOUNIT_H__ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/measunit.h" /** * \file * \brief C++ API: units for percent and permille */ - -#include "unicode/measunit.h" - -#if !UCONFIG_NO_FORMATTING - U_NAMESPACE_BEGIN #ifndef U_HIDE_DRAFT_API diff --git a/deps/icu-small/source/i18n/unicode/numberformatter.h b/deps/icu-small/source/i18n/unicode/numberformatter.h index 4a11c2f9157e61..3fbb33cceeabf7 100644 --- a/deps/icu-small/source/i18n/unicode/numberformatter.h +++ b/deps/icu-small/source/i18n/unicode/numberformatter.h @@ -88,10 +88,6 @@ * * *

- * * The narrow format for currencies is not currently supported; this is a known issue that will be fixed in a - * future version. See #11666 for more information. - * - *

* This enum is similar to {@link com.ibm.icu.text.MeasureFormat.FormatWidth}. * * @draft ICU 60 @@ -155,27 +151,122 @@ typedef enum UNumberUnitWidth { * * @draft ICU 60 */ - UNUM_UNIT_WIDTH_HIDDEN, + UNUM_UNIT_WIDTH_HIDDEN +#ifndef U_HIDE_INTERNAL_API + , /** * One more than the highest UNumberUnitWidth value. * * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. */ UNUM_UNIT_WIDTH_COUNT +#endif // U_HIDE_INTERNAL_API } UNumberUnitWidth; /** - * An enum declaring how to denote positive and negative numbers. Example outputs when formatting 123 and -123 in - * en-US: + * An enum declaring the strategy for when and how to display grouping separators (i.e., the + * separator, often a comma or period, after every 2-3 powers of ten). The choices are several + * pre-built strategies for different use cases that employ locale data whenever possible. Example + * outputs for 1234 and 1234567 in en-IN: + * + *

    + *
  • OFF: 1234 and 1234567 + *
  • MIN2: 1234 and 12,34,567 + *
  • AUTO: 1,234 and 12,34,567 + *
  • ON_ALIGNED: 1,234 and 12,34,567 + *
  • THOUSANDS: 1,234 and 1,234,567 + *
* *

+ * The default is AUTO, which displays grouping separators unless the locale data says that grouping + * is not customary. To force grouping for all numbers greater than 1000 consistently across locales, + * use ON_ALIGNED. On the other hand, to display grouping less frequently than the default, use MIN2 + * or OFF. See the docs of each option for details. + * + *

+ * Note: This enum specifies the strategy for grouping sizes. To set which character to use as the + * grouping separator, use the "symbols" setter. + * + * @draft ICU 61 + */ +typedef enum UGroupingStrategy { + /** + * Do not display grouping separators in any locale. + * + * @draft ICU 61 + */ + UNUM_GROUPING_OFF, + + /** + * Display grouping using locale defaults, except do not show grouping on values smaller than + * 10000 (such that there is a minimum of two digits before the first separator). + * + *

+ * Note that locales may restrict grouping separators to be displayed only on 1 million or + * greater (for example, ee and hu) or disable grouping altogether (for example, bg currency). + * + *

+ * Locale data is used to determine whether to separate larger numbers into groups of 2 + * (customary in South Asia) or groups of 3 (customary in Europe and the Americas). + * + * @draft ICU 61 + */ + UNUM_GROUPING_MIN2, + + /** + * Display grouping using the default strategy for all locales. This is the default behavior. + * + *

+ * Note that locales may restrict grouping separators to be displayed only on 1 million or + * greater (for example, ee and hu) or disable grouping altogether (for example, bg currency). + * + *

+ * Locale data is used to determine whether to separate larger numbers into groups of 2 + * (customary in South Asia) or groups of 3 (customary in Europe and the Americas). + * + * @draft ICU 61 + */ + UNUM_GROUPING_AUTO, + + /** + * Always display the grouping separator on values of at least 1000. + * + *

+ * This option ignores the locale data that restricts or disables grouping, described in MIN2 and + * AUTO. This option may be useful to normalize the alignment of numbers, such as in a + * spreadsheet. + * + *

+ * Locale data is used to determine whether to separate larger numbers into groups of 2 + * (customary in South Asia) or groups of 3 (customary in Europe and the Americas). + * + * @draft ICU 61 + */ + UNUM_GROUPING_ON_ALIGNED, + + /** + * Use the Western defaults: groups of 3 and enabled for all numbers 1000 or greater. Do not use + * locale data for determining the grouping strategy. + * + * @draft ICU 61 + */ + UNUM_GROUPING_THOUSANDS + +} UGroupingStrategy; + +/** + * An enum declaring how to denote positive and negative numbers. Example outputs when formatting + * 123, 0, and -123 in en-US: + * *

    - *
  • AUTO: "123", "-123" - *
  • ALWAYS: "+123", "-123" - *
  • NEVER: "123", "123" - *
  • ACCOUNTING: "$123", "($123)" - *
  • ACCOUNTING_ALWAYS: "+$123", "($123)" + *
  • AUTO: "123", "0", and "-123" + *
  • ALWAYS: "+123", "+0", and "-123" + *
  • NEVER: "123", "0", and "123" + *
  • ACCOUNTING: "$123", "$0", and "($123)" + *
  • ACCOUNTING_ALWAYS: "+$123", "+$0", and "($123)" + *
  • EXCEPT_ZERO: "+123", "0", and "-123" + *
  • ACCOUNTING_EXCEPT_ZERO: "+$123", "$0", and "($123)" *
* *

@@ -190,21 +281,22 @@ typedef enum UNumberSignDisplay { * * @draft ICU 60 */ - UNUM_SIGN_AUTO, + UNUM_SIGN_AUTO, /** - * Show the minus sign on negative numbers and the plus sign on positive numbers. + * Show the minus sign on negative numbers and the plus sign on positive numbers, including zero. + * To hide the sign on zero, see {@link UNUM_SIGN_EXCEPT_ZERO}. * * @draft ICU 60 */ - UNUM_SIGN_ALWAYS, + UNUM_SIGN_ALWAYS, /** * Do not show the sign on positive or negative numbers. * * @draft ICU 60 */ - UNUM_SIGN_NEVER, + UNUM_SIGN_NEVER, /** * Use the locale-dependent accounting format on negative numbers, and do not show the sign on positive numbers. @@ -220,22 +312,44 @@ typedef enum UNumberSignDisplay { * * @draft ICU 60 */ - UNUM_SIGN_ACCOUNTING, + UNUM_SIGN_ACCOUNTING, /** - * Use the locale-dependent accounting format on negative numbers, and show the plus sign on positive numbers. - * For more information on the accounting format, see the ACCOUNTING sign display strategy. + * Use the locale-dependent accounting format on negative numbers, and show the plus sign on + * positive numbers, including zero. For more information on the accounting format, see the + * ACCOUNTING sign display strategy. To hide the sign on zero, see + * {@link UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO}. * * @draft ICU 60 */ - UNUM_SIGN_ACCOUNTING_ALWAYS, + UNUM_SIGN_ACCOUNTING_ALWAYS, + + /** + * Show the minus sign on negative numbers and the plus sign on positive numbers. Do not show a + * sign on zero. + * + * @draft ICU 61 + */ + UNUM_SIGN_EXCEPT_ZERO, + + /** + * Use the locale-dependent accounting format on negative numbers, and show the plus sign on + * positive numbers. Do not show a sign on zero. For more information on the accounting format, + * see the ACCOUNTING sign display strategy. + * + * @draft ICU 61 + */ + UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO +#ifndef U_HIDE_INTERNAL_API + , /** * One more than the highest UNumberSignDisplay value. 
* * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. */ - UNUM_SIGN_COUNT + UNUM_SIGN_COUNT +#endif // U_HIDE_INTERNAL_API } UNumberSignDisplay; /** @@ -261,14 +375,17 @@ typedef enum UNumberDecimalSeparatorDisplay { * * @draft ICU 60 */ - UNUM_DECIMAL_SEPARATOR_ALWAYS, + UNUM_DECIMAL_SEPARATOR_ALWAYS +#ifndef U_HIDE_INTERNAL_API + , /** * One more than the highest UNumberDecimalSeparatorDisplay value. * * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. */ UNUM_DECIMAL_SEPARATOR_COUNT +#endif // U_HIDE_INTERNAL_API } UNumberDecimalMarkDisplay; U_NAMESPACE_BEGIN namespace number { // icu::number @@ -283,11 +400,27 @@ class Rounder; class FractionRounder; class CurrencyRounder; class IncrementRounder; -class Grouper; class IntegerWidth; namespace impl { +#ifndef U_HIDE_INTERNAL_API +/** + * Datatype for minimum/maximum fraction digits. Must be able to hold kMaxIntFracSig. + * + * @internal + */ +typedef int16_t digits_t; + +/** + * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built + * using the "safe" code path. The first two calls to .format() will trigger the unsafe code path. 
+ * + * @internal + */ +static constexpr int32_t DEFAULT_THRESHOLD = 3; +#endif // U_HIDE_INTERNAL_API + // Forward declarations: class Padder; struct MacroProps; @@ -471,7 +604,7 @@ class U_I18N_API Notation : public UMemory { struct ScientificSettings { int8_t fEngineeringInterval; bool fRequireMinInt; - int8_t fMinExponentDigits; + impl::digits_t fMinExponentDigits; UNumberSignDisplay fExponentSignDisplay; } scientific; @@ -786,14 +919,14 @@ class U_I18N_API Rounder : public UMemory { union RounderUnion { struct FractionSignificantSettings { // For RND_FRACTION, RND_SIGNIFICANT, and RND_FRACTION_SIGNIFICANT - int8_t fMinFrac; - int8_t fMaxFrac; - int8_t fMinSig; - int8_t fMaxSig; + impl::digits_t fMinFrac; + impl::digits_t fMaxFrac; + impl::digits_t fMinSig; + impl::digits_t fMaxSig; } fracSig; struct IncrementSettings { double fIncrement; - int32_t fMinFrac; + impl::digits_t fMinFrac; } increment; // For RND_INCREMENT UCurrencyUsage currencyUsage; // For RND_CURRENCY UErrorCode errorCode; // For RND_ERROR @@ -836,6 +969,20 @@ class U_I18N_API Rounder : public UMemory { /** Version of {@link #apply} that obeys minInt constraints. Used for scientific notation compatibility mode. */ void apply(impl::DecimalQuantity &value, int32_t minInt, UErrorCode status); + /** + * Rounding endpoint used by Engineering and Compact notation. Chooses the most appropriate multiplier (magnitude + * adjustment), applies the adjustment, rounds, and returns the chosen multiplier. + * + *

+ * In most cases, this is simple. However, when rounding the number causes it to cross a multiplier boundary, we + * need to re-do the rounding. For example, to display 999,999 in Engineering notation with 2 sigfigs, first you + * guess the multiplier to be -3. However, then you end up getting 1000E3, which is not the correct output. You then + * change your multiplier to be -6, and you get 1.0E6, which is correct. + * + * @param input The quantity to process. + * @param producer Function to call to return a multiplier based on a magnitude. + * @return The number of orders of magnitude the input was adjusted by this method. + */ int32_t chooseMultiplierAndApply(impl::DecimalQuantity &input, const impl::MultiplierProducer &producer, UErrorCode &status); @@ -1002,53 +1149,6 @@ class U_I18N_API IncrementRounder : public Rounder { friend class Rounder; }; -/** - * @internal This API is a technical preview. It is likely to change in an upcoming release. - */ -class U_I18N_API Grouper : public UMemory { - public: - /** - * @internal This API is a technical preview. It is likely to change in an upcoming release. - */ - static Grouper defaults(); - - /** - * @internal This API is a technical preview. It is likely to change in an upcoming release. - */ - static Grouper minTwoDigits(); - - /** - * @internal This API is a technical preview. It is likely to change in an upcoming release. - */ - static Grouper none(); - - private: - int8_t fGrouping1; // -3 means "bogus"; -2 means "needs locale data"; -1 means "no grouping" - int8_t fGrouping2; - bool fMin2; - - Grouper(int8_t grouping1, int8_t grouping2, bool min2) - : fGrouping1(grouping1), fGrouping2(grouping2), fMin2(min2) {} - - Grouper() : fGrouping1(-3) {}; - - bool isBogus() const { - return fGrouping1 == -3; - } - - /** NON-CONST: mutates the current instance. 
*/ - void setLocaleData(const impl::ParsedPatternInfo &patternInfo); - - bool groupAtPosition(int32_t position, const impl::DecimalQuantity &value) const; - - // To allow MacroProps/MicroProps to initialize empty instances: - friend struct impl::MacroProps; - friend struct impl::MicroProps; - - // To allow NumberFormatterImpl to access isBogus() and perform other operations: - friend class impl::NumberFormatterImpl; -}; - /** * A class that defines the strategy for padding and truncating integers before the decimal separator. * @@ -1080,7 +1180,8 @@ class U_I18N_API IntegerWidth : public UMemory { * For example, with maxInt=3, the number 1234 will get printed as "234". * * @param maxInt - * The maximum number of places before the decimal separator. + * The maximum number of places before the decimal separator. maxInt == -1 means no + * truncation. * @return An IntegerWidth for passing to the NumberFormatter integerWidth() setter. * @draft ICU 60 * @see NumberFormatter @@ -1090,14 +1191,14 @@ class U_I18N_API IntegerWidth : public UMemory { private: union { struct { - int8_t fMinInt; - int8_t fMaxInt; + impl::digits_t fMinInt; + impl::digits_t fMaxInt; } minMaxInt; UErrorCode errorCode; } fUnion; bool fHasError = false; - IntegerWidth(int8_t minInt, int8_t maxInt); + IntegerWidth(impl::digits_t minInt, impl::digits_t maxInt); IntegerWidth(UErrorCode errorCode) { // NOLINT fUnion.errorCode = errorCode; @@ -1132,14 +1233,7 @@ class U_I18N_API IntegerWidth : public UMemory { namespace impl { -/** - * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built - * using the "safe" code path. The first two calls to .format() will trigger the unsafe code path. 
- * - * @internal - */ -static constexpr int32_t DEFAULT_THRESHOLD = 3; - +// Do not enclose entire SymbolsWrapper with #ifndef U_HIDE_INTERNAL_API, needed for a protected field /** @internal */ class U_I18N_API SymbolsWrapper : public UMemory { public: @@ -1155,6 +1249,7 @@ class U_I18N_API SymbolsWrapper : public UMemory { /** @internal */ SymbolsWrapper &operator=(const SymbolsWrapper &other); +#ifndef U_HIDE_INTERNAL_API /** * The provided object is copied, but we do not adopt it. * @internal @@ -1202,6 +1297,7 @@ class U_I18N_API SymbolsWrapper : public UMemory { } return FALSE; } +#endif // U_HIDE_INTERNAL_API private: enum SymbolsPointerType { @@ -1218,14 +1314,72 @@ class U_I18N_API SymbolsWrapper : public UMemory { void doCleanup(); }; +// Do not enclose entire Grouper with #ifndef U_HIDE_INTERNAL_API, needed for a protected field +/** @internal */ +class U_I18N_API Grouper : public UMemory { + public: +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + static Grouper forStrategy(UGroupingStrategy grouping); + + // Future: static Grouper forProperties(DecimalFormatProperties& properties); + + /** @internal */ + Grouper(int16_t grouping1, int16_t grouping2, int16_t minGrouping) + : fGrouping1(grouping1), fGrouping2(grouping2), fMinGrouping(minGrouping) {} +#endif // U_HIDE_INTERNAL_API + + private: + /** + * The grouping sizes, with the following special values: + *

    + *
  • -1 = no grouping + *
  • -2 = needs locale data + *
  • -4 = fall back to Western grouping if not in locale + *
+ */ + int16_t fGrouping1; + int16_t fGrouping2; + + /** + * The minimum grouping size, with the following special values: + *
    + *
  • -2 = needs locale data + *
  • -3 = no less than 2 + *
+ */ + int16_t fMinGrouping; + + Grouper() : fGrouping1(-3) {}; + + bool isBogus() const { + return fGrouping1 == -3; + } + + /** NON-CONST: mutates the current instance. */ + void setLocaleData(const impl::ParsedPatternInfo &patternInfo, const Locale& locale); + + bool groupAtPosition(int32_t position, const impl::DecimalQuantity &value) const; + + // To allow MacroProps/MicroProps to initialize empty instances: + friend struct MacroProps; + friend struct MicroProps; + + // To allow NumberFormatterImpl to access isBogus() and perform other operations: + friend class NumberFormatterImpl; +}; + +// Do not enclose entire Padder with #ifndef U_HIDE_INTERNAL_API, needed for a protected field /** @internal */ class U_I18N_API Padder : public UMemory { public: +#ifndef U_HIDE_INTERNAL_API /** @internal */ static Padder none(); /** @internal */ static Padder codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosition position); +#endif // U_HIDE_INTERNAL_API private: UChar32 fWidth; // -3 = error; -2 = bogus; -1 = no padding @@ -1275,6 +1429,7 @@ class U_I18N_API Padder : public UMemory { friend class impl::NumberFormatterImpl; }; +// Do not enclose entire MacroProps with #ifndef U_HIDE_INTERNAL_API, needed for a protected field /** @internal */ struct U_I18N_API MacroProps : public UMemory { /** @internal */ @@ -1283,6 +1438,9 @@ struct U_I18N_API MacroProps : public UMemory { /** @internal */ MeasureUnit unit; // = NoUnit::base(); + /** @internal */ + MeasureUnit perUnit; // = NoUnit::base(); + /** @internal */ Rounder rounder; // = Rounder(); (bogus) @@ -1375,29 +1533,30 @@ class U_I18N_API NumberFormatterSettings { *
  • Percent: "12.3%" * * - *

    * All units will be properly localized with locale data, and all units are compatible with notation styles, * rounding strategies, and other number formatter settings. * - *

    - * Pass this method any instance of {@link MeasureUnit}. For units of measure: + * Pass this method any instance of {@link MeasureUnit}. For units of measure (which often involve the + * factory methods that return a pointer): * *

    -     * NumberFormatter.with().adoptUnit(MeasureUnit::createMeter(status))
    +     * NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
          * 
    * * Currency: * *
    -     * NumberFormatter.with()::unit(CurrencyUnit(u"USD", status))
    +     * NumberFormatter::with().unit(CurrencyUnit(u"USD", status))
          * 
    * * Percent: * *
    -     * NumberFormatter.with()::unit(NoUnit.percent())
    +     * NumberFormatter::with().unit(NoUnit::percent())
          * 
    * + * See {@link #perUnit} for information on how to format strings like "5 meters per second". + * * The default is to render without units (equivalent to NoUnit.base()). * * @param unit @@ -1406,22 +1565,65 @@ class U_I18N_API NumberFormatterSettings { * @see MeasureUnit * @see Currency * @see NoUnit + * @see #perUnit * @draft ICU 60 */ Derived unit(const icu::MeasureUnit &unit) const; /** * Like unit(), but takes ownership of a pointer. Convenient for use with the MeasureFormat factory - * methods, which return pointers that need ownership. + * methods, which return pointers that need ownership. Example: + * + *
    +     * NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
    +     * 
    * * @param unit - * The unit to render. + * The unit to render. * @return The fluent chain. * @see #unit * @see MeasureUnit * @draft ICU 60 */ - Derived adoptUnit(const icu::MeasureUnit *unit) const; + Derived adoptUnit(icu::MeasureUnit *unit) const; + + /** + * Sets a unit to be used in the denominator. For example, to format "3 m/s", pass METER to the unit and SECOND to + * the perUnit. + * + * Pass this method any instance of {@link MeasureUnit}. Since MeasureUnit factory methods return pointers, the + * {@link #adoptPerUnit} version of this method is often more useful. + * + * The default is not to display any unit in the denominator. + * + * If a per-unit is specified without a primary unit via {@link #unit}, the behavior is undefined. + * + * @param perUnit + * The unit to render in the denominator. + * @return The fluent chain + * @see #unit + * @draft ICU 61 + */ + Derived perUnit(const icu::MeasureUnit &perUnit) const; + + /** + * Like perUnit(), but takes ownership of a pointer. Convenient for use with the MeasureFormat factory + * methods, which return pointers that need ownership. Example: + * + *
    +     * NumberFormatter::with()
    +     *      .adoptUnit(MeasureUnit::createMeter(status))
    +     *      .adoptPerUnit(MeasureUnit::createSecond(status))
    +     * 
    + * + * @param perUnit + * The unit to render in the denominator. + * @return The fluent chain. + * @see #perUnit + * @see MeasureUnit + * @draft ICU 61 + */ + Derived adoptPerUnit(icu::MeasureUnit *perUnit) const; /** * Specifies the rounding strategy to use when formatting numbers. @@ -1456,8 +1658,6 @@ class U_I18N_API NumberFormatterSettings { */ Derived rounding(const Rounder &rounder) const; -#ifndef U_HIDE_INTERNAL_API - /** * Specifies the grouping strategy to use when formatting numbers. * @@ -1471,25 +1671,21 @@ class U_I18N_API NumberFormatterSettings { * The exact grouping widths will be chosen based on the locale. * *

    - * Pass this method the return value of one of the factory methods on {@link Grouper}. For example: + * Pass this method an element from the {@link UGroupingStrategy} enum. For example: * *

    -     * NumberFormatter::with().grouping(Grouper::min2())
    +     * NumberFormatter::with().grouping(UNUM_GROUPING_MIN2)
          * 
    * - * The default is to perform grouping without concern for the minimum grouping digits. + * The default is to perform grouping according to locale data; most locales, but not all locales, + * enable it by default. * - * @param grouper + * @param strategy * The grouping strategy to use. * @return The fluent chain. - * @see Grouper - * @see Notation - * @internal - * @internal ICU 60: This API is technical preview. + * @draft ICU 61 */ - Derived grouping(const Grouper &grouper) const; - -#endif /* U_HIDE_INTERNAL_API */ + Derived grouping(const UGroupingStrategy &strategy) const; /** * Specifies the minimum and maximum number of digits to render before the decimal mark. @@ -1592,7 +1788,7 @@ class U_I18N_API NumberFormatterSettings { * @see NumberingSystem * @draft ICU 60 */ - Derived adoptSymbols(const NumberingSystem *symbols) const; + Derived adoptSymbols(NumberingSystem *symbols) const; /** * Sets the width of the unit (measure unit or currency). Most common values: diff --git a/deps/icu-small/source/i18n/unicode/numfmt.h b/deps/icu-small/source/i18n/unicode/numfmt.h index d6b2a6d53c081d..1332f5256628e3 100644 --- a/deps/icu-small/source/i18n/unicode/numfmt.h +++ b/deps/icu-small/source/i18n/unicode/numfmt.h @@ -28,7 +28,7 @@ /** * \file - * \brief C++ API: Abstract base class for all number formats. + * \brief C++ API: Compatibility APIs for number formatting. */ #if !UCONFIG_NO_FORMATTING @@ -53,16 +53,16 @@ class StringEnumeration; #endif /** + *

    IMPORTANT: New users are strongly encouraged to see if + * numberformatter.h fits their use case. Although not deprecated, this header + * is provided for backwards compatibility only. + *


    * * Abstract base class for all number formats. Provides interface for * formatting and parsing a number. Also provides methods for * determining which locales have number formats, and what their names * are. * - *

    NOTE: Starting in ICU 60, there is a new set of APIs for localized number - * formatting that are designed to be an improvement over DecimalFormat. New users are discouraged - * from using DecimalFormat. For more information, see numberformatter.h. - * * \headerfile unicode/numfmt.h "unicode/numfmt.h" *

    * NumberFormat helps you to format and parse numbers for any locale. @@ -708,6 +708,9 @@ class U_I18N_API NumberFormat : public Format { /** * Create a default style NumberFormat for the current default locale. * The default formatting style is locale dependent. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createInstance(UErrorCode&); @@ -716,6 +719,9 @@ class U_I18N_API NumberFormat : public Format { * Create a default style NumberFormat for the specified locale. * The default formatting style is locale dependent. * @param inLocale the given locale. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createInstance(const Locale& inLocale, @@ -723,6 +729,9 @@ class U_I18N_API NumberFormat : public Format { /** * Create a specific style NumberFormat for the specified locale. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param desiredLocale the given locale. * @param style the given style. * @param errorCode Output param filled with success/failure status. @@ -759,12 +768,18 @@ class U_I18N_API NumberFormat : public Format { /** * Returns a currency format for the current default locale. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createCurrencyInstance(UErrorCode&); /** * Returns a currency format for the specified locale. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ @@ -773,12 +788,18 @@ class U_I18N_API NumberFormat : public Format { /** * Returns a percentage format for the current default locale. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createPercentInstance(UErrorCode&); /** * Returns a percentage format for the specified locale. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ @@ -787,12 +808,18 @@ class U_I18N_API NumberFormat : public Format { /** * Returns a scientific format for the current default locale. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createScientificInstance(UErrorCode&); /** * Returns a scientific format for the specified locale. + *

    + * NOTE: New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/i18n/unicode/simpletz.h b/deps/icu-small/source/i18n/unicode/simpletz.h index 1b23ab79d11383..7f5f1664cca503 100644 --- a/deps/icu-small/source/i18n/unicode/simpletz.h +++ b/deps/icu-small/source/i18n/unicode/simpletz.h @@ -647,7 +647,8 @@ class U_I18N_API SimpleTimeZone: public BasicTimeZone { * Sets the amount of time in ms that the clock is advanced during DST. * @param millisSavedDuringDST the number of milliseconds the time is * advanced with respect to standard time when the daylight savings rules - * are in effect. A positive number, typically one hour (3600000). + * are in effect. Typically one hour (+3600000). The amount could be negative, + * but not 0. * @param status An UErrorCode to receive the status. * @stable ICU 2.0 */ @@ -657,7 +658,8 @@ class U_I18N_API SimpleTimeZone: public BasicTimeZone { * Returns the amount of time in ms that the clock is advanced during DST. * @return the number of milliseconds the time is * advanced with respect to standard time when the daylight savings rules - * are in effect. A positive number, typically one hour (3600000). + * are in effect. Typically one hour (+3600000). The amount could be negative, + * but not 0. * @stable ICU 2.0 */ virtual int32_t getDSTSavings(void) const; diff --git a/deps/icu-small/source/i18n/unicode/udatpg.h b/deps/icu-small/source/i18n/unicode/udatpg.h index 9e3bdd411490b6..54f1254346d10d 100644 --- a/deps/icu-small/source/i18n/unicode/udatpg.h +++ b/deps/icu-small/source/i18n/unicode/udatpg.h @@ -95,6 +95,21 @@ typedef enum UDateTimePatternField { UDATPG_FIELD_COUNT } UDateTimePatternField; +#ifndef U_HIDE_DRAFT_API +/** + * Field display name width constants for udatpg_getFieldDisplayName(). 
+ * @draft ICU 61 + */ +typedef enum UDateTimePGDisplayWidth { + /** @draft ICU 61 */ + UDATPG_WIDE, + /** @draft ICU 61 */ + UDATPG_ABBREVIATED, + /** @draft ICU 61 */ + UDATPG_NARROW +} UDateTimePGDisplayWidth; +#endif // U_HIDE_DRAFT_API + /** * Masks to control forcing the length of specified fields in the returned * pattern to match those in the skeleton (when this would not happen @@ -410,12 +425,14 @@ udatpg_setAppendItemName(UDateTimePatternGenerator *dtpg, /** * Getter corresponding to setAppendItemNames. Values below 0 or at or above - * UDATPG_FIELD_COUNT are illegal arguments. + * UDATPG_FIELD_COUNT are illegal arguments. Note: The more general function + * for getting date/time field display names is udatpg_getFieldDisplayName. * * @param dtpg a pointer to UDateTimePatternGenerator. * @param field UDateTimePatternField, such as UDATPG_ERA_FIELD * @param pLength A pointer that will receive the length of the name for field. * @return name for field + * @see udatpg_getFieldDisplayName * @stable ICU 3.8 */ U_STABLE const UChar * U_EXPORT2 @@ -423,6 +440,40 @@ udatpg_getAppendItemName(const UDateTimePatternGenerator *dtpg, UDateTimePatternField field, int32_t *pLength); +#ifndef U_HIDE_DRAFT_API +/** + * The general interface to get a display name for a particular date/time field, + * in one of several possible display widths. + * + * @param dtpg + * A pointer to the UDateTimePatternGenerator object with the localized + * display names. + * @param field + * The desired UDateTimePatternField, such as UDATPG_ERA_FIELD. + * @param width + * The desired UDateTimePGDisplayWidth, such as UDATPG_ABBREVIATED. + * @param fieldName + * A pointer to a buffer to receive the NULL-terminated display name. If the name + * fits into fieldName but cannot be NULL-terminated (length == capacity) then + * the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the name doesn't + * fit into fieldName then the error code is set to U_BUFFER_OVERFLOW_ERROR. 
+ * @param capacity + * The size of fieldName (in UChars). + * @param pErrorCode + * A pointer to a UErrorCode to receive any errors + * @return + * The full length of the name; if greater than capacity, fieldName contains a + * truncated result. + * @draft ICU 61 + */ +U_DRAFT int32_t U_EXPORT2 +udatpg_getFieldDisplayName(const UDateTimePatternGenerator *dtpg, + UDateTimePatternField field, + UDateTimePGDisplayWidth width, + UChar *fieldName, int32_t capacity, + UErrorCode *pErrorCode); +#endif // U_HIDE_DRAFT_API + /** * The DateTimeFormat is a message format pattern used to compose date and * time patterns. The default pattern in the root locale is "{1} {0}", where diff --git a/deps/icu-small/source/i18n/unicode/unum.h b/deps/icu-small/source/i18n/unicode/unum.h index 9154bce661ae03..0e7b9fffbab2b2 100644 --- a/deps/icu-small/source/i18n/unicode/unum.h +++ b/deps/icu-small/source/i18n/unicode/unum.h @@ -33,6 +33,9 @@ * *

    Number Format C API

    * + *

    IMPORTANT: New users with C++ capabilities are + * strongly encouraged to see if numberformatter.h fits their use case. + * * Number Format C API Provides functions for * formatting and parsing a number. Also provides methods for * determining which locales have number formats, and what their names @@ -559,7 +562,6 @@ unum_formatDouble( const UNumberFormat* fmt, UFieldPosition *pos, /* 0 if ignore */ UErrorCode* status); -#ifndef U_HIDE_DRAFT_API /** * Format a double using a UNumberFormat according to the UNumberFormat's locale, * and initialize a UFieldPositionIterator that enumerates the subcomponents of @@ -600,9 +602,9 @@ unum_formatDouble( const UNumberFormat* fmt, * @see unum_parseDouble * @see UFieldPositionIterator * @see UNumberFormatFields -* @draft ICU 59 +* @stable ICU 59 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 unum_formatDoubleForFields(const UNumberFormat* format, double number, UChar* result, @@ -610,7 +612,6 @@ unum_formatDoubleForFields(const UNumberFormat* format, UFieldPositionIterator* fpositer, UErrorCode* status); -#endif /* U_HIDE_DRAFT_API */ /** * Format a decimal number using a UNumberFormat. diff --git a/deps/icu-small/source/i18n/unicode/upluralrules.h b/deps/icu-small/source/i18n/unicode/upluralrules.h index 99d93a4e0517ab..690846bc89cd01 100644 --- a/deps/icu-small/source/i18n/unicode/upluralrules.h +++ b/deps/icu-small/source/i18n/unicode/upluralrules.h @@ -175,7 +175,6 @@ uplrules_selectWithFormat(const UPluralRules *uplrules, #endif /* U_HIDE_INTERNAL_API */ -#ifndef U_HIDE_DRAFT_API /** * Creates a string enumeration of all plural rule keywords used in this * UPluralRules object. The rule "other" is always present by default. @@ -184,12 +183,11 @@ uplrules_selectWithFormat(const UPluralRules *uplrules, * @param status A pointer to a UErrorCode to receive any errors. * @return a string enumeration over plural rule keywords, or NULL * upon error. The caller is responsible for closing the result. 
- * @draft ICU 59 + * @stable ICU 59 */ -U_DRAFT UEnumeration* U_EXPORT2 +U_STABLE UEnumeration* U_EXPORT2 uplrules_getKeywords(const UPluralRules *uplrules, UErrorCode *status); -#endif /* U_HIDE_DRAFT_API */ #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/unicode/utrans.h b/deps/icu-small/source/i18n/unicode/utrans.h index a4158726ca08c6..7672b4428fba3c 100644 --- a/deps/icu-small/source/i18n/unicode/utrans.h +++ b/deps/icu-small/source/i18n/unicode/utrans.h @@ -382,7 +382,7 @@ utrans_openIDs(UErrorCode *pErrorCode); U_STABLE void U_EXPORT2 utrans_trans(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, int32_t start, int32_t* limit, UErrorCode* status); @@ -433,7 +433,7 @@ utrans_trans(const UTransliterator* trans, U_STABLE void U_EXPORT2 utrans_transIncremental(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, UTransPosition* pos, UErrorCode* status); diff --git a/deps/icu-small/source/i18n/uregex.cpp b/deps/icu-small/source/i18n/uregex.cpp index a5dee6241dce24..370384363483e7 100644 --- a/deps/icu-small/source/i18n/uregex.cpp +++ b/deps/icu-small/source/i18n/uregex.cpp @@ -1465,8 +1465,10 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, int32_t groupNum = 0; U_ASSERT(c == DOLLARSIGN); - UChar32 c32; - U16_GET(replacementText, 0, replIdx, replacementLength, c32); + UChar32 c32 = -1; + if (replIdx < replacementLength) { + U16_GET(replacementText, 0, replIdx, replacementLength, c32); + } if (u_isdigit(c32)) { int32_t numDigits = 0; int32_t numCaptureGroups = m->fPattern->fGroupMap->size(); diff --git a/deps/icu-small/source/i18n/utrans.cpp b/deps/icu-small/source/i18n/utrans.cpp index 62fd630d9e83d3..31070dd43fbe4e 100644 --- a/deps/icu-small/source/i18n/utrans.cpp +++ b/deps/icu-small/source/i18n/utrans.cpp @@ -41,12 +41,12 @@ U_NAMESPACE_BEGIN class ReplaceableGlue : 
public Replaceable { UReplaceable *rep; - UReplaceableCallbacks *func; + const UReplaceableCallbacks *func; public: ReplaceableGlue(UReplaceable *replaceable, - UReplaceableCallbacks *funcCallback); + const UReplaceableCallbacks *funcCallback); virtual ~ReplaceableGlue(); @@ -88,7 +88,7 @@ class ReplaceableGlue : public Replaceable { UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue) ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable, - UReplaceableCallbacks *funcCallback) + const UReplaceableCallbacks *funcCallback) : Replaceable() { this->rep = replaceable; @@ -398,7 +398,7 @@ utrans_openIDs(UErrorCode *pErrorCode) { U_CAPI void U_EXPORT2 utrans_trans(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, int32_t start, int32_t* limit, UErrorCode* status) { @@ -418,7 +418,7 @@ utrans_trans(const UTransliterator* trans, U_CAPI void U_EXPORT2 utrans_transIncremental(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, UTransPosition* pos, UErrorCode* status) { diff --git a/deps/icu-small/source/tools/escapesrc/escapesrc.cpp b/deps/icu-small/source/tools/escapesrc/escapesrc.cpp index 13bfbd3789fda7..f51a86ea96fc1b 100644 --- a/deps/icu-small/source/tools/escapesrc/escapesrc.cpp +++ b/deps/icu-small/source/tools/escapesrc/escapesrc.cpp @@ -4,39 +4,71 @@ #include #include #include -#include #include #include #include #include -// with caution: +// We only use U8_* macros, which are entirely inline. #include "unicode/utf8.h" +// This contains a codepage and ISO 14882:1998 illegality table. +// Use "make gen-table" to rebuild it. +#include "cptbl.h" + +/** + * What is this? + * + * "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code + * in utf-8 into something consumable by certain compilers (Solaris, xlC) + * which aren't quite standards compliant. 
+ * + * - u"" or u'' gets converted to u"\uNNNN" or u'\uNNNN' + * - u8"" gets converted to "\xAA\xBB\xCC\xDD" etc. + * (some compilers do not support the u8 prefix correctly.) + * - if the system is EBCDIC-based, that is used to correct the input characters. + * + * Usage: + * escapesrc infile.cpp outfile.cpp + * Normally this is invoked by the build stage, with a rule such as: + * + * _%.cpp: $(srcdir)/%.cpp + * @$(BINDIR)/escapesrc$(EXEEXT) $< $@ + * %.o: _%.cpp + * $(COMPILE.cc) ... $@ $< + * + * In the Makefiles, SKIP_ESCAPING=YES is used to prevent escapesrc.cpp + * from being itself escaped. + */ + + static const char kSPACE = 0x20, kTAB = 0x09, kLF = 0x0A, kCR = 0x0D; - // kHASH = 0x23, - // kSLASH = 0x2f, - // kSTAR = 0x2A, - -# include "cptbl.h" +// For convenience # define cp1047_to_8859(c) cp1047_8859_1[c] +// Our app's name std::string prog; +/** + * Give the usual 1-line documentation and exit + */ void usage() { fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", prog.c_str(), prog.c_str()); } - +/** + * Delete the output file (if any) + * We want to delete even if we didn't generate, because it might be stale. + */ int cleanup(const std::string &outfile) { const char *outstr = outfile.c_str(); if(outstr && *outstr) { - int rc = unlink(outstr); + int rc = std::remove(outstr); if(rc == 0) { fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr); return 0; @@ -44,7 +76,7 @@ int cleanup(const std::string &outfile) { if( errno == ENOENT ) { return 0; // File did not exist - no error. } else { - perror("unlink"); + perror("std::remove"); return 1; } } @@ -52,16 +84,12 @@ int cleanup(const std::string &outfile) { return 0; } -// inline bool hasNonAscii(const char *line, size_t len) { -// const unsigned char *uline = reinterpret_cast(line); -// for(size_t i=0;i 0x7F) { -// return true; -// } -// } -// return false; -// } - +/** + * Skip across any known whitespace. 
+ * @param p startpoint + * @param e limit + * @return first non-whitespace char + */ inline const char *skipws(const char *p, const char *e) { for(;p0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) { @@ -345,6 +355,12 @@ bool fixLine(int /*no*/, std::string &linestr) { return false; } +/** + * Convert a whole file + * @param infile + * @param outfile + * @return 1 on err, 0 otherwise + */ int convert(const std::string &infile, const std::string &outfile) { fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str()); @@ -386,6 +402,9 @@ int convert(const std::string &infile, const std::string &outfile) { return 0; } +/** + * Main function + */ int main(int argc, const char *argv[]) { prog = argv[0]; @@ -399,6 +418,3 @@ int main(int argc, const char *argv[]) { return convert(infile, outfile); } - - -#include "utf_impl.cpp" diff --git a/doc/api/addons.md b/doc/api/addons.md index bc3cf2d41f56a0..e52e43153e6982 100644 --- a/doc/api/addons.md +++ b/doc/api/addons.md @@ -102,7 +102,7 @@ Addon module name is `addon`. Once the source code has been written, it must be compiled into the binary `addon.node` file. To do so, create a file called `binding.gyp` in the top-level of the project describing the build configuration of the module -using a JSON-like format. This file is used by [node-gyp][] -- a tool written +using a JSON-like format. This file is used by [node-gyp][] — a tool written specifically to compile Node.js Addons. ```json diff --git a/doc/api/async_hooks.md b/doc/api/async_hooks.md index 20a5c9759ded70..4287aa78ced8e0 100644 --- a/doc/api/async_hooks.md +++ b/doc/api/async_hooks.md @@ -636,7 +636,7 @@ asyncResource.emitAfter(); * `type` {string} The type of async event. * `options` {Object} * `triggerAsyncId` {number} The ID of the execution context that created this - async event. **Default:** `executionAsyncId()` + async event. **Default:** `executionAsyncId()`. 
* `requireManualDestroy` {boolean} Disables automatic `emitDestroy` when the object is garbage collected. This usually does not need to be set (even if `emitDestroy` is called manually), unless the resource's asyncId is retrieved diff --git a/doc/api/buffer.md b/doc/api/buffer.md index e8977fa04620cb..2b63b4eb2e13f2 100644 --- a/doc/api/buffer.md +++ b/doc/api/buffer.md @@ -5,15 +5,14 @@ > Stability: 2 - Stable -Prior to the introduction of [`TypedArray`] in [`ECMAScript 2015`] (ES6), the -JavaScript language had no mechanism for reading or manipulating streams -of binary data. The `Buffer` class was introduced as part of the Node.js -API to make it possible to interact with octet streams in the context of things -like TCP streams and file system operations. +Prior to the introduction of [`TypedArray`], the JavaScript language had no +mechanism for reading or manipulating streams of binary data. The `Buffer` class +was introduced as part of the Node.js API to make it possible to interact with +octet streams in the context of things like TCP streams and file system +operations. -Now that [`TypedArray`] has been added in ES6, the `Buffer` class implements the -[`Uint8Array`] API in a manner that is more optimized and suitable for Node.js' -use cases. +With [`TypedArray`] now available, the `Buffer` class implements the +[`Uint8Array`] API in a manner that is more optimized and suitable for Node.js. Instances of the `Buffer` class are similar to arrays of integers but correspond to fixed-sized, raw memory allocations outside the V8 heap. @@ -60,7 +59,7 @@ differently based on what arguments are provided: entire `Buffer`. While this behavior is *intentional* to improve performance, development experience has demonstrated that a more explicit distinction is required between creating a fast-but-uninitialized `Buffer` versus creating a - slower-but-safer `Buffer`. Starting in Node.js 8.0.0, `Buffer(num)` and + slower-but-safer `Buffer`. 
Starting in Node.js 8.0.0, `Buffer(num)` and `new Buffer(num)` will return a `Buffer` with initialized memory. * Passing a string, array, or `Buffer` as the first argument copies the passed object's data into the `Buffer`. @@ -111,13 +110,13 @@ added: v5.10.0 --> Node.js can be started using the `--zero-fill-buffers` command line option to -force all newly allocated `Buffer` instances created using either -`new Buffer(size)`, [`Buffer.allocUnsafe()`], [`Buffer.allocUnsafeSlow()`] or -`new SlowBuffer(size)` to be *automatically zero-filled* upon creation. Use of -this flag *changes the default behavior* of these methods and *can have a significant -impact* on performance. Use of the `--zero-fill-buffers` option is recommended -only when necessary to enforce that newly allocated `Buffer` instances cannot -contain old data that is potentially sensitive. +cause all newly allocated `Buffer` instances to be zero-filled upon creation by +default, including buffers returned by `new Buffer(size)`, +[`Buffer.allocUnsafe()`], [`Buffer.allocUnsafeSlow()`], and `new +SlowBuffer(size)`. Use of this flag can have a significant negative impact on +performance. Use of the `--zero-fill-buffers` option is recommended only when +necessary to enforce that newly allocated `Buffer` instances cannot contain old +data that is potentially sensitive. ```txt $ node --zero-fill-buffers @@ -149,10 +148,8 @@ changes: description: Removed the deprecated `raw` and `raws` encodings. --> -`Buffer` instances are commonly used to represent sequences of encoded characters -such as UTF-8, UCS2, Base64, or even Hex-encoded data. It is possible to -convert back and forth between `Buffer` instances and ordinary JavaScript strings -by using an explicit character encoding. +When string data is stored in or extracted out of a `Buffer` instance, a +character encoding may be specified. 
```js const buf = Buffer.from('hello world', 'ascii'); @@ -161,6 +158,11 @@ console.log(buf.toString('hex')); // Prints: 68656c6c6f20776f726c64 console.log(buf.toString('base64')); // Prints: aGVsbG8gd29ybGQ= + +console.log(Buffer.from('fhqwhgads', 'ascii')); +// Prints: +console.log(Buffer.from('fhqwhgads', 'utf16le')); +// Prints: ``` The character encodings currently supported by Node.js include: @@ -204,11 +206,10 @@ changes: --> `Buffer` instances are also [`Uint8Array`] instances. However, there are subtle -incompatibilities with the TypedArray specification in [`ECMAScript 2015`]. -For example, while [`ArrayBuffer#slice()`] creates a copy of the slice, the -implementation of [`Buffer#slice()`][`buf.slice()`] creates a view over the -existing `Buffer` without copying, making [`Buffer#slice()`][`buf.slice()`] far -more efficient. +incompatibilities with [`TypedArray`]. For example, while +[`ArrayBuffer#slice()`] creates a copy of the slice, the implementation of +[`Buffer#slice()`][`buf.slice()`] creates a view over the existing `Buffer` +without copying, making [`Buffer#slice()`][`buf.slice()`] far more efficient. It is also possible to create new [`TypedArray`] instances from a `Buffer` with the following caveats: @@ -275,10 +276,9 @@ function: * [`Buffer.from(arrayBuffer[, byteOffset [, length]])`][`Buffer.from(arrayBuffer)`] * [`Buffer.from(string[, encoding])`][`Buffer.from(string)`] -## Buffers and ES6 iteration +## Buffers and iteration -`Buffer` instances can be iterated over using the [`ECMAScript 2015`] (ES6) `for..of` -syntax. +`Buffer` instances can be iterated over using `for..of` syntax: ```js const buf = Buffer.from([1, 2, 3]); @@ -426,7 +426,7 @@ changes: * `size` {integer} The desired length of the new `Buffer`. -Allocates a new `Buffer` of `size` bytes. If the `size` is larger than +Allocates a new `Buffer` of `size` bytes. If the `size` is larger than [`buffer.constants.MAX_LENGTH`] or smaller than 0, a [`RangeError`] will be thrown. 
A zero-length `Buffer` will be created if `size` is 0. @@ -434,7 +434,7 @@ Prior to Node.js 8.0.0, the underlying memory for `Buffer` instances created in this way is *not initialized*. The contents of a newly created `Buffer` are unknown and *may contain sensitive data*. Use [`Buffer.alloc(size)`][`Buffer.alloc()`] instead to initialize a `Buffer` -to zeroes. +with zeroes. ```js const buf = new Buffer(10); @@ -461,8 +461,8 @@ changes: * `string` {string} String to encode. * `encoding` {string} The encoding of `string`. **Default:** `'utf8'` -Creates a new `Buffer` containing the given JavaScript string `string`. If -provided, the `encoding` parameter identifies the character encoding of `string`. +Creates a new `Buffer` containing `string`. The `encoding` parameter identifies +the character encoding of `string`. ```js const buf1 = new Buffer('this is a tést'); @@ -502,7 +502,7 @@ console.log(buf); // Prints: ``` -Allocates a new `Buffer` of `size` bytes. If the `size` is larger than +Allocates a new `Buffer` of `size` bytes. If the `size` is larger than [`buffer.constants.MAX_LENGTH`] or smaller than 0, a [`RangeError`] will be thrown. A zero-length `Buffer` will be created if `size` is 0. @@ -543,14 +543,14 @@ changes: * `size` {integer} The desired length of the new `Buffer`. -Allocates a new `Buffer` of `size` bytes. If the `size` is larger than +Allocates a new `Buffer` of `size` bytes. If the `size` is larger than [`buffer.constants.MAX_LENGTH`] or smaller than 0, a [`RangeError`] will be thrown. A zero-length `Buffer` will be created if `size` is 0. The underlying memory for `Buffer` instances created in this way is *not initialized*. The contents of the newly created `Buffer` are unknown and *may contain sensitive data*. Use [`Buffer.alloc()`] instead to initialize -`Buffer` instances to zeroes. +`Buffer` instances with zeroes. 
```js const buf = Buffer.allocUnsafe(10); @@ -587,26 +587,26 @@ added: v5.12.0 * `size` {integer} The desired length of the new `Buffer`. -Allocates a new `Buffer` of `size` bytes. If the `size` is larger than +Allocates a new `Buffer` of `size` bytes. If the `size` is larger than [`buffer.constants.MAX_LENGTH`] or smaller than 0, a [`RangeError`] will be thrown. A zero-length `Buffer` will be created if `size` is 0. The underlying memory for `Buffer` instances created in this way is *not initialized*. The contents of the newly created `Buffer` are unknown and -*may contain sensitive data*. Use [`buf.fill(0)`][`buf.fill()`] to initialize such -`Buffer` instances to zeroes. +*may contain sensitive data*. Use [`buf.fill(0)`][`buf.fill()`] to initialize +such `Buffer` instances with zeroes. When using [`Buffer.allocUnsafe()`] to allocate new `Buffer` instances, -allocations under 4KB are, by default, sliced from a single pre-allocated -`Buffer`. This allows applications to avoid the garbage collection overhead of -creating many individually allocated `Buffer` instances. This approach improves -both performance and memory usage by eliminating the need to track and cleanup as -many `Persistent` objects. +allocations under 4KB are sliced from a single pre-allocated `Buffer`. This +allows applications to avoid the garbage collection overhead of creating many +individually allocated `Buffer` instances. This approach improves both +performance and memory usage by eliminating the need to track and clean up as +many persistent objects. However, in the case where a developer may need to retain a small chunk of memory from a pool for an indeterminate amount of time, it may be appropriate -to create an un-pooled `Buffer` instance using `Buffer.allocUnsafeSlow()` then -copy out the relevant bits. +to create an un-pooled `Buffer` instance using `Buffer.allocUnsafeSlow()` and +then copying out the relevant bits. 
```js // Need to keep around a few small chunks of memory @@ -625,8 +625,8 @@ socket.on('readable', () => { }); ``` -Use of `Buffer.allocUnsafeSlow()` should be used only as a last resort *after* -a developer has observed undue memory retention in their applications. +`Buffer.allocUnsafeSlow()` should be used only as a last resort after a +developer has observed undue memory retention in their applications. A `TypeError` will be thrown if `size` is not a number. @@ -839,8 +839,8 @@ added: v5.10.0 * `string` {string} A string to encode. * `encoding` {string} The encoding of `string`. **Default:** `'utf8'` -Creates a new `Buffer` containing the given JavaScript string `string`. If -provided, the `encoding` parameter identifies the character encoding of `string`. +Creates a new `Buffer` containing `string`. The `encoding` parameter identifies +the character encoding of `string`. ```js const buf1 = Buffer.from('this is a tést'); @@ -1268,11 +1268,11 @@ console.log(buf.indexOf(Buffer.from('a buffer example'))); console.log(buf.indexOf(Buffer.from('a buffer example').slice(0, 8))); // Prints: 8 -const utf16Buffer = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2'); +const utf16Buffer = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'utf16le'); -console.log(utf16Buffer.indexOf('\u03a3', 0, 'ucs2')); +console.log(utf16Buffer.indexOf('\u03a3', 0, 'utf16le')); // Prints: 4 -console.log(utf16Buffer.indexOf('\u03a3', -4, 'ucs2')); +console.log(utf16Buffer.indexOf('\u03a3', -4, 'utf16le')); // Prints: 6 ``` @@ -1366,11 +1366,11 @@ console.log(buf.lastIndexOf('buffer', 5)); console.log(buf.lastIndexOf('buffer', 4)); // Prints: -1 -const utf16Buffer = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2'); +const utf16Buffer = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'utf16le'); -console.log(utf16Buffer.lastIndexOf('\u03a3', undefined, 'ucs2')); +console.log(utf16Buffer.lastIndexOf('\u03a3', undefined, 'utf16le')); // Prints: 6 
-console.log(utf16Buffer.lastIndexOf('\u03a3', -5, 'ucs2')); +console.log(utf16Buffer.lastIndexOf('\u03a3', -5, 'utf16le')); // Prints: 4 ``` @@ -2458,13 +2458,14 @@ deprecated: v6.0.0 * `size` {integer} The desired length of the new `SlowBuffer`. -Allocates a new `Buffer` of `size` bytes. If the `size` is larger than +Allocates a new `Buffer` of `size` bytes. If the `size` is larger than [`buffer.constants.MAX_LENGTH`] or smaller than 0, a [`RangeError`] will be thrown. A zero-length `Buffer` will be created if `size` is 0. The underlying memory for `SlowBuffer` instances is *not initialized*. The contents of a newly created `SlowBuffer` are unknown and may contain sensitive -data. Use [`buf.fill(0)`][`buf.fill()`] to initialize a `SlowBuffer` to zeroes. +data. Use [`buf.fill(0)`][`buf.fill()`] to initialize a `SlowBuffer` with +zeroes. ```js const { SlowBuffer } = require('buffer'); @@ -2549,5 +2550,4 @@ This value may depend on the JS engine that is being used. [RFC1345]: https://tools.ietf.org/html/rfc1345 [RFC4648, Section 5]: https://tools.ietf.org/html/rfc4648#section-5 [WHATWG Encoding Standard]: https://encoding.spec.whatwg.org/ -[`ECMAScript 2015`]: https://www.ecma-international.org/ecma-262/6.0/index.html [iterator]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols diff --git a/doc/api/child_process.md b/doc/api/child_process.md index ab751ff126549d..b85961a794c579 100644 --- a/doc/api/child_process.md +++ b/doc/api/child_process.md @@ -41,7 +41,7 @@ the event loop until the spawned process either exits or is terminated. For convenience, the `child_process` module provides a handful of synchronous and asynchronous alternatives to [`child_process.spawn()`][] and -[`child_process.spawnSync()`][]. *Note that each of these alternatives are +[`child_process.spawnSync()`][]. 
*Note that each of these alternatives are implemented on top of [`child_process.spawn()`][] or [`child_process.spawnSync()`][].* * [`child_process.exec()`][]: spawns a shell and runs a command within that shell, @@ -146,8 +146,8 @@ changes: [Shell Requirements][] and [Default Windows Shell][]. * `timeout` {number} **Default:** `0` * `maxBuffer` {number} Largest amount of data in bytes allowed on stdout or - stderr. **Default:** `200*1024`. If exceeded, the child process is terminated. - See caveat at [`maxBuffer` and Unicode][]. + stderr. If exceeded, the child process is terminated. See caveat at + [`maxBuffer` and Unicode][]. **Default:** `200 * 1024`. * `killSignal` {string|integer} **Default:** `'SIGTERM'` * `uid` {number} Sets the user identity of the process (see setuid(2)). * `gid` {number} Sets the group identity of the process (see setgid(2)). @@ -189,7 +189,7 @@ exec('cat *.js bad_file | wc -l', (error, stdout, stderr) => { ``` If a `callback` function is provided, it is called with the arguments -`(error, stdout, stderr)`. On success, `error` will be `null`. On error, +`(error, stdout, stderr)`. On success, `error` will be `null`. On error, `error` will be an instance of [`Error`][]. The `error.code` property will be the exit code of the child process while `error.signal` will be set to the signal that terminated the process. Any exit code other than `0` is considered @@ -258,8 +258,8 @@ changes: * `encoding` {string} **Default:** `'utf8'` * `timeout` {number} **Default:** `0` * `maxBuffer` {number} Largest amount of data in bytes allowed on stdout or - stderr. **Default:** `200*1024` If exceeded, the child process is terminated. - See caveat at [`maxBuffer` and Unicode][]. + stderr. If exceeded, the child process is terminated. See caveat at + [`maxBuffer` and Unicode][]. **Default:** `200 * 1024`. * `killSignal` {string|integer} **Default:** `'SIGTERM'` * `uid` {number} Sets the user identity of the process (see setuid(2)). 
* `gid` {number} Sets the group identity of the process (see setgid(2)). @@ -538,7 +538,7 @@ disabled*. On non-Windows platforms, if `options.detached` is set to `true`, the child process will be made the leader of a new process group and session. Note that child processes may continue running after the parent exits regardless of -whether they are detached or not. See setsid(2) for more information. +whether they are detached or not. See setsid(2) for more information. By default, the parent will wait for the detached child to exit. To prevent the parent from waiting for a given `subprocess`, use the `subprocess.unref()` @@ -712,9 +712,10 @@ changes: * `killSignal` {string|integer} The signal value to be used when the spawned process will be killed. **Default:** `'SIGTERM'` * `maxBuffer` {number} Largest amount of data in bytes allowed on stdout or - stderr. **Default:** `200*1024` If exceeded, the child process is terminated. - See caveat at [`maxBuffer` and Unicode][]. - * `encoding` {string} The encoding used for all stdio inputs and outputs. **Default:** `'buffer'` + stderr. If exceeded, the child process is terminated. See caveat at + [`maxBuffer` and Unicode][]. **Default:** `200 * 1024`. + * `encoding` {string} The encoding used for all stdio inputs and outputs. + **Default:** `'buffer'`. * `windowsHide` {boolean} Hide the subprocess console window that would normally be created on Windows systems. **Default:** `false`. * `shell` {boolean|string} If `true`, runs `command` inside of a shell. Uses @@ -773,8 +774,8 @@ changes: * `killSignal` {string|integer} The signal value to be used when the spawned process will be killed. **Default:** `'SIGTERM'` * `maxBuffer` {number} Largest amount of data in bytes allowed on stdout or - stderr. **Default:** `200*1024` If exceeded, the child process is terminated. - See caveat at [`maxBuffer` and Unicode][]. + stderr. If exceeded, the child process is terminated. See caveat at + [`maxBuffer` and Unicode][]. 
**Default:** `200 * 1024`. * `encoding` {string} The encoding used for all stdio inputs and outputs. **Default:** `'buffer'` * `windowsHide` {boolean} Hide the subprocess console window that would @@ -785,12 +786,12 @@ The `child_process.execSync()` method is generally identical to [`child_process.exec()`][] with the exception that the method will not return until the child process has fully closed. When a timeout has been encountered and `killSignal` is sent, the method won't return until the process has completely -exited. *Note that if the child process intercepts and handles the `SIGTERM` +exited. *Note that if the child process intercepts and handles the `SIGTERM` signal and doesn't exit, the parent process will wait until the child process has exited.* If the process times out or has a non-zero exit code, this method ***will*** -throw. The [`Error`][] object will contain the entire result from +throw. The [`Error`][] object will contain the entire result from [`child_process.spawnSync()`][] **Never pass unsanitized user input to this function. Any input containing shell @@ -830,8 +831,8 @@ changes: * `killSignal` {string|integer} The signal value to be used when the spawned process will be killed. **Default:** `'SIGTERM'` * `maxBuffer` {number} Largest amount of data in bytes allowed on stdout or - stderr. **Default:** `200*1024` If exceeded, the child process is terminated. - See caveat at [`maxBuffer` and Unicode][]. + stderr. If exceeded, the child process is terminated. See caveat at + [`maxBuffer` and Unicode][]. **Default:** `200 * 1024`. * `encoding` {string} The encoding used for all stdio inputs and outputs. **Default:** `'buffer'` * `shell` {boolean|string} If `true`, runs `command` inside of a shell. Uses @@ -1168,7 +1169,7 @@ properties: Defaults to `false`. The optional `callback` is a function that is invoked after the message is -sent but before the child may have received it. 
The function is called with a +sent but before the child may have received it. The function is called with a single argument: `null` on success, or an [`Error`][] object on failure. If no `callback` function is provided and the message cannot be sent, an diff --git a/doc/api/cli.md b/doc/api/cli.md index 84377fcf6a4096..83dc4459352ca7 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -517,7 +517,7 @@ added: v3.0.0 Path to the file used to store the persistent REPL history. The default path is `~/.node_repl_history`, which is overridden by this variable. Setting the value -to an empty string (`""` or `" "`) disables persistent REPL history. +to an empty string (`''` or `' '`) disables persistent REPL history. ### `NODE_EXTRA_CA_CERTS=file` @@ -602,7 +602,7 @@ reason any of these APIs takes a long time, other (seemingly unrelated) APIs that run in libuv's threadpool will experience degraded performance. In order to mitigate this issue, one potential solution is to increase the size of libuv's threadpool by setting the `'UV_THREADPOOL_SIZE'` environment variable to a value -greater than `4` (its current default value). For more information, see the +greater than `4` (its current default value). For more information, see the [libuv threadpool documentation][]. [`--openssl-config`]: #cli_openssl_config_file diff --git a/doc/api/cluster.md b/doc/api/cluster.md index 3b6546dc76bf6a..0e33cfd412886b 100644 --- a/doc/api/cluster.md +++ b/doc/api/cluster.md @@ -93,7 +93,7 @@ Node.js process and a cluster worker differs: process. 3. `server.listen(0)` Normally, this will cause servers to listen on a random port. However, in a cluster, each worker will receive the - same "random" port each time they do `listen(0)`. In essence, the + same "random" port each time they do `listen(0)`. In essence, the port is random the first time, but predictable thereafter. To listen on a unique port, generate a port number based on the cluster worker ID. 
@@ -103,8 +103,8 @@ things like sessions and login. Because workers are all separate processes, they can be killed or re-spawned depending on a program's needs, without affecting other -workers. As long as there are some workers still alive, the server will -continue to accept connections. If no workers are alive, existing connections +workers. As long as there are some workers still alive, the server will +continue to accept connections. If no workers are alive, existing connections will be dropped and new connections will be refused. Node.js does not automatically manage the number of workers, however. It is the application's responsibility to manage the worker pool based on its own needs. @@ -465,8 +465,8 @@ Emitted after the worker IPC channel has disconnected. This can occur when a worker exits gracefully, is killed, or is disconnected manually (such as with worker.disconnect()). -There may be a delay between the `'disconnect'` and `'exit'` events. These -events can be used to detect if the process is stuck in a cleanup or if there +There may be a delay between the `'disconnect'` and `'exit'` events. These +events can be used to detect if the process is stuck in a cleanup or if there are long-living connections. ```js @@ -556,7 +556,7 @@ The `addressType` is one of: * `4` (TCPv4) * `6` (TCPv6) * `-1` (unix domain socket) -* `"udp4"` or `"udp6"` (UDP v4 or v6) +* `'udp4'` or `'udp6'` (UDP v4 or v6) ## Event: 'message' -* Returns {number} the `SO_RCVBUF` socket receive buffer size in bytes. +* Returns: {number} the `SO_RCVBUF` socket receive buffer size in bytes. ### socket.getSendBufferSize() -* Returns {number} the `SO_SNDBUF` socket send buffer size in bytes. +* Returns: {number} the `SO_SNDBUF` socket send buffer size in bytes. ### socket.ref() Sometimes, the domain in use is not the one that ought to be used for a -specific event emitter. Or, the event emitter could have been created +specific event emitter. 
Or, the event emitter could have been created in the context of one domain, but ought to instead be bound to some other domain. @@ -278,7 +278,7 @@ Returns a new Domain object. The Domain class encapsulates the functionality of routing errors and uncaught exceptions to the active Domain object. -Domain is a child class of [`EventEmitter`][]. To handle the errors that it +Domain is a child class of [`EventEmitter`][]. To handle the errors that it catches, listen to its `'error'` event. ### domain.members @@ -292,13 +292,13 @@ to the domain. * `emitter` {EventEmitter|Timer} emitter or timer to be added to the domain -Explicitly adds an emitter to the domain. If any event handlers called by +Explicitly adds an emitter to the domain. If any event handlers called by the emitter throw an error, or if the emitter emits an `'error'` event, it will be routed to the domain's `'error'` event, just like with implicit binding. This also works with timers that are returned from [`setInterval()`][] and -[`setTimeout()`][]. If their callback function throws, it will be caught by +[`setTimeout()`][]. If their callback function throws, it will be caught by the domain 'error' handler. If the Timer or EventEmitter was already bound to a domain, it is removed @@ -310,7 +310,7 @@ from that one, and bound to this one instead. * Returns: {Function} The bound function The returned function will be a wrapper around the supplied callback -function. When the returned function is called, any errors that are +function. When the returned function is called, any errors that are thrown will be routed to the domain's `'error'` event. #### Example @@ -365,7 +365,7 @@ single domain. * `callback` {Function} The callback function * Returns: {Function} The intercepted function -This method is almost identical to [`domain.bind(callback)`][]. However, in +This method is almost identical to [`domain.bind(callback)`][]. 
However, in addition to catching thrown errors, it will also intercept [`Error`][] objects sent as the first argument to the function. @@ -402,7 +402,7 @@ d.on('error', (er) => { * `emitter` {EventEmitter|Timer} emitter or timer to be removed from the domain -The opposite of [`domain.add(emitter)`][]. Removes domain handling from the +The opposite of [`domain.add(emitter)`][]. Removes domain handling from the specified emitter. ### domain.run(fn[, ...args]) diff --git a/doc/api/errors.md b/doc/api/errors.md index c6e4382e39785f..e1ee3a3fce8d3e 100644 --- a/doc/api/errors.md +++ b/doc/api/errors.md @@ -107,7 +107,7 @@ pass or fail). For *all* [`EventEmitter`][] objects, if an `'error'` event handler is not provided, the error will be thrown, causing the Node.js process to report an -unhandled exception and crash unless either: The [`domain`][domains] module is +unhandled exception and crash unless either: The [`domain`][domains] module is used appropriately or a handler has been registered for the [`process.on('uncaughtException')`][] event. @@ -133,7 +133,7 @@ exactly how errors raised by those methods are propagated. Most asynchronous methods exposed by the Node.js core API follow an idiomatic -pattern referred to as an _error-first callback_ (sometimes referred to as +pattern referred to as an _error-first callback_ (sometimes referred to as a _Node.js style callback_). With this pattern, a callback function is passed to the method as an argument. When the operation either completes or an error is raised, the callback function is called with @@ -156,7 +156,7 @@ fs.readFile('/some/file/that/does-exist', errorFirstCallback); ``` The JavaScript `try / catch` mechanism **cannot** be used to intercept errors -generated by asynchronous APIs. A common mistake for beginners is to try to +generated by asynchronous APIs. A common mistake for beginners is to try to use `throw` inside an error-first callback: ```js @@ -209,7 +209,7 @@ provided text message. 
If an object is passed as `message`, the text message is generated by calling `message.toString()`. The `error.stack` property will represent the point in the code at which `new Error()` was called. Stack traces are dependent on [V8's stack trace API][]. Stack traces extend only to either -(a) the beginning of *synchronous code execution*, or (b) the number of frames +(a) the beginning of *synchronous code execution*, or (b) the number of frames given by the property `Error.stackTraceLimit`, whichever is smaller. ### Error.captureStackTrace(targetObject[, constructorOpt]) @@ -521,7 +521,7 @@ found [here][online]. - `EACCES` (Permission denied): An attempt was made to access a file in a way forbidden by its file access permissions. -- `EADDRINUSE` (Address already in use): An attempt to bind a server +- `EADDRINUSE` (Address already in use): An attempt to bind a server ([`net`][], [`http`][], or [`https`][]) to a local address failed due to another server on the local system already occupying that address. @@ -549,14 +549,14 @@ found [here][online]. `ulimit -n 2048` in the same shell that will run the Node.js process. - `ENOENT` (No such file or directory): Commonly raised by [`fs`][] operations - to indicate that a component of the specified pathname does not exist -- no + to indicate that a component of the specified pathname does not exist — no entity (file or directory) could be found by the given path. - `ENOTDIR` (Not a directory): A component of the given pathname existed, but was not a directory as expected. Commonly raised by [`fs.readdir`][]. - `ENOTEMPTY` (Directory not empty): A directory with entries was the target - of an operation that requires an empty directory -- usually [`fs.unlink`][]. + of an operation that requires an empty directory — usually [`fs.unlink`][]. - `EPERM` (Operation not permitted): An attempt was made to perform an operation that requires elevated privileges. @@ -568,7 +568,7 @@ found [here][online]. 
- `ETIMEDOUT` (Operation timed out): A connect or send request failed because the connected party did not properly respond after a period of time. Usually - encountered by [`http`][] or [`net`][] -- often a sign that a `socket.end()` + encountered by [`http`][] or [`net`][] — often a sign that a `socket.end()` was not properly called. diff --git a/doc/api/esm.md b/doc/api/esm.md index 9fcda68776a9bf..cfbb5dff35f337 100644 --- a/doc/api/esm.md +++ b/doc/api/esm.md @@ -138,12 +138,12 @@ module. This can be one of the following: | `format` | Description | | --- | --- | -| `"esm"` | Load a standard JavaScript module | -| `"cjs"` | Load a node-style CommonJS module | -| `"builtin"` | Load a node builtin CommonJS module | -| `"json"` | Load a JSON file | -| `"addon"` | Load a [C++ Addon][addons] | -| `"dynamic"` | Use a [dynamic instantiate hook][] | +| `'esm'` | Load a standard JavaScript module | +| `'cjs'` | Load a node-style CommonJS module | +| `'builtin'` | Load a node builtin CommonJS module | +| `'json'` | Load a JSON file | +| `'addon'` | Load a [C++ Addon][addons] | +| `'dynamic'` | Use a [dynamic instantiate hook][] | For example, a dummy loader to load JavaScript restricted to browser resolution rules with only JS file extension and Node builtin modules support could @@ -200,7 +200,7 @@ would load the module `x.js` as an ES module with relative resolution support To create a custom dynamic module that doesn't correspond to one of the existing `format` interpretations, the `dynamicInstantiate` hook can be used. -This hook is called only for modules that return `format: "dynamic"` from +This hook is called only for modules that return `format: 'dynamic'` from the `resolve` hook. 
```js diff --git a/doc/api/fs.md b/doc/api/fs.md index 7944f91dafb1b1..79ac32fe022071 100644 --- a/doc/api/fs.md +++ b/doc/api/fs.md @@ -75,7 +75,7 @@ fs.rename('/tmp/hello', '/tmp/world', (err) => { In busy processes, the programmer is _strongly encouraged_ to use the asynchronous versions of these calls. The synchronous versions will block -the entire process until they complete--halting all connections. +the entire process until they complete — halting all connections. While it is not recommended, most fs functions allow the callback argument to be omitted, in which case a default callback is used that rethrows errors. To @@ -635,16 +635,16 @@ representation. The times in the stat object have the following semantics: -* `atime` "Access Time" - Time when file data last accessed. Changed +* `atime` "Access Time" - Time when file data last accessed. Changed by the mknod(2), utimes(2), and read(2) system calls. * `mtime` "Modified Time" - Time when file data last modified. Changed by the mknod(2), utimes(2), and write(2) system calls. * `ctime` "Change Time" - Time when file status was last changed - (inode data modification). Changed by the chmod(2), chown(2), + (inode data modification). Changed by the chmod(2), chown(2), link(2), mknod(2), rename(2), unlink(2), utimes(2), read(2), and write(2) system calls. -* `birthtime` "Birth Time" - Time of file creation. Set once when the - file is created. On filesystems where birthtime is not available, +* `birthtime` "Birth Time" - Time of file creation. Set once when the + file is created. On filesystems where birthtime is not available, this field may instead hold either the `ctime` or `1970-01-01T00:00Z` (ie, unix epoch timestamp `0`). Note that this value may be greater than `atime` or `mtime` in this case. On Darwin @@ -652,8 +652,8 @@ The times in the stat object have the following semantics: set to an earlier value than the current `birthtime` using the utimes(2) system call. 
-Prior to Node v0.12, the `ctime` held the `birthtime` on Windows -systems. Note that as of v0.12, `ctime` is not "creation time", and +Prior to Node.js v0.12, the `ctime` held the `birthtime` on Windows +systems. Note that as of v0.12, `ctime` is not "creation time", and on Unix systems, it never was. ## Class: fs.WriteStream @@ -1124,7 +1124,7 @@ changes: * `callback` {Function} * `err` {Error} -Asynchronous close(2). No arguments other than a possible exception are given +Asynchronous close(2). No arguments other than a possible exception are given to the completion callback. ## fs.closeSync(fd) @@ -1272,7 +1272,7 @@ const defaults = { ``` `options` can include `start` and `end` values to read a range of bytes from -the file instead of the entire file. Both `start` and `end` are inclusive and +the file instead of the entire file. Both `start` and `end` are inclusive and start counting at 0. If `fd` is specified and `start` is omitted or `undefined`, `fs.createReadStream()` reads sequentially from the current file position. The `encoding` can be any one of those accepted by [`Buffer`][]. @@ -1343,7 +1343,7 @@ const defaults = { ``` `options` may also include a `start` option to allow writing data at -some position past the beginning of the file. Modifying a file rather +some position past the beginning of the file. Modifying a file rather than replacing it may require a `flags` mode of `r+` rather than the default mode `w`. The `encoding` can be any one of those accepted by [`Buffer`][]. @@ -1379,7 +1379,7 @@ deprecated: v1.0.0 * `exists` {boolean} Test whether or not the given path exists by checking with the file system. -Then call the `callback` argument with either true or false. Example: +Then call the `callback` argument with either true or false. Example: ```js fs.exists('/etc/passwd', (exists) => { @@ -2085,7 +2085,7 @@ to a non-existent file. The exclusive flag may or may not work with network file systems. 
`flags` can also be a number as documented by open(2); commonly used constants -are available from `fs.constants`. On Windows, flags are translated to +are available from `fs.constants`. On Windows, flags are translated to their equivalent ones where applicable, e.g. `O_WRONLY` to `FILE_GENERIC_WRITE`, or `O_EXCL|O_CREAT` to `CREATE_NEW`, as accepted by CreateFileW. @@ -2205,7 +2205,7 @@ changes: * `err` {Error} * `files` {string[]|Buffer[]} -Asynchronous readdir(3). Reads the contents of a directory. +Asynchronous readdir(3). Reads the contents of a directory. The callback gets two arguments `(err, files)` where `files` is an array of the names of the files in the directory excluding `'.'` and `'..'`. @@ -2499,7 +2499,7 @@ the path passed to the callback. If the `encoding` is set to `'buffer'`, the path returned will be passed as a `Buffer` object. On Linux, when Node.js is linked against musl libc, the procfs file system must -be mounted on `/proc` in order for this function to work. Glibc does not have +be mounted on `/proc` in order for this function to work. Glibc does not have this restriction. ## fs.realpathSync(path[, options]) @@ -2568,7 +2568,7 @@ the path passed to the callback. If the `encoding` is set to `'buffer'`, the path returned will be passed as a `Buffer` object. On Linux, when Node.js is linked against musl libc, the procfs file system must -be mounted on `/proc` in order for this function to work. Glibc does not have +be mounted on `/proc` in order for this function to work. Glibc does not have this restriction. ## fs.rename(oldPath, newPath, callback) @@ -2859,7 +2859,7 @@ Calling `fs.unwatchFile()` with a filename that is not being watched is a no-op, not an error. Using [`fs.watch()`][] is more efficient than `fs.watchFile()` and -`fs.unwatchFile()`. `fs.watch()` should be used instead of `fs.watchFile()` +`fs.unwatchFile()`. `fs.watch()` should be used instead of `fs.watchFile()` and `fs.unwatchFile()` when possible. 
## fs.utimes(path, atime, mtime, callback) @@ -2950,14 +2950,14 @@ changes: * `filename` {string|Buffer} Watch for changes on `filename`, where `filename` is either a file or a -directory. The returned object is a [`fs.FSWatcher`][]. +directory. The returned object is a [`fs.FSWatcher`][]. The second argument is optional. If `options` is provided as a string, it specifies the `encoding`. Otherwise `options` should be passed as an object. -The listener callback gets two arguments `(eventType, filename)`. `eventType` -is either `'rename'` or `'change'`, and `filename` is the name of the file which -triggered the event. +The listener callback gets two arguments `(eventType, filename)`. `eventType` +is either `'rename'` or `'change'`, and `filename` is the name of the file +which triggered the event. Note that on most platforms, `'rename'` is emitted whenever a filename appears or disappears in the directory. @@ -3017,7 +3017,7 @@ content, and one for truncation). Providing `filename` argument in the callback is only supported on Linux, -macOS, Windows, and AIX. Even on supported platforms, `filename` is not always +macOS, Windows, and AIX. Even on supported platforms, `filename` is not always guaranteed to be provided. Therefore, don't assume that `filename` argument is always provided in the callback, and have some fallback logic if it is null. @@ -3166,7 +3166,7 @@ changes: * `written` {integer} * `string` {string} -Write `string` to the file specified by `fd`. If `string` is not a string, then +Write `string` to the file specified by `fd`. If `string` is not a string, then the value will be coerced to one. `position` refers to the offset from the beginning of the file where this data diff --git a/doc/api/http.md b/doc/api/http.md index 297f68e462e435..048c9b6b9a6714 100644 --- a/doc/api/http.md +++ b/doc/api/http.md @@ -9,7 +9,7 @@ To use the HTTP server and client one must `require('http')`. 
The HTTP interfaces in Node.js are designed to support many features of the protocol which have been traditionally difficult to use. In particular, large, possibly chunk-encoded, messages. The interface is -careful to never buffer entire requests or responses--the +careful to never buffer entire requests or responses — the user is able to stream data. HTTP message headers are represented by an object like this: @@ -33,7 +33,7 @@ parse the actual headers or the body. See [`message.headers`][] for details on how duplicate headers are handled. The raw headers as they were received are retained in the `rawHeaders` -property, which is an array of `[key, value, key2, value2, ...]`. For +property, which is an array of `[key, value, key2, value2, ...]`. For example, the previous message header object might have a `rawHeaders` list like the following: @@ -124,8 +124,8 @@ added: v0.3.4 * `maxSockets` {number} Maximum number of sockets to allow per host. Defaults to `Infinity`. * `maxFreeSockets` {number} Maximum number of sockets to leave open - in a free state. Only relevant if `keepAlive` is set to `true`. - Defaults to `256`. + in a free state. Only relevant if `keepAlive` is set to `true`. + **Default:** `256`. The default [`http.globalAgent`][] that is used by [`http.request()`][] has all of these values set to their respective defaults. @@ -203,9 +203,9 @@ added: v0.11.4 Destroy any sockets that are currently in use by the agent. -It is usually not necessary to do this. However, if using an +It is usually not necessary to do this. However, if using an agent with `keepAlive` enabled, then it is best to explicitly shut down -the agent when it will no longer be used. Otherwise, +the agent when it will no longer be used. Otherwise, sockets may hang open for quite a long time before the server terminates them. @@ -217,7 +217,7 @@ added: v0.11.4 * {Object} An object which contains arrays of sockets currently awaiting use by -the agent when `keepAlive` is enabled. 
Do not modify. +the agent when `keepAlive` is enabled. Do not modify. ### agent.getName(options) -This object is created internally and returned from [`http.request()`][]. It -represents an _in-progress_ request whose header has already been queued. The +This object is created internally and returned from [`http.request()`][]. It +represents an _in-progress_ request whose header has already been queued. The header is still mutable using the [`setHeader(name, value)`][], - [`getHeader(name)`][], [`removeHeader(name)`][] API. The actual header will + [`getHeader(name)`][], [`removeHeader(name)`][] API. The actual header will be sent along with the first data chunk or when calling [`request.end()`][]. To get the response, add a listener for [`'response'`][] to the request object. [`'response'`][] will be emitted from the request object when the response -headers have been received. The [`'response'`][] event is executed with one +headers have been received. The [`'response'`][] event is executed with one argument which is an instance of [`http.IncomingMessage`][]. During the [`'response'`][] event, one can add listeners to the response object; particularly to listen for the `'data'` event. If no [`'response'`][] handler is added, then the response will be -entirely discarded. However, if a [`'response'`][] event handler is added, +entirely discarded. However, if a [`'response'`][] event handler is added, then the data from the response object **must** be consumed, either by calling `response.read()` whenever there is a `'readable'` event, or by adding a `'data'` handler, or by calling the `.resume()` method. -Until the data is consumed, the `'end'` event will not fire. Also, until +Until the data is consumed, the `'end'` event will not fire. Also, until the data is read it will consume memory that can eventually lead to a 'process out of memory' error. 
@@ -542,7 +542,7 @@ For efficiency reasons, Node.js normally buffers the request headers until then tries to pack the request headers and data into a single TCP packet. That's usually desired (it saves a TCP round-trip), but not when the first -data is not sent until possibly much later. `request.flushHeaders()` bypasses +data is not sent until possibly much later. `request.flushHeaders()` bypasses the optimization and kickstarts the request. ### request.getHeader(name) @@ -671,9 +671,9 @@ added: v0.1.29 * `encoding` {string} * `callback` {Function} -Sends a chunk of the body. By calling this method +Sends a chunk of the body. By calling this method many times, a request body can be sent to a -server--in that case it is suggested to use the +server — in that case it is suggested to use the `['Transfer-Encoding', 'chunked']` header line when creating the request. @@ -859,7 +859,7 @@ added: v0.1.90 * `callback` {Function} -Stops the server from accepting new connections. See [`net.Server.close()`][]. +Stops the server from accepting new connections. See [`net.Server.close()`][]. ### server.listen() @@ -902,7 +902,7 @@ If there is a `'timeout'` event listener on the Server object, then it will be called with the timed-out socket as an argument. By default, the Server's timeout value is 2 minutes, and sockets are -destroyed automatically if they time out. However, if a callback is assigned +destroyed automatically if they time out. However, if a callback is assigned to the Server's `'timeout'` event, timeouts must be handled explicitly. Returns `server`. @@ -948,7 +948,7 @@ affects new connections to the server, not any existing connections. added: v0.1.17 --> -This object is created internally by an HTTP server--not by the user. It is +This object is created internally by an HTTP server — not by the user. It is passed as the second parameter to the [`'request'`][] event. 
The response implements, but does not inherit from, the [Writable Stream][] @@ -1168,8 +1168,8 @@ added: v0.4.0 * `name` {string} * `value` {string | string[]} -Sets a single header value for implicit headers. If this header already exists -in the to-be-sent headers, its value will be replaced. Use an array of strings +Sets a single header value for implicit headers. If this header already exists +in the to-be-sent headers, its value will be replaced. Use an array of strings here to send multiple headers with the same name. Example: @@ -1209,12 +1209,12 @@ added: v0.9.12 * `msecs` {number} * `callback` {Function} -Sets the Socket's timeout value to `msecs`. If a callback is +Sets the Socket's timeout value to `msecs`. If a callback is provided, then it is added as a listener on the `'timeout'` event on the response object. If no `'timeout'` listener is added to the request, the response, or -the server, then sockets are destroyed when they time out. If a handler is +the server, then sockets are destroyed when they time out. If a handler is assigned to the request, the response, or the server's `'timeout'` events, timed out sockets must be handled explicitly. @@ -1495,8 +1495,8 @@ added: v0.11.6 The raw request/response headers list exactly as they were received. -Note that the keys and values are in the same list. It is *not* a -list of tuples. So, the even-numbered offsets are key values, and the +Note that the keys and values are in the same list. It is *not* a +list of tuples. So, the even-numbered offsets are key values, and the odd-numbered offsets are the associated values. Header names are not lowercased, and duplicates are not merged. @@ -1523,7 +1523,7 @@ added: v0.11.6 * {Array} The raw request/response trailer keys and values exactly as they were -received. Only populated at the `'end'` event. +received. Only populated at the `'end'` event. ### message.setTimeout(msecs, callback) -This object is created internally by an HTTP server--not by the user. 
It is +This object is created internally by an HTTP server — not by the user. It is passed as the second parameter to the [`'request'`][] event. The response implements, but does not inherit from, the [Writable Stream][] @@ -2776,8 +2776,8 @@ added: v8.4.0 * `name` {string} * `value` {string|string[]} -Sets a single header value for implicit headers. If this header already exists -in the to-be-sent headers, its value will be replaced. Use an array of strings +Sets a single header value for implicit headers. If this header already exists +in the to-be-sent headers, its value will be replaced. Use an array of strings here to send multiple headers with the same name. Example: @@ -2817,7 +2817,7 @@ added: v8.4.0 * `msecs` {number} * `callback` {Function} -Sets the [`Http2Stream`]()'s timeout value to `msecs`. If a callback is +Sets the [`Http2Stream`]()'s timeout value to `msecs`. If a callback is provided, then it is added as a listener on the `'timeout'` event on the response object. @@ -2976,7 +2976,7 @@ response.writeHead(200, { ``` Note that Content-Length is given in bytes not characters. The -`Buffer.byteLength()` API may be used to determine the number of bytes in a +`Buffer.byteLength()` API may be used to determine the number of bytes in a given encoding. On outbound messages, Node.js does not check if Content-Length and the length of the body being transmitted are equal or not. However, when receiving messages, Node.js will automatically reject messages when the diff --git a/doc/api/https.md b/doc/api/https.md index 1274a287643e0c..c9c4b86bc7360f 100644 --- a/doc/api/https.md +++ b/doc/api/https.md @@ -12,7 +12,7 @@ separate module. added: v0.4.5 --> -An [`Agent`][] object for HTTPS similar to [`http.Agent`][]. See +An [`Agent`][] object for HTTPS similar to [`http.Agent`][]. See [`https.request()`][] for more information. 
## Class: https.Server diff --git a/doc/api/modules.md b/doc/api/modules.md index aa03b414c35dfa..277dff8f6b6fb9 100644 --- a/doc/api/modules.md +++ b/doc/api/modules.md @@ -123,12 +123,12 @@ the version that is symlinked into Furthermore, to make the module lookup process even more optimal, rather than putting packages directly in `/usr/lib/node`, we could put them in -`/usr/lib/node_modules/<name>/<version>`. Then Node.js will not bother +`/usr/lib/node_modules/<name>/<version>`. Then Node.js will not bother looking for missing dependencies in `/usr/node_modules` or `/node_modules`. In order to make modules available to the Node.js REPL, it might be useful to also add the `/usr/lib/node_modules` folder to the `$NODE_PATH` environment -variable. Since the module lookups using `node_modules` folders are all +variable. Since the module lookups using `node_modules` folders are all relative, and based on the real path of the files making the calls to `require()`, the packages themselves can be anywhere. @@ -196,12 +196,12 @@ NODE_MODULES_PATHS(START) -Modules are cached after the first time they are loaded. This means +Modules are cached after the first time they are loaded. This means (among other things) that every call to `require('foo')` will get exactly the same object returned, if it would resolve to the same file. Multiple calls to `require('foo')` may not cause the module code to be -executed multiple times. This is an important feature. With it, +executed multiple times. This is an important feature. With it, "partially done" objects can be returned, thus allowing transitive dependencies to be loaded even when they would cause cycles. @@ -212,7 +212,7 @@ that function. -Modules are cached based on their resolved filename. 
Since modules may resolve to a different filename based on the location of the calling module (loading from `node_modules` folders), it is not a *guarantee* that `require('foo')` will always return the exact same object, if it @@ -228,14 +228,14 @@ irrespective of whether or not `./foo` and `./FOO` are the same file. -Node.js has several modules compiled into the binary. These modules are +Node.js has several modules compiled into the binary. These modules are described in greater detail elsewhere in this documentation. The core modules are defined within Node.js's source and are located in the `lib/` folder. Core modules are always preferentially loaded if their identifier is -passed to `require()`. For instance, `require('http')` will always +passed to `require()`. For instance, `require('http')` will always return the built in HTTP module, even if there is a file by that name. ## Cycles @@ -275,13 +275,13 @@ console.log('b done'); console.log('main starting'); const a = require('./a.js'); const b = require('./b.js'); -console.log('in main, a.done=%j, b.done=%j', a.done, b.done); +console.log('in main, a.done = %j, b.done = %j', a.done, b.done); ``` -When `main.js` loads `a.js`, then `a.js` in turn loads `b.js`. At that -point, `b.js` tries to load `a.js`. In order to prevent an infinite +When `main.js` loads `a.js`, then `a.js` in turn loads `b.js`. At that +point, `b.js` tries to load `a.js`. In order to prevent an infinite loop, an **unfinished copy** of the `a.js` exports object is returned to the -`b.js` module. `b.js` then finishes loading, and its `exports` object is +`b.js` module. `b.js` then finishes loading, and its `exports` object is provided to the `a.js` module. By the time `main.js` has loaded both modules, they're both finished. 
@@ -296,7 +296,7 @@ in b, a.done = false b done in a, b.done = true a done -in main, a.done=true, b.done=true +in main, a.done = true, b.done = true ``` Careful planning is required to allow cyclic module dependencies to work @@ -314,7 +314,7 @@ required filename with the added extensions: `.js`, `.json`, and finally parsed as JSON text files. `.node` files are interpreted as compiled addon modules loaded with `dlopen`. -A required module prefixed with `'/'` is an absolute path to the file. For +A required module prefixed with `'/'` is an absolute path to the file. For example, `require('/home/marco/foo.js')` will load the file at `/home/marco/foo.js`. @@ -338,7 +338,7 @@ There are three ways in which a folder may be passed to `require()` as an argument. The first is to create a `package.json` file in the root of the folder, -which specifies a `main` module. An example package.json file might +which specifies a `main` module. An example package.json file might look like this: ```json @@ -352,7 +352,7 @@ If this was in a folder at `./some-library`, then This is the extent of Node.js's awareness of package.json files. -If the file specified by the `"main"` entry of `package.json` is missing and +If the file specified by the `'main'` entry of `package.json` is missing and can not be resolved, Node.js will report the entire module as missing with the default error: @@ -362,7 +362,7 @@ Error: Cannot find module 'some-library' If there is no package.json file present in the directory, then Node.js will attempt to load an `index.js` or `index.node` file out of that -directory. For example, if there was no package.json file in the above +directory. For example, if there was no package.json file in the above example, then `require('./some-library')` would attempt to load: * `./some-library/index.js` @@ -415,7 +415,7 @@ varying paths before the current [module resolution][] algorithm was frozen. 
`NODE_PATH` is still supported, but is less necessary now that the Node.js ecosystem has settled on a convention for locating dependent modules. Sometimes deployments that rely on `NODE_PATH` show surprising behavior -when people are unaware that `NODE_PATH` must be set. Sometimes a +when people are unaware that `NODE_PATH` must be set. Sometimes a module's dependencies change, causing a different version (or even a different module) to be loaded as the `NODE_PATH` is searched. @@ -583,14 +583,14 @@ Process files with the extension `.sjs` as `.js`: require.extensions['.sjs'] = require.extensions['.js']; ``` -**Deprecated** In the past, this list has been used to load +**Deprecated** In the past, this list has been used to load non-JavaScript modules into Node.js by compiling them on-demand. However, in practice, there are much better ways to do this, such as loading modules via some other Node.js program, or compiling them to JavaScript ahead of time. Since the module system is locked, this feature will probably never go -away. However, it may have subtle bugs and complexities that are best +away. However, it may have subtle bugs and complexities that are best left untouched. Note that the number of file system operations that the module system @@ -680,7 +680,7 @@ added: v0.1.16 * {Object} In each module, the `module` free variable is a reference to the object -representing the current module. For convenience, `module.exports` is +representing the current module. For convenience, `module.exports` is also accessible via the `exports` module-global. `module` is not actually a global but rather local to each module. @@ -731,7 +731,7 @@ a.on('ready', () => { Note that assignment to `module.exports` must be done immediately. It cannot be -done in any callbacks. This does not work: +done in any callbacks. This does not work: x.js: @@ -811,7 +811,7 @@ added: v0.1.16 * {string} -The identifier for the module. 
Typically this is the fully resolved +The identifier for the module. Typically this is the fully resolved filename. ### module.loaded @@ -867,7 +867,7 @@ added: v0.3.7 * {Object} Provides general utility methods when interacting with instances of -`Module` -- the `module` variable often seen in file modules. Accessed +`Module` — the `module` variable often seen in file modules. Accessed via `require('module')`. ### module.builtinModules diff --git a/doc/api/n-api.md b/doc/api/n-api.md index ad2ea435a7d795..a2589673eb0bdf 100644 --- a/doc/api/n-api.md +++ b/doc/api/n-api.md @@ -14,7 +14,7 @@ compiled for one version to run on later versions of Node.js without recompilation. Addons are built/packaged with the same approach/tools -outlined in the section titled [C++ Addons](addons.html). +outlined in the section titled [C++ Addons](addons.html). The only difference is the set of APIs that are used by the native code. Instead of using the V8 or [Native Abstractions for Node.js][] APIs, the functions available in the N-API are used. @@ -310,7 +310,7 @@ where the native code can catch the exception, take the appropriate action, and then continue. This is only recommended in specific cases where it is known that the exception can be safely handled. In these cases [`napi_get_and_clear_last_exception`][] can be used to get and -clear the exception. On success, result will contain the handle to +clear the exception. On success, result will contain the handle to the last JavaScript Object thrown. If it is determined, after retrieving the exception, the exception cannot be handled after all it can be re-thrown it with [`napi_throw`][] where error is the @@ -318,7 +318,7 @@ JavaScript Error object to be thrown. 
The following utility functions are also available in case native code needs to throw an exception or determine if a `napi_value` is an instance -of a JavaScript `Error` object: [`napi_throw_error`][], +of a JavaScript `Error` object: [`napi_throw_error`][], [`napi_throw_type_error`][], [`napi_throw_range_error`][] and [`napi_is_error`][]. @@ -329,7 +329,7 @@ where result is the napi_value that refers to the newly created JavaScript Error object. The Node.js project is adding error codes to all of the errors -generated internally. The goal is for applications to use these +generated internally. The goal is for applications to use these error codes for all error checking. The associated error messages will remain, but will only be meant to be used for logging and display with the expectation that the message can change without @@ -337,7 +337,7 @@ SemVer applying. In order to support this model with N-API, both in internal functionality and for module specific functionality (as its good practice), the `throw_` and `create_` functions take an optional code parameter which is the string for the code -to be added to the error object. If the optional parameter is NULL +to be added to the error object. If the optional parameter is NULL then no code will be associated with the error. If a code is provided, the name associated with the error is also updated to be: @@ -346,7 +346,7 @@ originalName [code] ``` where originalName is the original name associated with the error -and code is the code that was provided. For example if the code +and code is the code that was provided. For example if the code is 'ERR_ERROR_1' and a TypeError is being created the name will be: ```text @@ -2409,7 +2409,7 @@ They can be one or more of the following bitflags: - `napi_default` - Used to indicate that no explicit attributes are set on the given property. By default, a property is read only, not enumerable and not configurable. 
-- `napi_writable` - Used to indicate that a given property is writable. +- `napi_writable` - Used to indicate that a given property is writable. - `napi_enumerable` - Used to indicate that a given property is enumerable. - `napi_configurable` - Used to indicate that a given property is configurable, as defined in [Section 6.1.7.1][] of the [ECMAScript Language Specification][]. @@ -2439,7 +2439,7 @@ typedef struct { encoded as UTF8. One of `utf8name` or `name` must be provided for the property. - `name`: Optional napi_value that points to a JavaScript string or symbol -to be used as the key for the property. One of `utf8name` or `name` must +to be used as the key for the property. One of `utf8name` or `name` must be provided for the property. - `value`: The value that's retrieved by a get access of the property if the property is a data property. If this is passed in, set `getter`, `setter`, @@ -2891,7 +2891,7 @@ napi_value Init(napi_env env, napi_value exports) { napi_status status; napi_value fn; - status = napi_create_function(env, NULL, 0, SayHello, NULL, &fn); + status = napi_create_function(env, NULL, 0, SayHello, NULL, &fn); if (status != napi_ok) return NULL; status = napi_set_named_property(env, exports, "sayHello", fn); @@ -3259,7 +3259,7 @@ napi_status napi_queue_async_work(napi_env env, napi_async_work work); ``` -[`napi_cancel_async_work`][] can be used if the work needs +[`napi_cancel_async_work`][] can be used if the work needs to be cancelled before the work has started execution. After calling [`napi_cancel_async_work`][], the `complete` callback @@ -3364,7 +3364,7 @@ napi_status napi_cancel_async_work(napi_env env, Returns `napi_ok` if the API succeeded. This API cancels queued work if it has not yet -been started. If it has already started executing, it cannot be +been started. If it has already started executing, it cannot be cancelled and `napi_generic_failure` will be returned. 
If successful, the `complete` callback will be invoked with a status value of `napi_cancelled`. The work should not be deleted before the `complete` @@ -3481,7 +3481,7 @@ from [`napi_async_init`][]. There are cases (for example resolving promises) where it is necessary to have the equivalent of the scope associated with a callback -in place when making certain N-API calls. If there is no other script on +in place when making certain N-API calls. If there is no other script on the stack the [`napi_open_callback_scope`][] and [`napi_close_callback_scope`][] functions can be used to open/close the required scope. @@ -3544,7 +3544,7 @@ napi_status napi_get_version(napi_env env, Returns `napi_ok` if the API succeeded. This API returns the highest N-API version supported by the -Node.js runtime. N-API is planned to be additive such that +Node.js runtime. N-API is planned to be additive such that newer releases of Node.js may support additional API functions. In order to allow an addon to use a newer function when running with versions of Node.js that support it, while providing diff --git a/doc/api/net.md b/doc/api/net.md index 4b3897f61cb757..723582f5dc4d90 100644 --- a/doc/api/net.md +++ b/doc/api/net.md @@ -186,8 +186,8 @@ Possible signatures: * [`server.listen([port][, host][, backlog][, callback])`][`server.listen(port, host)`] for TCP servers -This function is asynchronous. When the server starts listening, the -[`'listening'`][] event will be emitted. The last parameter `callback` +This function is asynchronous. When the server starts listening, the +[`'listening'`][] event will be emitted. The last parameter `callback` will be added as a listener for the [`'listening'`][] event. All `listen()` methods can take a `backlog` parameter to specify the maximum @@ -418,8 +418,8 @@ added: v0.1.90 * {Buffer} -Emitted when data is received. The argument `data` will be a `Buffer` or -`String`. Encoding of data is set by [`socket.setEncoding()`][]. 
+Emitted when data is received. The argument `data` will be a `Buffer` or +`String`. Encoding of data is set by [`socket.setEncoding()`][]. Note that the **data will be lost** if there is no listener when a `Socket` emits a `'data'` event. @@ -456,7 +456,7 @@ added: v0.1.90 * {Error} -Emitted when an error occurs. The `'close'` event will be called directly +Emitted when an error occurs. The `'close'` event will be called directly following this event. ### Event: 'lookup' @@ -471,9 +471,9 @@ changes: Emitted after resolving the hostname but before connecting. Not applicable to UNIX sockets. -* `err` {Error|null} The error object. See [`dns.lookup()`][]. +* `err` {Error|null} The error object. See [`dns.lookup()`][]. * `address` {string} The IP address. -* `family` {string|null} The address type. See [`dns.lookup()`][]. +* `family` {string|null} The address type. See [`dns.lookup()`][]. * `host` {string} The hostname. ### Event: 'timeout' @@ -819,7 +819,7 @@ added: v0.1.90 --> Sends data on the socket. The second parameter specifies the encoding in the -case of a string--it defaults to UTF8 encoding. +case of a string — it defaults to UTF8 encoding. Returns `true` if the entire data was flushed successfully to the kernel buffer. Returns `false` if all or part of the data was queued in user memory. diff --git a/doc/api/os.md b/doc/api/os.md index 40dd948d730ffb..d9015d5a0aa7a6 100644 --- a/doc/api/os.md +++ b/doc/api/os.md @@ -223,7 +223,7 @@ The `os.loadavg()` method returns an array containing the 1, 5, and 15 minute load averages. The load average is a measure of system activity, calculated by the operating -system and expressed as a fractional number. As a rule of thumb, the load +system and expressed as a fractional number. As a rule of thumb, the load average should ideally be less than the number of logical CPUs in the system. 
The load average is a UNIX-specific concept with no real equivalent on @@ -402,7 +402,7 @@ added: v6.0.0 * Returns: {Object} The `os.userInfo()` method returns information about the currently effective -user -- on POSIX platforms, this is typically a subset of the password file. The +user — on POSIX platforms, this is typically a subset of the password file. The returned object includes the `username`, `uid`, `gid`, `shell`, and `homedir`. On Windows, the `uid` and `gid` fields are `-1`, and `shell` is `null`. diff --git a/doc/api/path.md b/doc/api/path.md index 5cfeba53402e10..51202195f3c590 100644 --- a/doc/api/path.md +++ b/doc/api/path.md @@ -159,7 +159,7 @@ changes: The `path.extname()` method returns the extension of the `path`, from the last occurrence of the `.` (period) character to end of string in the last portion of -the `path`. If there is no `.` in the last portion of the `path`, or if the +the `path`. If there is no `.` in the last portion of the `path`, or if the first character of the basename of `path` (see `path.basename()`) is `.`, then an empty string is returned. @@ -388,7 +388,7 @@ path.parse('/home/user/dir/file.txt'); │ root │ │ name │ ext │ " / home/user/dir / file .txt " └──────┴──────────────┴──────┴─────┘ -(all spaces in the "" line should be ignored -- they are purely for formatting) +(all spaces in the "" line should be ignored — they are purely for formatting) ``` On Windows: @@ -410,7 +410,7 @@ path.parse('C:\\path\\dir\\file.txt'); │ root │ │ name │ ext │ " C:\ path\dir \ file .txt " └──────┴──────────────┴──────┴─────┘ -(all spaces in the "" line should be ignored -- they are purely for formatting) +(all spaces in the "" line should be ignored — they are purely for formatting) ``` A [`TypeError`][] is thrown if `path` is not a string. 
diff --git a/doc/api/process.md b/doc/api/process.md index 0f5c6f8b65a41c..6435c4d727ee23 100644 --- a/doc/api/process.md +++ b/doc/api/process.md @@ -185,7 +185,7 @@ process will exit with a non-zero exit code and the stack trace will be printed. This is to avoid infinite recursion. Attempting to resume normally after an uncaught exception can be similar to -pulling out of the power cord when upgrading a computer -- nine out of ten +pulling out of the power cord when upgrading a computer — nine out of ten times nothing happens - but the 10th time, the system becomes corrupted. The correct use of `'uncaughtException'` is to perform synchronous cleanup @@ -207,11 +207,11 @@ changes: description: Not handling Promise rejections has been deprecated. - version: v6.6.0 pr-url: https://github.com/nodejs/node/pull/8223 - description: Unhandled Promise rejections have been will now emit + description: Unhandled Promise rejections will now emit a process warning. --> -The `'unhandledRejection`' event is emitted whenever a `Promise` is rejected and +The `'unhandledRejection'` event is emitted whenever a `Promise` is rejected and no error handler is attached to the promise within a turn of the event loop. When programming with Promises, exceptions are encapsulated as "rejected promises". Rejections can be caught and handled using [`promise.catch()`][] and @@ -353,7 +353,7 @@ The name of each event will be the uppercase common name for the signal (e.g. process.stdin.resume(); process.on('SIGINT', () => { - console.log('Received SIGINT. Press Control-D to exit.'); + console.log('Received SIGINT. Press Control-D to exit.'); }); // Using a single function to handle multiple signals @@ -366,7 +366,7 @@ process.on('SIGTERM', handle); ``` * `SIGUSR1` is reserved by Node.js to start the [debugger][]. It's possible to - install a listener but doing so will _not_ stop the debugger from starting. + install a listener but doing so might interfere with the debugger. 
* `SIGTERM` and `SIGINT` have default handlers on non-Windows platforms that reset the terminal mode before exiting with code `128 + signal number`. If one of these signals has a listener installed, its default behavior will be @@ -437,7 +437,7 @@ added: v0.1.27 The `process.argv` property returns an array containing the command line arguments passed when the Node.js process was launched. The first element will be [`process.execPath`]. See `process.argv0` if access to the original value of -`argv[0]` is needed. The second element will be the path to the JavaScript +`argv[0]` is needed. The second element will be the path to the JavaScript file being executed. The remaining elements will be any additional command line arguments. @@ -971,7 +971,7 @@ added: v0.1.13 The `process.exit()` method instructs Node.js to terminate the process synchronously with an exit status of `code`. If `code` is omitted, exit uses either the 'success' code `0` or the value of `process.exitCode` if it has been -set. Node.js will not terminate until all the [`'exit'`] event listeners are +set. Node.js will not terminate until all the [`'exit'`] event listeners are called. To exit with a 'failure' code: @@ -1223,7 +1223,7 @@ Windows platforms will throw an error if the `pid` is used to kill a process group. Even though the name of this function is `process.kill()`, it is really just a -signal sender, like the `kill` system call. The signal sent may do something +signal sender, like the `kill` system call. The signal sent may do something other than kill the target process. ```js @@ -1321,7 +1321,7 @@ Once the current turn of the event loop turn runs to completion, all callbacks currently in the next tick queue will be called. This is *not* a simple alias to [`setTimeout(fn, 0)`][]. It is much more -efficient. It runs before any additional I/O events (including +efficient. It runs before any additional I/O events (including timers) fire in subsequent ticks of the event loop. 
```js @@ -1356,7 +1356,7 @@ thing.getReadyForStuff(); ``` It is very important for APIs to be either 100% synchronous or 100% -asynchronous. Consider this example: +asynchronous. Consider this example: ```js // WARNING! DO NOT USE! BAD UNSAFE HAZARD! @@ -1398,7 +1398,7 @@ function definitelyAsync(arg, cb) { ``` The next tick queue is completely drained on each pass of the event loop -**before** additional I/O is processed. As a result, recursively setting +**before** additional I/O is processed. As a result, recursively setting nextTick callbacks will block any I/O from happening, just like a `while(true);` loop. @@ -1500,7 +1500,7 @@ tarball. builds of Node.js and will be missing on all other platforms._ * `lts` {string} a string label identifying the [LTS][] label for this release. This property only exists for LTS releases and is `undefined` for all other - release types, including _Current_ releases. Currently the valid values are: + release types, including _Current_ releases. Currently the valid values are: - `'Argon'` for the 4.x LTS line beginning with 4.2.0. - `'Boron'` for the 6.x LTS line beginning with 6.9.0. - `'Carbon'` for the 8.x LTS line beginning with 8.9.1. @@ -1578,7 +1578,7 @@ added: v2.0.0 The `process.seteuid()` method sets the effective user identity of the process. (See seteuid(2).) The `id` can be passed as either a numeric ID or a username -string. If a username is specified, the method blocks while resolving the +string. If a username is specified, the method blocks while resolving the associated numeric ID. ```js @@ -1604,7 +1604,7 @@ added: v0.1.31 * `id` {string|number} The group name or ID The `process.setgid()` method sets the group identity of the process. (See -setgid(2).) The `id` can be passed as either a numeric ID or a group name +setgid(2).) The `id` can be passed as either a numeric ID or a group name string. If a group name is specified, this method blocks while resolving the associated numeric ID. 
@@ -1645,7 +1645,7 @@ added: v0.1.28 --> The `process.setuid(id)` method sets the user identity of the process. (See -setuid(2).) The `id` can be passed as either a numeric ID or a username string. +setuid(2).) The `id` can be passed as either a numeric ID or a username string. If a username is specified, the method blocks while resolving the associated numeric ID. @@ -1826,7 +1826,7 @@ When a new value is assigned, different platforms will impose different maximum length restrictions on the title. Usually such restrictions are quite limited. For instance, on Linux and macOS, `process.title` is limited to the size of the binary name plus the length of the command line arguments because setting the -`process.title` overwrites the `argv` memory of the process. Node.js v0.8 +`process.title` overwrites the `argv` memory of the process. Node.js v0.8 allowed for longer process title strings by also overwriting the `environ` memory but that was potentially insecure and confusing in some (rather obscure) cases. @@ -1937,7 +1937,7 @@ Will generate an object similar to: ## Exit Codes Node.js will normally exit with a `0` status code when no more async -operations are pending. The following status codes are used in other +operations are pending. The following status codes are used in other cases: * `1` **Uncaught Fatal Exception** - There was an uncaught exception, @@ -1945,12 +1945,12 @@ cases: handler. * `2` - Unused (reserved by Bash for builtin misuse) * `3` **Internal JavaScript Parse Error** - The JavaScript source code - internal in Node.js's bootstrapping process caused a parse error. This + internal in Node.js's bootstrapping process caused a parse error. This is extremely rare, and generally can only happen during development of Node.js itself. * `4` **Internal JavaScript Evaluation Failure** - The JavaScript source code internal in Node.js's bootstrapping process failed to - return a function value when evaluated. 
This is extremely rare, and + return a function value when evaluated. This is extremely rare, and generally can only happen during development of Node.js itself. * `5` **Fatal Error** - There was a fatal unrecoverable error in V8. Typically a message will be printed to stderr with the prefix `FATAL @@ -1960,22 +1960,22 @@ cases: function was somehow set to a non-function, and could not be called. * `7` **Internal Exception Handler Run-Time Failure** - There was an uncaught exception, and the internal fatal exception handler - function itself threw an error while attempting to handle it. This + function itself threw an error while attempting to handle it. This can happen, for example, if a [`'uncaughtException'`][] or `domain.on('error')` handler throws an error. -* `8` - Unused. In previous versions of Node.js, exit code 8 sometimes +* `8` - Unused. In previous versions of Node.js, exit code 8 sometimes indicated an uncaught exception. * `9` - **Invalid Argument** - Either an unknown option was specified, or an option requiring a value was provided without a value. * `10` **Internal JavaScript Run-Time Failure** - The JavaScript source code internal in Node.js's bootstrapping process threw an error - when the bootstrapping function was called. This is extremely rare, + when the bootstrapping function was called. This is extremely rare, and generally can only happen during development of Node.js itself. * `12` **Invalid Debug Argument** - The `--inspect` and/or `--inspect-brk` options were set, but the port number chosen was invalid or unavailable. * `>128` **Signal Exits** - If Node.js receives a fatal signal such as `SIGKILL` or `SIGHUP`, then its exit code will be `128` plus the - value of the signal code. This is a standard POSIX practice, since + value of the signal code. This is a standard POSIX practice, since exit codes are defined to be 7-bit integers, and signal exits set the high-order bit, and then contain the value of the signal code. 
For example, signal `SIGABRT` has value `6`, so the expected exit diff --git a/doc/api/readline.md b/doc/api/readline.md index 4a9d6437af5a1c..548a51f4c571fe 100644 --- a/doc/api/readline.md +++ b/doc/api/readline.md @@ -285,7 +285,7 @@ added: v0.1.98 * `shift` {boolean} `true` to indicate the `<Shift>` key. * `name` {string} The name of a key. -The `rl.write()` method will write either `data` or a key sequence identified +The `rl.write()` method will write either `data` or a key sequence identified by `key` to the `output`. The `key` argument is supported only if `output` is a [TTY][] text terminal. diff --git a/doc/api/repl.md b/doc/api/repl.md index a93d438d8c3cfb..b1469bdcda455f 100644 --- a/doc/api/repl.md +++ b/doc/api/repl.md @@ -438,8 +438,8 @@ changes: Defaults to checking the value of the `isTTY` property on the `output` stream upon instantiation. * `eval` {Function} The function to be used when evaluating each given line - of input. Defaults to an async wrapper for the JavaScript `eval()` - function. An `eval` function can error with `repl.Recoverable` to indicate + of input. **Default:** an async wrapper for the JavaScript `eval()` + function. An `eval` function can error with `repl.Recoverable` to indicate the input was incomplete and prompt for additional lines. * `useColors` {boolean} If `true`, specifies that the default `writer` function should include ANSI color styling to REPL output. If a custom @@ -508,7 +508,7 @@ environment variables: - `NODE_REPL_HISTORY` - When a valid path is given, persistent REPL history will be saved to the specified file rather than `.node_repl_history` in the - user's home directory. Setting this value to `""` will disable persistent + user's home directory. Setting this value to `''` will disable persistent REPL history. Whitespace will be trimmed from the value. - `NODE_REPL_HISTORY_SIZE` - Defaults to `1000`. Controls how many lines of history will be persisted if history is available. Must be a positive number. 
@@ -521,7 +521,7 @@ environment variables: By default, the Node.js REPL will persist history between `node` REPL sessions by saving inputs to a `.node_repl_history` file located in the user's home directory. This can be disabled by setting the environment variable -`NODE_REPL_HISTORY=""`. +`NODE_REPL_HISTORY=''`. #### NODE_REPL_HISTORY_FILE Returns the bound address, the address family name, and port of the -server as reported by the operating system. See [`net.Server.address()`][] for +server as reported by the operating system. See [`net.Server.address()`][] for more information. ### server.close([callback]) @@ -637,7 +637,7 @@ For example: raw: < RAW DER buffer >, pubkey: < RAW DER buffer >, valid_from: 'Nov 11 09:52:22 2009 GMT', - valid_to: 'Nov 6 09:52:22 2029 GMT', + valid_to: 'Nov 6 09:52:22 2029 GMT', fingerprint: '2A:7A:C2:DD:E5:F9:CC:53:72:35:99:7A:02:5A:71:38:52:EC:8A:DF', fingerprint256: '2A:7A:C2:DD:E5:F9:CC:53:72:35:99:7A:02:5A:71:38:52:EC:8A:DF:00:11:22:33:44:55:66:77:88:99:AA:BB', serialNumber: 'B9B0D332A1AA5635' } @@ -860,7 +860,7 @@ changes: rather than creating a new socket. Typically, this is an instance of [`net.Socket`][], but any `Duplex` stream is allowed. If this option is specified, `path`, `host` and `port` are ignored, - except for certificate validation. Usually, a socket is already connected + except for certificate validation. Usually, a socket is already connected when passed to `tls.connect()`, but it can be connected later. Note that connection/disconnection/destruction of `socket` is the user's responsibility, calling `tls.connect()` will not cause `net.connect()` to be @@ -1024,7 +1024,7 @@ changes: it is not. * `key` {string|string[]|Buffer|Buffer[]|Object[]} Optional private keys in PEM format. PEM allows the option of private keys being encrypted. Encrypted - keys will be decrypted with `options.passphrase`. Multiple keys using + keys will be decrypted with `options.passphrase`. 
Multiple keys using different algorithms can be provided either as an array of unencrypted key strings or buffers, or an array of objects in the form `{pem: [, passphrase: ]}`. The object form can only occur in @@ -1037,7 +1037,7 @@ changes: consist of the PEM formatted certificate for a provided private `key`, followed by the PEM formatted intermediate certificates (if any), in order, and not including the root CA (the root CA must be pre-known to the peer, - see `ca`). When providing multiple cert chains, they do not have to be in + see `ca`). When providing multiple cert chains, they do not have to be in the same order as their private keys in `key`. If the intermediate certificates are not provided, the peer will not be able to validate the certificate, and the handshake will fail. @@ -1047,7 +1047,7 @@ changes: using this option. The value can be a string or Buffer, or an Array of strings and/or Buffers. Any string or Buffer can contain multiple PEM CAs concatenated together. The peer's certificate must be chainable to a CA - trusted by the server for the connection to be authenticated. When using + trusted by the server for the connection to be authenticated. When using certificates that are not chainable to a well-known CA, the certificate's CA must be explicitly specified as a trusted or the connection will fail to authenticate. @@ -1057,7 +1057,7 @@ changes: For self-signed certificates, the certificate is its own CA, and must be provided. * `ciphers` {string} Optional cipher suite specification, replacing the - default. For more information, see [modifying the default cipher suite][]. + default. For more information, see [modifying the default cipher suite][]. * `honorCipherOrder` {boolean} Attempt to use the server's cipher suite preferences instead of the client's. 
When `true`, causes `SSL_OP_CIPHER_SERVER_PREFERENCE` to be set in `secureOptions`, see @@ -1169,7 +1169,7 @@ changes: servers, the identity options (`pfx` or `key`/`cert`) are usually required. * `secureConnectionListener` {Function} -Creates a new [tls.Server][]. The `secureConnectionListener`, if provided, is +Creates a new [tls.Server][]. The `secureConnectionListener`, if provided, is automatically set as a listener for the [`'secureConnection'`][] event. The `ticketKeys` options is automatically shared between `cluster` module diff --git a/doc/api/url.md b/doc/api/url.md index 513cb559b065c6..557b1a9dd81a9f 100644 --- a/doc/api/url.md +++ b/doc/api/url.md @@ -51,7 +51,7 @@ WHATWG URL's `origin` property includes `protocol` and `host`, but not ├─────────────┴─────────────────────┴─────────────────────┴──────────┴────────────────┴───────┤ │ href │ └─────────────────────────────────────────────────────────────────────────────────────────────┘ -(all spaces in the "" line should be ignored -- they are purely for formatting) +(all spaces in the "" line should be ignored — they are purely for formatting) ``` Parsing the URL string using the WHATWG API: @@ -556,7 +556,7 @@ Instantiate a new `URLSearchParams` object with an iterable map in a way that is similar to [`Map`][]'s constructor. `iterable` can be an Array or any iterable object. That means `iterable` can be another `URLSearchParams`, in which case the constructor will simply create a clone of the provided -`URLSearchParams`. Elements of `iterable` are key-value pairs, and can +`URLSearchParams`. Elements of `iterable` are key-value pairs, and can themselves be any iterable object. Duplicate keys are allowed. 
diff --git a/doc/api/util.md b/doc/api/util.md index c3f25ea70a28cb..366da705ecb817 100644 --- a/doc/api/util.md +++ b/doc/api/util.md @@ -78,9 +78,9 @@ added: v0.11.3 The `util.debuglog()` method is used to create a function that conditionally writes debug messages to `stderr` based on the existence of the `NODE_DEBUG` -environment variable. If the `section` name appears within the value of that +environment variable. If the `section` name appears within the value of that environment variable, then the returned function operates similar to -[`console.error()`][]. If not, then the returned function is a no-op. +[`console.error()`][]. If not, then the returned function is a no-op. ```js const util = require('util'); @@ -96,7 +96,7 @@ it will output something like: FOO 3245: hello from foo [123] ``` -where `3245` is the process id. If it is not run with that +where `3245` is the process id. If it is not run with that environment variable set, then it will not print anything. The `section` supports wildcard also: @@ -178,7 +178,7 @@ corresponding argument. Supported placeholders are: * `%d` - Number (integer or floating point value). * `%i` - Integer. * `%f` - Floating point value. -* `%j` - JSON. Replaced with the string `'[Circular]'` if the argument +* `%j` - JSON. Replaced with the string `'[Circular]'` if the argument contains circular references. * `%o` - Object. A string representation of an object with generic JavaScript object formatting. @@ -266,7 +266,7 @@ that the two styles are [semantically incompatible][]. * `constructor` {Function} * `superConstructor` {Function} -Inherit the prototype methods from one [constructor][] into another. The +Inherit the prototype methods from one [constructor][] into another. The prototype of `constructor` will be set to a new object created from `superConstructor`. 
diff --git a/doc/api/v8.md b/doc/api/v8.md index 12fc9e6c71aac5..e9d3d15d014507 100644 --- a/doc/api/v8.md +++ b/doc/api/v8.md @@ -141,7 +141,7 @@ after the VM has started may result in unpredictable behavior, including crashes and data loss; or it may simply do nothing. The V8 options available for a version of Node.js may be determined by running -`node --v8-options`. An unofficial, community-maintained list of options +`node --v8-options`. An unofficial, community-maintained list of options and their effects is available [here][]. Usage: diff --git a/doc/api/vm.md b/doc/api/vm.md index 1358325fb1a3da..f350acf1bca06c 100644 --- a/doc/api/vm.md +++ b/doc/api/vm.md @@ -267,7 +267,7 @@ The URL of the current module, as set in the constructor. will be thrown. * `breakOnSigint` {boolean} If `true`, the execution will be terminated when `SIGINT` (Ctrl+C) is received. Existing handlers for the event that have - been attached via `process.on("SIGINT")` will be disabled during script + been attached via `process.on('SIGINT')` will be disabled during script execution, but will continue to work after that. If execution is interrupted, an [`Error`][] will be thrown. * Returns: {Promise} @@ -428,7 +428,7 @@ changes: will be thrown. * `breakOnSigint`: if `true`, the execution will be terminated when `SIGINT` (Ctrl+C) is received. Existing handlers for the - event that have been attached via `process.on("SIGINT")` will be disabled + event that have been attached via `process.on('SIGINT')` will be disabled during script execution, but will continue to work after that. If execution is terminated, an [`Error`][] will be thrown. 
@@ -856,7 +856,7 @@ associating it with the `sandbox` object is what this document refers to as [`eval()`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval [`script.runInContext()`]: #vm_script_runincontext_contextifiedsandbox_options [`script.runInThisContext()`]: #vm_script_runinthiscontext_options -[`url.origin`]: https://nodejs.org/api/url.html#url_url_origin +[`url.origin`]: url.html#url_url_origin [`vm.createContext()`]: #vm_vm_createcontext_sandbox_options [`vm.runInContext()`]: #vm_vm_runincontext_code_contextifiedsandbox_options [`vm.runInThisContext()`]: #vm_vm_runinthiscontext_code_options @@ -865,10 +865,10 @@ associating it with the `sandbox` object is what this document refers to as [Evaluate() concrete method]: https://tc39.github.io/ecma262/#sec-moduleevaluation [HostResolveImportedModule]: https://tc39.github.io/ecma262/#sec-hostresolveimportedmodule [Instantiate() concrete method]: https://tc39.github.io/ecma262/#sec-moduledeclarationinstantiation +[Source Text Module Record]: https://tc39.github.io/ecma262/#sec-source-text-module-records [V8 Embedder's Guide]: https://github.com/v8/v8/wiki/Embedder's%20Guide#contexts [command line option]: cli.html [contextified]: #vm_what_does_it_mean_to_contextify_an_object [global object]: https://es5.github.io/#x15.1 [indirect `eval()` call]: https://es5.github.io/#x10.4.2 [origin]: https://developer.mozilla.org/en-US/docs/Glossary/Origin -[Source Text Module Record]: https://tc39.github.io/ecma262/#sec-source-text-module-records diff --git a/doc/api/zlib.md b/doc/api/zlib.md index 5c02c3a409dbb1..3ba0ab66059c8c 100644 --- a/doc/api/zlib.md +++ b/doc/api/zlib.md @@ -165,7 +165,7 @@ The memory requirements for deflate are (in bytes): (1 << (windowBits + 2)) + (1 << (memLevel + 9)) ``` -That is: 128K for windowBits=15 + 128K for memLevel = 8 +That is: 128K for windowBits = 15 + 128K for memLevel = 8 (default values) plus a few kilobytes for small objects. 
For example, to reduce the default memory requirements from 256K to 128K, the @@ -178,20 +178,20 @@ const options = { windowBits: 14, memLevel: 7 }; This will, however, generally degrade compression. The memory requirements for inflate are (in bytes) `1 << windowBits`. -That is, 32K for windowBits=15 (default value) plus a few kilobytes +That is, 32K for windowBits = 15 (default value) plus a few kilobytes for small objects. This is in addition to a single internal output slab buffer of size `chunkSize`, which defaults to 16K. The speed of `zlib` compression is affected most dramatically by the -`level` setting. A higher level will result in better compression, but -will take longer to complete. A lower level will result in less +`level` setting. A higher level will result in better compression, but +will take longer to complete. A lower level will result in less compression, but will be much faster. In general, greater memory usage options will mean that Node.js has to make fewer calls to `zlib` because it will be able to process more data on -each `write` operation. So, this is another factor that affects the +each `write` operation. So, this is another factor that affects the speed, at the cost of memory usage. ## Flushing @@ -233,9 +233,9 @@ added: v0.5.8 All of the constants defined in `zlib.h` are also defined on `require('zlib').constants`. In the normal course of operations, it will not be -necessary to use these constants. They are documented so that their presence is +necessary to use these constants. They are documented so that their presence is not surprising. This section is taken almost directly from the -[zlib documentation][]. See for more +[zlib documentation][]. See for more details. Previously, the constants were available directly from `require('zlib')`, for @@ -298,7 +298,7 @@ changes: -Each class takes an `options` object. All options are optional. +Each class takes an `options` object. All options are optional. 
Note that some options are only relevant when compressing, and are ignored by the decompression classes. diff --git a/doc/changelogs/CHANGELOG_V9.md b/doc/changelogs/CHANGELOG_V9.md index 50cefbb97ba8bd..e8757799e3d93a 100644 --- a/doc/changelogs/CHANGELOG_V9.md +++ b/doc/changelogs/CHANGELOG_V9.md @@ -9,6 +9,7 @@ +9.11.0
    9.10.1
    9.10.0
    9.9.0
    @@ -39,6 +40,106 @@ * [io.js](CHANGELOG_IOJS.md) * [Archive](CHANGELOG_ARCHIVE.md) + +## 2018-04-04, Version 9.11.0 (Current), @MylesBorins prepared by @targos + +### Notable Changes + +* **deps**: + - Updated ICU to 61.1 (Steven R. Loomis) [#19621](https://github.com/nodejs/node/pull/19621) + Includes CLDR 33 (many new languages and data improvements). +* **fs**: + - Emit 'ready' event for `ReadStream` and `WriteStream` (Sameer Srivastava) [#19408](https://github.com/nodejs/node/pull/19408) +* **n-api**: + - Bump version of n-api supported (Michael Dawson) [#19497](https://github.com/nodejs/node/pull/19497) +* **net**: + - Emit 'ready' event for `Socket` (Sameer Srivastava) [#19408](https://github.com/nodejs/node/pull/19408) +* **Added new collaborators** + - [mafintosh](https://github.com/mafintosh) Mathias Buus + +### Commits + +* [[`0bd78dc391`](https://github.com/nodejs/node/commit/0bd78dc391)] - **buffer**: use v8::TypedArray::kMaxLength as buffer::kMaxLength (Joyee Cheung) [#19738](https://github.com/nodejs/node/pull/19738) +* [[`54b84f3c26`](https://github.com/nodejs/node/commit/54b84f3c26)] - **buffer**: remove "new" from deprecation message (Rich Trott) [#19687](https://github.com/nodejs/node/pull/19687) +* [[`0127712cf5`](https://github.com/nodejs/node/commit/0127712cf5)] - **build**: introduce make jstest (Joyee Cheung) [#19324](https://github.com/nodejs/node/pull/19324) +* [[`58f61dbf8e`](https://github.com/nodejs/node/commit/58f61dbf8e)] - **deps**: ICU 61.1 bump (Steven R. Loomis) [#19621](https://github.com/nodejs/node/pull/19621) +* [[`97a92c4973`](https://github.com/nodejs/node/commit/97a92c4973)] - **deps**: turn in std::string for ICU (Steven R. 
Loomis) [#19624](https://github.com/nodejs/node/pull/19624) +* [[`ae86adc086`](https://github.com/nodejs/node/commit/ae86adc086)] - **doc**: fix various nits (Vse Mozhet Byt) [#19743](https://github.com/nodejs/node/pull/19743) +* [[`041f6cd9c9`](https://github.com/nodejs/node/commit/041f6cd9c9)] - **doc**: improve Buffer.allocUnsafeSlow() and related (Rich Trott) [#19742](https://github.com/nodejs/node/pull/19742) +* [[`42671f24ba`](https://github.com/nodejs/node/commit/42671f24ba)] - **doc**: add mafintosh to collaborators (Mathias Buus) [#19773](https://github.com/nodejs/node/pull/19773) +* [[`c1b83fcbc2`](https://github.com/nodejs/node/commit/c1b83fcbc2)] - **doc**: update to adding listens on SIGUSR1 (willhayslett) [#19709](https://github.com/nodejs/node/pull/19709) +* [[`1aaad92101`](https://github.com/nodejs/node/commit/1aaad92101)] - **doc**: fix lint nits in COLLABORATOR\_GUIDE.md (Vse Mozhet Byt) [#19762](https://github.com/nodejs/node/pull/19762) +* [[`5149e18719`](https://github.com/nodejs/node/commit/5149e18719)] - **doc**: deprecation clarifications (James M Snell) [#19522](https://github.com/nodejs/node/pull/19522) +* [[`c5469bb7a8`](https://github.com/nodejs/node/commit/c5469bb7a8)] - **doc**: remove "if provided" for optional arguments (Rich Trott) [#19690](https://github.com/nodejs/node/pull/19690) +* [[`3a3ae0134d`](https://github.com/nodejs/node/commit/3a3ae0134d)] - **doc**: do not identify string as "JavaScript string" (Rich Trott) [#19689](https://github.com/nodejs/node/pull/19689) +* [[`d111037624`](https://github.com/nodejs/node/commit/d111037624)] - **doc**: favor utf16le over ucs2 in buffer.md (Rich Trott) [#19688](https://github.com/nodejs/node/pull/19688) +* [[`bb32bc8686`](https://github.com/nodejs/node/commit/bb32bc8686)] - **doc**: fix links in vm.md (Vse Mozhet Byt) [#19721](https://github.com/nodejs/node/pull/19721) +* [[`44361bd1c8`](https://github.com/nodejs/node/commit/44361bd1c8)] - **doc**: fix quotes mistypes in inline code 
blocks (Сковорода Никита Андреевич) [#19713](https://github.com/nodejs/node/pull/19713) +* [[`c8fa8f1f9d`](https://github.com/nodejs/node/commit/c8fa8f1f9d)] - **doc**: remove ES6/ECMAScript 2015 from buffer.md (Rich Trott) [#19685](https://github.com/nodejs/node/pull/19685) +* [[`9f20534889`](https://github.com/nodejs/node/commit/9f20534889)] - **doc**: shorten character encoding introduction (Rich Trott) [#19648](https://github.com/nodejs/node/pull/19648) +* [[`078616109c`](https://github.com/nodejs/node/commit/078616109c)] - **doc**: guard against md list parsing edge case (Vse Mozhet Byt) [#19647](https://github.com/nodejs/node/pull/19647) +* [[`2ea7f90728`](https://github.com/nodejs/node/commit/2ea7f90728)] - **doc**: fix grammar error in process.md (Kenji Okamoto) [#19641](https://github.com/nodejs/node/pull/19641) +* [[`7555deeb8c`](https://github.com/nodejs/node/commit/7555deeb8c)] - **doc**: improve zero-fill-buffers text (Rich Trott) [#19623](https://github.com/nodejs/node/pull/19623) +* [[`5e90fc6a85`](https://github.com/nodejs/node/commit/5e90fc6a85)] - **fs**: use fs.access in fs.exists (Bartosz Sosnowski) [#18618](https://github.com/nodejs/node/pull/18618) +* [[`8a8b43e1b1`](https://github.com/nodejs/node/commit/8a8b43e1b1)] - **(SEMVER-MINOR)** **fs,net**: emit 'ready' for fs streams and sockets (Sameer Srivastava) [#19408](https://github.com/nodejs/node/pull/19408) +* [[`e1f44a6366`](https://github.com/nodejs/node/commit/e1f44a6366)] - **http**: fix `request` when `setHost` is `true` (XadillaX) [#19502](https://github.com/nodejs/node/pull/19502) +* [[`dac5f67e64`](https://github.com/nodejs/node/commit/dac5f67e64)] - **http**: support server options on createServer (Wes Todd) [#19461](https://github.com/nodejs/node/pull/19461) +* [[`2bdf3ca235`](https://github.com/nodejs/node/commit/2bdf3ca235)] - **http2**: callback valid check before closing request (Trivikram) [#19061](https://github.com/nodejs/node/pull/19061) +* 
[[`7b850a7565`](https://github.com/nodejs/node/commit/7b850a7565)] - **http2**: destroy() stream, upon errnoException (Sarat Addepalli) [#19389](https://github.com/nodejs/node/pull/19389) +* [[`441175c29a`](https://github.com/nodejs/node/commit/441175c29a)] - **http2**: refer to stream errors by name (Anna Henningsen) [#18966](https://github.com/nodejs/node/pull/18966) +* [[`0bcad33c7a`](https://github.com/nodejs/node/commit/0bcad33c7a)] - **inspector**: report client-visible host and port (Eugene Ostroukhov) [#19664](https://github.com/nodejs/node/pull/19664) +* [[`8e440115ec`](https://github.com/nodejs/node/commit/8e440115ec)] - **lib**: add back lib/module.js redirection (Joyee Cheung) [#19177](https://github.com/nodejs/node/pull/19177) +* [[`45c477c2e6`](https://github.com/nodejs/node/commit/45c477c2e6)] - **lib**: restructure cjs and esm loaders (Joyee Cheung) [#19177](https://github.com/nodejs/node/pull/19177) +* [[`152a86c6aa`](https://github.com/nodejs/node/commit/152a86c6aa)] - **lib**: make isStackOverflowError() engine-agnostic (Mike Kaufman) [#19705](https://github.com/nodejs/node/pull/19705) +* [[`889a3b44b3`](https://github.com/nodejs/node/commit/889a3b44b3)] - **lib**: fix a typo in lib/timers "read through" (wangzengdi) [#19666](https://github.com/nodejs/node/pull/19666) +* [[`a45f3f8fd2`](https://github.com/nodejs/node/commit/a45f3f8fd2)] - **lib**: document nextTick queue internals (Anna Henningsen) [#19469](https://github.com/nodejs/node/pull/19469) +* [[`d3d1ee7279`](https://github.com/nodejs/node/commit/d3d1ee7279)] - **lib**: add internal check macros (Gus Caplan) [#18852](https://github.com/nodejs/node/pull/18852) +* [[`e0c7d783e0`](https://github.com/nodejs/node/commit/e0c7d783e0)] - **lint**: change require-buffer rule message (Gus Caplan) [#19701](https://github.com/nodejs/node/pull/19701) +* [[`859b719927`](https://github.com/nodejs/node/commit/859b719927)] - **module**: skip preserveSymlinks for main (Guy Bedford) 
[#19388](https://github.com/nodejs/node/pull/19388) +* [[`a0a58730e0`](https://github.com/nodejs/node/commit/a0a58730e0)] - **n-api**: back up env before finalize (Gabriel Schulhof) [#19718](https://github.com/nodejs/node/pull/19718) +* [[`b0a3a44ff6`](https://github.com/nodejs/node/commit/b0a3a44ff6)] - **n-api**: ensure in-module exceptions are propagated (Gabriel Schulhof) [#19537](https://github.com/nodejs/node/pull/19537) +* [[`94a10bad3a`](https://github.com/nodejs/node/commit/94a10bad3a)] - **(SEMVER-MINOR)** **n-api**: bump version of n-api supported (Michael Dawson) [#19497](https://github.com/nodejs/node/pull/19497) +* [[`ee4390a167`](https://github.com/nodejs/node/commit/ee4390a167)] - **repl**: fix tab completion of inspector module (Michaël Zasso) [#19505](https://github.com/nodejs/node/pull/19505) +* [[`ebdcf91dcc`](https://github.com/nodejs/node/commit/ebdcf91dcc)] - **src**: put bootstrappers in lib/internal/bootstrap/ (Joyee Cheung) [#19177](https://github.com/nodejs/node/pull/19177) +* [[`ff7a116ba3`](https://github.com/nodejs/node/commit/ff7a116ba3)] - **src**: move internal loaders out of bootstrap\_node.js (Joyee Cheung) [#19112](https://github.com/nodejs/node/pull/19112) +* [[`75d23ab2a0`](https://github.com/nodejs/node/commit/75d23ab2a0)] - **src**: fix warnings in aliased\_buffer (Kyle Farnung) [#19665](https://github.com/nodejs/node/pull/19665) +* [[`01e31906e8`](https://github.com/nodejs/node/commit/01e31906e8)] - **src**: general C++ cleanup in node\_url.cc (Anna Henningsen) [#19598](https://github.com/nodejs/node/pull/19598) +* [[`6c466811d3`](https://github.com/nodejs/node/commit/6c466811d3)] - **src**: name all builtin init functions Initialize (Daniel Bevenius) [#19550](https://github.com/nodejs/node/pull/19550) +* [[`1a38b9bd0f`](https://github.com/nodejs/node/commit/1a38b9bd0f)] - **src**: remove unused 'ares.h' include from env.h (Anna Henningsen) [#19557](https://github.com/nodejs/node/pull/19557) +* 
[[`cae9ff256b`](https://github.com/nodejs/node/commit/cae9ff256b)] - **src**: fix upcoming V8 deprecation warnings (Sarat Addepalli) [#19490](https://github.com/nodejs/node/pull/19490) +* [[`83ebaf08d9`](https://github.com/nodejs/node/commit/83ebaf08d9)] - **test**: remove NODE\_DEBUG in global module loading test (Joyee Cheung) [#19177](https://github.com/nodejs/node/pull/19177) +* [[`92e9ed09e9`](https://github.com/nodejs/node/commit/92e9ed09e9)] - **test**: test process.setuid for bad argument types (Divyanshu Singh) [#19703](https://github.com/nodejs/node/pull/19703) +* [[`4df3377856`](https://github.com/nodejs/node/commit/4df3377856)] - **test**: update test to comply with lint rule (Rich Trott) [#19784](https://github.com/nodejs/node/pull/19784) +* [[`f379167917`](https://github.com/nodejs/node/commit/f379167917)] - **test**: improve assert message (fatahn) [#19629](https://github.com/nodejs/node/pull/19629) +* [[`46569d644d`](https://github.com/nodejs/node/commit/46569d644d)] - **test**: remove third argument from call to assert.strictEqual() (Forrest Wolf) [#19659](https://github.com/nodejs/node/pull/19659) +* [[`e44b7779d6`](https://github.com/nodejs/node/commit/e44b7779d6)] - **test**: fix flaky test-cluster-send-handle-twice (Rich Trott) [#19700](https://github.com/nodejs/node/pull/19700) +* [[`90c85461ff`](https://github.com/nodejs/node/commit/90c85461ff)] - **test**: rename regression tests more expressively (Ujjwal Sharma) [#19668](https://github.com/nodejs/node/pull/19668) +* [[`ff7f28c4f2`](https://github.com/nodejs/node/commit/ff7f28c4f2)] - **test**: remove 3rd argument from assert.strictEqual (Arian Santrach) [#19707](https://github.com/nodejs/node/pull/19707) +* [[`0b27416516`](https://github.com/nodejs/node/commit/0b27416516)] - **test**: make test-http-expect-continue more strict (Rich Trott) [#19669](https://github.com/nodejs/node/pull/19669) +* [[`94b28aaf07`](https://github.com/nodejs/node/commit/94b28aaf07)] - **test**: use 
createReadStream instead of ReadStream (Daniel Bevenius) [#19636](https://github.com/nodejs/node/pull/19636) +* [[`7ae2ca4476`](https://github.com/nodejs/node/commit/7ae2ca4476)] - **test**: removed default message from assert.strictEqual (jaspal-yupana) [#19660](https://github.com/nodejs/node/pull/19660) +* [[`a89ba21ab4`](https://github.com/nodejs/node/commit/a89ba21ab4)] - **test**: refactor test-net-dns-error (Luigi Pinca) [#19640](https://github.com/nodejs/node/pull/19640) +* [[`677b613d24`](https://github.com/nodejs/node/commit/677b613d24)] - **test**: fix typo in test-tls-cnnic-whitelist (Daniel Bevenius) [#19662](https://github.com/nodejs/node/pull/19662) +* [[`806bc0d8f7`](https://github.com/nodejs/node/commit/806bc0d8f7)] - **test**: fix assert.throws error in test-http-parser (Rich Trott) [#19626](https://github.com/nodejs/node/pull/19626) +* [[`2f09ee78fb`](https://github.com/nodejs/node/commit/2f09ee78fb)] - **test**: refactor test-http-expect-continue (Rich Trott) [#19625](https://github.com/nodejs/node/pull/19625) +* [[`278e8af7a6`](https://github.com/nodejs/node/commit/278e8af7a6)] - **test**: rename tests with descriptive filenames (Ujjwal Sharma) [#19608](https://github.com/nodejs/node/pull/19608) +* [[`0daa063021`](https://github.com/nodejs/node/commit/0daa063021)] - **test**: amplify and optimize doctool/test-make-doc (Vse Mozhet Byt) [#19581](https://github.com/nodejs/node/pull/19581) +* [[`274eff5376`](https://github.com/nodejs/node/commit/274eff5376)] - **test**: update link according to NIST bibliography (Tobias Nießen) [#19593](https://github.com/nodejs/node/pull/19593) +* [[`21e69d1222`](https://github.com/nodejs/node/commit/21e69d1222)] - **test**: fix test-tty-get-color-depth (Bartosz Sosnowski) [#18478](https://github.com/nodejs/node/pull/18478) +* [[`4caf536b20`](https://github.com/nodejs/node/commit/4caf536b20)] - **test**: http2 stream.respond() error checks (Trivikram) [#18861](https://github.com/nodejs/node/pull/18861) +* 
[[`ca97be52a2`](https://github.com/nodejs/node/commit/ca97be52a2)] - **test**: fix wrong error classes passed in as type (Ruben Bridgewater) [#13686](https://github.com/nodejs/node/pull/13686) +* [[`44b12c158d`](https://github.com/nodejs/node/commit/44b12c158d)] - **test**: fix common.expectsError (Refael Ackermann) [#13686](https://github.com/nodejs/node/pull/13686) +* [[`cc68bc27f8`](https://github.com/nodejs/node/commit/cc68bc27f8)] - **test**: add more asserts to `test-internal-errors` (Refael Ackermann) [#13686](https://github.com/nodejs/node/pull/13686) +* [[`6bc49f03b9`](https://github.com/nodejs/node/commit/6bc49f03b9)] - **test**: http2 errors on req.close() (Trivikram) [#18854](https://github.com/nodejs/node/pull/18854) +* [[`53d7fbbbf5`](https://github.com/nodejs/node/commit/53d7fbbbf5)] - **tools**: don’t emit illegal utf-8 from icutrim/iculslocs (Steven R. Loomis) [#19756](https://github.com/nodejs/node/pull/19756) +* [[`b80d169e7c`](https://github.com/nodejs/node/commit/b80d169e7c)] - **tools**: apply editorconfig rules to tools also (Tobias Nießen) [#19521](https://github.com/nodejs/node/pull/19521) +* [[`239a036317`](https://github.com/nodejs/node/commit/239a036317)] - **tools**: remove src dir from JS editorconfig rule (Tobias Nießen) [#19521](https://github.com/nodejs/node/pull/19521) +* [[`7043e95fb7`](https://github.com/nodejs/node/commit/7043e95fb7)] - **tools**: dry utility function in tools/doc/json.js (Vse Mozhet Byt) [#19692](https://github.com/nodejs/node/pull/19692) +* [[`140611b2c6`](https://github.com/nodejs/node/commit/140611b2c6)] - **tools**: fix comment nits in tools/doc/\*.js files (Vse Mozhet Byt) [#19696](https://github.com/nodejs/node/pull/19696) +* [[`2c5d53f7cb`](https://github.com/nodejs/node/commit/2c5d53f7cb)] - **tools**: fix nits in tools/doc/type-parser.js (Vse Mozhet Byt) [#19612](https://github.com/nodejs/node/pull/19612) +* [[`fdc51a1331`](https://github.com/nodejs/node/commit/fdc51a1331)] - **url**: remove redundant 
function (Sergey Golovin) [#19076](https://github.com/nodejs/node/pull/19076) +* [[`99e3c77808`](https://github.com/nodejs/node/commit/99e3c77808)] - **url**: refactor "escapeParam" function to make it common (Sergey Golovin) [#19076](https://github.com/nodejs/node/pull/19076) + ## 2018-03-29, Version 9.10.1 (Current), @MylesBorins diff --git a/lib/.eslintrc.yaml b/lib/.eslintrc.yaml index e87596d4d5c21b..4eebdb6617c101 100644 --- a/lib/.eslintrc.yaml +++ b/lib/.eslintrc.yaml @@ -5,3 +5,18 @@ rules: no-let-in-for-declaration: error lowercase-name-for-primitive: error non-ascii-character: error +globals: + CHECK: false + CHECK_EQ: false + CHECK_GE: false + CHECK_GT: false + CHECK_LE: false + CHECK_LT: false + CHECK_NE: false + DCHECK: false + DCHECK_EQ: false + DCHECK_GE: false + DCHECK_GT: false + DCHECK_LE: false + DCHECK_LT: false + DCHECK_NE: false diff --git a/lib/_http_client.js b/lib/_http_client.js index 7ba4d27e85b1b5..59f7c93acb72c7 100644 --- a/lib/_http_client.js +++ b/lib/_http_client.js @@ -139,7 +139,7 @@ function ClientRequest(options, cb) { var host = options.host = validateHost(options.hostname, 'hostname') || validateHost(options.host, 'host') || 'localhost'; - var setHost = (options.setHost === undefined); + var setHost = (options.setHost === undefined || Boolean(options.setHost)); this.socketPath = options.socketPath; this.timeout = options.timeout; diff --git a/lib/buffer.js b/lib/buffer.js index ddbdbb6d03e531..331eaf76f329cb 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -127,7 +127,7 @@ function alignPool() { var bufferWarn = true; const bufferWarning = 'The Buffer() and new Buffer() constructors are not ' + 'recommended for use due to security and usability ' + - 'concerns. Please use the new Buffer.alloc(), ' + + 'concerns. 
Please use the Buffer.alloc(), ' + 'Buffer.allocUnsafe(), or Buffer.from() construction ' + 'methods instead.'; diff --git a/lib/domain.js b/lib/domain.js index 08fbd207f171d3..b4d0bea6647a82 100644 --- a/lib/domain.js +++ b/lib/domain.js @@ -30,6 +30,7 @@ const util = require('util'); const EventEmitter = require('events'); const errors = require('internal/errors'); const { createHook } = require('async_hooks'); +const { internalBinding } = require('internal/bootstrap/loaders'); // overwrite process.domain with a getter/setter that will allow for more // effective optimizations diff --git a/lib/fs.js b/lib/fs.js index 231303ec42c620..5d3bae1550e298 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -323,12 +323,7 @@ fs.accessSync = function(path, mode) { }; fs.exists = function(path, callback) { - if (handleError((path = getPathFromURL(path)), cb)) - return; - if (!nullCheck(path, cb)) return; - var req = new FSReqWrap(); - req.oncomplete = cb; - binding.stat(pathModule.toNamespacedPath(path), req); + fs.access(path, fs.F_OK, cb); function cb(err) { if (callback) callback(err ? false : true); } @@ -345,9 +340,7 @@ Object.defineProperty(fs.exists, internalUtil.promisify.custom, { fs.existsSync = function(path) { try { - handleError((path = getPathFromURL(path))); - nullCheck(path); - binding.stat(pathModule.toNamespacedPath(path)); + fs.accessSync(path, fs.F_OK); return true; } catch (e) { return false; @@ -2076,6 +2069,7 @@ ReadStream.prototype.open = function() { self.fd = fd; self.emit('open', fd); + self.emit('ready'); // start the flow of data. 
self.read(); }); @@ -2234,6 +2228,7 @@ WriteStream.prototype.open = function() { this.fd = fd; this.emit('open', fd); + this.emit('ready'); }); }; diff --git a/lib/http.js b/lib/http.js index 8431ab0d51ea39..9ed6b3d1de8721 100644 --- a/lib/http.js +++ b/lib/http.js @@ -33,8 +33,8 @@ const { ServerResponse } = require('_http_server'); -function createServer(requestListener) { - return new Server(requestListener); +function createServer(opts, requestListener) { + return new Server(opts, requestListener); } function request(options, cb) { diff --git a/lib/internal/bootstrap/loaders.js b/lib/internal/bootstrap/loaders.js new file mode 100644 index 00000000000000..e6d30e2aa919f6 --- /dev/null +++ b/lib/internal/bootstrap/loaders.js @@ -0,0 +1,217 @@ +// This file creates the internal module & binding loaders used by built-in +// modules. In contrast, user land modules are loaded using +// lib/internal/modules/cjs/loader.js (CommonJS Modules) or +// lib/internal/modules/esm/* (ES Modules). +// +// This file is compiled and run by node.cc before bootstrap/node.js +// is called, therefore the loaders are bootstrapped before we start to +// actually bootstrap Node.js. It creates the following objects: +// +// C++ binding loaders: +// - process.binding(): the legacy C++ binding loader, accessible from user land +// because it is an object attached to the global process object. +// These C++ bindings are created using NODE_BUILTIN_MODULE_CONTEXT_AWARE() +// and have their nm_flags set to NM_F_BUILTIN. We do not make any guarantees +// about the stability of these bindings, but still have to take care of +// compatibility issues caused by them from time to time. +// - process._linkedBinding(): intended to be used by embedders to add +// additional C++ bindings in their applications. These C++ bindings +// can be created using NODE_MODULE_CONTEXT_AWARE_CPP() with the flag +// NM_F_LINKED.
+// - internalBinding(): the private internal C++ binding loader, inaccessible +// from user land because it is only available from NativeModule.require(). +// These C++ bindings are created using NODE_MODULE_CONTEXT_AWARE_INTERNAL() +// and have their nm_flags set to NM_F_INTERNAL. +// +// Internal JavaScript module loader: +// - NativeModule: a minimal module system used to load the JavaScript core +// modules found in lib/**/*.js and deps/**/*.js. All core modules are +// compiled into the node binary via node_javascript.cc generated by js2c.py, +// so they can be loaded faster without the cost of I/O. This class makes the +// lib/internal/*, deps/internal/* modules and internalBinding() available by +// default to core modules, and lets the core modules require itself via +// require('internal/bootstrap/loaders') even when this file is not written in +// CommonJS style. +// +// Other objects: +// - process.moduleLoadList: an array recording the bindings and the modules +// loaded in the process and the order in which they are loaded.
+ +'use strict'; + +(function bootstrapInternalLoaders(process, getBinding, getLinkedBinding, + getInternalBinding) { + + // Set up process.moduleLoadList + const moduleLoadList = []; + Object.defineProperty(process, 'moduleLoadList', { + value: moduleLoadList, + configurable: true, + enumerable: true, + writable: false + }); + + // Set up process.binding() and process._linkedBinding() + { + const bindingObj = Object.create(null); + + process.binding = function binding(module) { + module = String(module); + let mod = bindingObj[module]; + if (typeof mod !== 'object') { + mod = bindingObj[module] = getBinding(module); + moduleLoadList.push(`Binding ${module}`); + } + return mod; + }; + + process._linkedBinding = function _linkedBinding(module) { + module = String(module); + let mod = bindingObj[module]; + if (typeof mod !== 'object') + mod = bindingObj[module] = getLinkedBinding(module); + return mod; + }; + } + + // Set up internalBinding() in the closure + let internalBinding; + { + const bindingObj = Object.create(null); + internalBinding = function internalBinding(module) { + let mod = bindingObj[module]; + if (typeof mod !== 'object') { + mod = bindingObj[module] = getInternalBinding(module); + moduleLoadList.push(`Internal Binding ${module}`); + } + return mod; + }; + } + + // Minimal sandbox helper + const ContextifyScript = process.binding('contextify').ContextifyScript; + function runInThisContext(code, options) { + const script = new ContextifyScript(code, options); + return script.runInThisContext(); + } + + // Set up NativeModule + function NativeModule(id) { + this.filename = `${id}.js`; + this.id = id; + this.exports = {}; + this.loaded = false; + this.loading = false; + } + + NativeModule._source = getBinding('natives'); + NativeModule._cache = {}; + + const config = getBinding('config'); + + // Think of this as module.exports in this file even though it is not + // written in CommonJS style. 
+ const loaderExports = { internalBinding, NativeModule }; + const loaderId = 'internal/bootstrap/loaders'; + NativeModule.require = function(id) { + if (id === loaderId) { + return loaderExports; + } + + const cached = NativeModule.getCached(id); + if (cached && (cached.loaded || cached.loading)) { + return cached.exports; + } + + if (!NativeModule.exists(id)) { + // Model the error off the internal/errors.js model, but + // do not use that module given that it could actually be + // the one causing the error if there's a bug in Node.js + const err = new Error(`No such built-in module: ${id}`); + err.code = 'ERR_UNKNOWN_BUILTIN_MODULE'; + err.name = 'Error [ERR_UNKNOWN_BUILTIN_MODULE]'; + throw err; + } + + moduleLoadList.push(`NativeModule ${id}`); + + const nativeModule = new NativeModule(id); + + nativeModule.cache(); + nativeModule.compile(); + + return nativeModule.exports; + }; + + NativeModule.getCached = function(id) { + return NativeModule._cache[id]; + }; + + NativeModule.exists = function(id) { + return NativeModule._source.hasOwnProperty(id); + }; + + if (config.exposeInternals) { + NativeModule.nonInternalExists = function(id) { + // Do not expose this to user land even with --expose-internals + if (id === loaderId) { + return false; + } + return NativeModule.exists(id); + }; + + NativeModule.isInternal = function(id) { + // Do not expose this to user land even with --expose-internals + return id === loaderId; + }; + } else { + NativeModule.nonInternalExists = function(id) { + return NativeModule.exists(id) && !NativeModule.isInternal(id); + }; + + NativeModule.isInternal = function(id) { + return id.startsWith('internal/'); + }; + } + + NativeModule.getSource = function(id) { + return NativeModule._source[id]; + }; + + NativeModule.wrap = function(script) { + return NativeModule.wrapper[0] + script + NativeModule.wrapper[1]; + }; + + NativeModule.wrapper = [ + '(function (exports, require, module, process) {', + '\n});' + ]; + + 
NativeModule.prototype.compile = function() { + let source = NativeModule.getSource(this.id); + source = NativeModule.wrap(source); + + this.loading = true; + + try { + const fn = runInThisContext(source, { + filename: this.filename, + lineOffset: 0, + displayErrors: true + }); + fn(this.exports, NativeModule.require, this, process); + + this.loaded = true; + } finally { + this.loading = false; + } + }; + + NativeModule.prototype.cache = function() { + NativeModule._cache[this.id] = this; + }; + + // This will be passed to the bootstrapNodeJSCore function in + // bootstrap/node.js. + return loaderExports; +}); diff --git a/lib/internal/bootstrap_node.js b/lib/internal/bootstrap/node.js similarity index 75% rename from lib/internal/bootstrap_node.js rename to lib/internal/bootstrap/node.js index d8096f90e172ea..8a87712613de7c 100644 --- a/lib/internal/bootstrap_node.js +++ b/lib/internal/bootstrap/node.js @@ -4,11 +4,16 @@ // responsible for bootstrapping the node.js core. As special caution is given // to the performance of the startup process, many dependencies are invoked // lazily. +// +// Before this file is run, lib/internal/bootstrap/loaders.js gets run first +// to bootstrap the internal binding and module loaders, including +// process.binding(), process._linkedBinding(), internalBinding() and +// NativeModule. And then { internalBinding, NativeModule } will be passed +// into this bootstrapper to bootstrap Node.js core. 
'use strict'; -(function(process) { - let internalBinding; +(function bootstrapNodeJSCore(process, { internalBinding, NativeModule }) { const exceptionHandlerState = { captureFn: null }; function startup() { @@ -105,7 +110,7 @@ process.emitWarning( 'The ESM module loader is experimental.', 'ExperimentalWarning', undefined); - NativeModule.require('internal/process/modules').setup(); + NativeModule.require('internal/process/esm_loader').setup(); } @@ -165,8 +170,10 @@ preloadModules(); perf.markMilestone(NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_END); - const internalModule = NativeModule.require('internal/module'); - internalModule.addBuiltinLibsToObject(global); + const { + addBuiltinLibsToObject + } = NativeModule.require('internal/modules/cjs/helpers'); + addBuiltinLibsToObject(global); evalScript('[eval]'); } else if (process.argv[1] && process.argv[1] !== '-') { perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_START); @@ -174,13 +181,13 @@ const path = NativeModule.require('path'); process.argv[1] = path.resolve(process.argv[1]); - const Module = NativeModule.require('module'); + const CJSModule = NativeModule.require('internal/modules/cjs/loader'); // check if user passed `-c` or `--check` arguments to Node. 
if (process._syntax_check_only != null) { const fs = NativeModule.require('fs'); // read the source - const filename = Module._resolveFilename(process.argv[1]); + const filename = CJSModule._resolveFilename(process.argv[1]); const source = fs.readFileSync(filename, 'utf-8'); checkScriptSyntax(source, filename); process.exit(0); @@ -191,7 +198,7 @@ preloadModules(); perf.markMilestone( NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_END); - Module.runMain(); + CJSModule.runMain(); } else { perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_START); perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_END); @@ -246,54 +253,6 @@ perf.markMilestone(NODE_PERFORMANCE_MILESTONE_BOOTSTRAP_COMPLETE); } - const moduleLoadList = []; - Object.defineProperty(process, 'moduleLoadList', { - value: moduleLoadList, - configurable: true, - enumerable: true, - writable: false - }); - - { - const bindingObj = Object.create(null); - - const getBinding = process.binding; - process.binding = function binding(module) { - module = String(module); - let mod = bindingObj[module]; - if (typeof mod !== 'object') { - mod = bindingObj[module] = getBinding(module); - moduleLoadList.push(`Binding ${module}`); - } - return mod; - }; - - const getLinkedBinding = process._linkedBinding; - process._linkedBinding = function _linkedBinding(module) { - module = String(module); - let mod = bindingObj[module]; - if (typeof mod !== 'object') - mod = bindingObj[module] = getLinkedBinding(module); - return mod; - }; - } - - { - const bindingObj = Object.create(null); - - const getInternalBinding = process._internalBinding; - delete process._internalBinding; - - internalBinding = function internalBinding(module) { - let mod = bindingObj[module]; - if (typeof mod !== 'object') { - mod = bindingObj[module] = getInternalBinding(module); - moduleLoadList.push(`Internal Binding ${module}`); - } - return mod; - }; - } - function setupProcessObject() { process._setupProcessObject(pushValueToArray); @@ 
-364,7 +323,7 @@ function setupGlobalConsole() { const originalConsole = global.console; - const Module = NativeModule.require('module'); + const CJSModule = NativeModule.require('internal/modules/cjs/loader'); // Setup Node.js global.console const wrappedConsole = NativeModule.require('console'); Object.defineProperty(global, 'console', { @@ -374,22 +333,23 @@ return wrappedConsole; } }); - setupInspector(originalConsole, wrappedConsole, Module); + setupInspector(originalConsole, wrappedConsole, CJSModule); } - function setupInspector(originalConsole, wrappedConsole, Module) { + function setupInspector(originalConsole, wrappedConsole, CJSModule) { if (!process.config.variables.v8_enable_inspector) { return; } const { addCommandLineAPI, consoleCall } = process.binding('inspector'); // Setup inspector command line API - const { makeRequireFunction } = NativeModule.require('internal/module'); + const { makeRequireFunction } = + NativeModule.require('internal/modules/cjs/helpers'); const path = NativeModule.require('path'); const cwd = tryGetCwd(path); - const consoleAPIModule = new Module(''); + const consoleAPIModule = new CJSModule(''); consoleAPIModule.paths = - Module._nodeModulePaths(cwd).concat(Module.globalPaths); + CJSModule._nodeModulePaths(cwd).concat(CJSModule.globalPaths); addCommandLineAPI('require', makeRequireFunction(consoleAPIModule)); const config = {}; for (const key of Object.keys(wrappedConsole)) { @@ -504,13 +464,13 @@ } function evalScript(name) { - const Module = NativeModule.require('module'); + const CJSModule = NativeModule.require('internal/modules/cjs/loader'); const path = NativeModule.require('path'); const cwd = tryGetCwd(path); - const module = new Module(name); + const module = new CJSModule(name); module.filename = path.join(cwd, name); - module.paths = Module._nodeModulePaths(cwd); + module.paths = CJSModule._nodeModulePaths(cwd); const body = wrapForBreakOnFirstLine(process._eval); const script = `global.__filename = 
${JSON.stringify(name)};\n` + 'global.exports = exports;\n' + @@ -529,139 +489,29 @@ // Load preload modules function preloadModules() { if (process._preload_modules) { - NativeModule.require('module')._preloadModules(process._preload_modules); + const { + _preloadModules + } = NativeModule.require('internal/modules/cjs/loader'); + _preloadModules(process._preload_modules); } } function checkScriptSyntax(source, filename) { - const Module = NativeModule.require('module'); + const CJSModule = NativeModule.require('internal/modules/cjs/loader'); const vm = NativeModule.require('vm'); - const internalModule = NativeModule.require('internal/module'); + const { + stripShebang, stripBOM + } = NativeModule.require('internal/modules/cjs/helpers'); // remove Shebang - source = internalModule.stripShebang(source); + source = stripShebang(source); // remove BOM - source = internalModule.stripBOM(source); + source = stripBOM(source); // wrap it - source = Module.wrap(source); + source = CJSModule.wrap(source); // compile the script, this will throw if it fails new vm.Script(source, { displayErrors: true, filename }); } - // Below you find a minimal module system, which is used to load the node - // core modules found in lib/*.js. All core modules are compiled into the - // node binary, so they can be loaded faster. 
- - const ContextifyScript = process.binding('contextify').ContextifyScript; - function runInThisContext(code, options) { - const script = new ContextifyScript(code, options); - return script.runInThisContext(); - } - - function NativeModule(id) { - this.filename = `${id}.js`; - this.id = id; - this.exports = {}; - this.loaded = false; - this.loading = false; - } - - NativeModule._source = process.binding('natives'); - NativeModule._cache = {}; - - const config = process.binding('config'); - - NativeModule.require = function(id) { - if (id === 'native_module') { - return NativeModule; - } - - const cached = NativeModule.getCached(id); - if (cached && (cached.loaded || cached.loading)) { - return cached.exports; - } - - if (!NativeModule.exists(id)) { - // Model the error off the internal/errors.js model, but - // do not use that module given that it could actually be - // the one causing the error if there's a bug in Node.js - const err = new Error(`No such built-in module: ${id}`); - err.code = 'ERR_UNKNOWN_BUILTIN_MODULE'; - err.name = 'Error [ERR_UNKNOWN_BUILTIN_MODULE]'; - throw err; - } - - moduleLoadList.push(`NativeModule ${id}`); - - const nativeModule = new NativeModule(id); - - nativeModule.cache(); - nativeModule.compile(); - - return nativeModule.exports; - }; - - NativeModule.getCached = function(id) { - return NativeModule._cache[id]; - }; - - NativeModule.exists = function(id) { - return NativeModule._source.hasOwnProperty(id); - }; - - if (config.exposeInternals) { - NativeModule.nonInternalExists = NativeModule.exists; - - NativeModule.isInternal = function(id) { - return false; - }; - } else { - NativeModule.nonInternalExists = function(id) { - return NativeModule.exists(id) && !NativeModule.isInternal(id); - }; - - NativeModule.isInternal = function(id) { - return id.startsWith('internal/'); - }; - } - - - NativeModule.getSource = function(id) { - return NativeModule._source[id]; - }; - - NativeModule.wrap = function(script) { - return 
NativeModule.wrapper[0] + script + NativeModule.wrapper[1]; - }; - - NativeModule.wrapper = [ - '(function (exports, require, module, internalBinding, process) {', - '\n});' - ]; - - NativeModule.prototype.compile = function() { - let source = NativeModule.getSource(this.id); - source = NativeModule.wrap(source); - - this.loading = true; - - try { - const fn = runInThisContext(source, { - filename: this.filename, - lineOffset: 0, - displayErrors: true - }); - fn(this.exports, NativeModule.require, this, internalBinding, process); - - this.loaded = true; - } finally { - this.loading = false; - } - }; - - NativeModule.prototype.cache = function() { - NativeModule._cache[this.id] = this; - }; - startup(); }); diff --git a/lib/internal/errors.js b/lib/internal/errors.js index 833246c8a5ff66..ec5b0effe78932 100644 --- a/lib/internal/errors.js +++ b/lib/internal/errors.js @@ -445,25 +445,29 @@ function dnsException(err, syscall, hostname) { return ex; } -let MAX_STACK_MESSAGE; +let maxStack_ErrorName; +let maxStack_ErrorMessage; /** - * Returns true if `err` is a `RangeError` with an engine-specific message. + * Returns true if `err.name` and `err.message` are equal to engine-specific + * values indicating max call stack size has been exceeded. * "Maximum call stack size exceeded" in V8. 
* * @param {Error} err * @returns {boolean} */ function isStackOverflowError(err) { - if (MAX_STACK_MESSAGE === undefined) { + if (maxStack_ErrorMessage === undefined) { try { function overflowStack() { overflowStack(); } overflowStack(); } catch (err) { - MAX_STACK_MESSAGE = err.message; + maxStack_ErrorMessage = err.message; + maxStack_ErrorName = err.name; } } - return err.name === 'RangeError' && err.message === MAX_STACK_MESSAGE; + return err.name === maxStack_ErrorName && + err.message === maxStack_ErrorMessage; } module.exports = exports = { diff --git a/lib/internal/http2/core.js b/lib/internal/http2/core.js index 010bec55451c83..c83cd79814061d 100644 --- a/lib/internal/http2/core.js +++ b/lib/internal/http2/core.js @@ -62,7 +62,7 @@ const { } = require('timers'); const { ShutdownWrap, WriteWrap } = process.binding('stream_wrap'); -const { constants } = binding; +const { constants, nameForErrorCode } = binding; const NETServer = net.Server; const TLSServer = tls.Server; @@ -1646,7 +1646,7 @@ class Http2Stream extends Duplex { req.async = false; const err = createWriteReq(req, handle, data, encoding); if (err) - throw errors.errnoException(err, 'write', req.error); + return this.destroy(errors.errnoException(err, 'write', req.error), cb); trackWriteState(this, req.bytes); } @@ -1689,7 +1689,7 @@ class Http2Stream extends Duplex { } const err = handle.writev(req, chunks); if (err) - throw errors.errnoException(err, 'write', req.error); + return this.destroy(errors.errnoException(err, 'write', req.error), cb); trackWriteState(this, req.bytes); } @@ -1763,6 +1763,8 @@ class Http2Stream extends Duplex { throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'code', 'number'); if (code < 0 || code > kMaxInt) throw new errors.RangeError('ERR_OUT_OF_RANGE', 'code'); + if (callback !== undefined && typeof callback !== 'function') + throw new errors.TypeError('ERR_INVALID_CALLBACK'); // Unenroll the timeout. 
unenroll(this); @@ -1780,8 +1782,6 @@ class Http2Stream extends Duplex { state.rstCode = code; if (callback !== undefined) { - if (typeof callback !== 'function') - throw new errors.TypeError('ERR_INVALID_CALLBACK'); this.once('close', callback); } @@ -1841,7 +1841,8 @@ class Http2Stream extends Duplex { // abort and is already covered by aborted event, also allows more // seamless compatibility with http1 if (err == null && code !== NGHTTP2_NO_ERROR && code !== NGHTTP2_CANCEL) - err = new errors.Error('ERR_HTTP2_STREAM_ERROR', code); + err = new errors.Error('ERR_HTTP2_STREAM_ERROR', + nameForErrorCode[code] || code); this[kSession] = undefined; this[kHandle] = undefined; diff --git a/lib/internal/module.js b/lib/internal/modules/cjs/helpers.js similarity index 98% rename from lib/internal/module.js rename to lib/internal/modules/cjs/helpers.js index d2140411552429..0bb1cea4050e16 100644 --- a/lib/internal/module.js +++ b/lib/internal/modules/cjs/helpers.js @@ -89,7 +89,7 @@ const builtinLibs = [ 'stream', 'string_decoder', 'tls', 'tty', 'url', 'util', 'v8', 'vm', 'zlib' ]; -if (typeof process.binding('inspector').connect === 'function') { +if (typeof process.binding('inspector').open === 'function') { builtinLibs.push('inspector'); builtinLibs.sort(); } diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js new file mode 100644 index 00000000000000..39333f049f6c1a --- /dev/null +++ b/lib/internal/modules/cjs/loader.js @@ -0,0 +1,761 @@ +// Copyright Joyent, Inc. and other Node contributors. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +'use strict'; + +const { NativeModule } = require('internal/bootstrap/loaders'); +const util = require('util'); +const { decorateErrorStack } = require('internal/util'); +const { getURLFromFilePath } = require('internal/url'); +const vm = require('vm'); +const assert = require('assert').ok; +const fs = require('fs'); +const internalFS = require('internal/fs'); +const path = require('path'); +const { + internalModuleReadFile, + internalModuleStat +} = process.binding('fs'); +const { + makeRequireFunction, + requireDepth, + stripBOM, + stripShebang +} = require('internal/modules/cjs/helpers'); +const preserveSymlinks = !!process.binding('config').preserveSymlinks; +const experimentalModules = !!process.binding('config').experimentalModules; + +const errors = require('internal/errors'); + +module.exports = Module; + +// these are below module.exports for the circular reference +const asyncESM = require('internal/process/esm_loader'); +const ModuleJob = require('internal/modules/esm/ModuleJob'); +const createDynamicModule = require('internal/modules/esm/CreateDynamicModule'); + +function stat(filename) { + filename = path.toNamespacedPath(filename); + const cache = stat.cache; + if (cache !== null) { + const result = cache.get(filename); + if (result !== undefined) return result; + } + const result = internalModuleStat(filename); + if (cache !== null) cache.set(filename, result); + return result; +} +stat.cache = null; + +function updateChildren(parent, child, scan) { + var children = parent && parent.children; + if (children && !(scan && children.includes(child))) + children.push(child); +} + +function Module(id, parent) { + this.id = id; + this.exports = {}; + this.parent = parent; + updateChildren(parent, this, false); + this.filename = null; + this.loaded = false; + this.children = []; +} + +const builtinModules = Object.keys(NativeModule._source) + .filter(NativeModule.nonInternalExists); + +Object.freeze(builtinModules); +Module.builtinModules = 
builtinModules; + +Module._cache = Object.create(null); +Module._pathCache = Object.create(null); +Module._extensions = Object.create(null); +var modulePaths = []; +Module.globalPaths = []; + +Module.wrap = function(script) { + return Module.wrapper[0] + script + Module.wrapper[1]; +}; + +Module.wrapper = [ + '(function (exports, require, module, __filename, __dirname) { ', + '\n});' +]; + +const debug = util.debuglog('module'); + +Module._debug = util.deprecate(debug, 'Module._debug is deprecated.', + 'DEP0077'); + +// given a module name, and a list of paths to test, returns the first +// matching file in the following precedence. +// +// require("a.") +// -> a. +// +// require("a") +// -> a +// -> a. +// -> a/index. + +// check if the directory is a package.json dir +const packageMainCache = Object.create(null); + +function readPackage(requestPath) { + const entry = packageMainCache[requestPath]; + if (entry) + return entry; + + const jsonPath = path.resolve(requestPath, 'package.json'); + const json = internalModuleReadFile(path.toNamespacedPath(jsonPath)); + + if (json === undefined) { + return false; + } + + try { + var pkg = packageMainCache[requestPath] = JSON.parse(json).main; + } catch (e) { + e.path = jsonPath; + e.message = 'Error parsing ' + jsonPath + ': ' + e.message; + throw e; + } + return pkg; +} + +function tryPackage(requestPath, exts, isMain) { + var pkg = readPackage(requestPath); + + if (!pkg) return false; + + var filename = path.resolve(requestPath, pkg); + return tryFile(filename, isMain) || + tryExtensions(filename, exts, isMain) || + tryExtensions(path.resolve(filename, 'index'), exts, isMain); +} + +// In order to minimize unnecessary lstat() calls, +// this cache is a list of known-real paths. +// Set to an empty Map to reset. 
+const realpathCache = new Map(); + +// check if the file exists and is not a directory +// if using --preserve-symlinks and isMain is false, +// keep symlinks intact, otherwise resolve to the +// absolute realpath. +function tryFile(requestPath, isMain) { + const rc = stat(requestPath); + if (preserveSymlinks && !isMain) { + return rc === 0 && path.resolve(requestPath); + } + return rc === 0 && toRealPath(requestPath); +} + +function toRealPath(requestPath) { + return fs.realpathSync(requestPath, { + [internalFS.realpathCacheKey]: realpathCache + }); +} + +// given a path, check if the file exists with any of the set extensions +function tryExtensions(p, exts, isMain) { + for (var i = 0; i < exts.length; i++) { + const filename = tryFile(p + exts[i], isMain); + + if (filename) { + return filename; + } + } + return false; +} + +var warned = false; +Module._findPath = function(request, paths, isMain) { + if (path.isAbsolute(request)) { + paths = ['']; + } else if (!paths || paths.length === 0) { + return false; + } + + var cacheKey = request + '\x00' + + (paths.length === 1 ? paths[0] : paths.join('\x00')); + var entry = Module._pathCache[cacheKey]; + if (entry) + return entry; + + var exts; + var trailingSlash = request.length > 0 && + request.charCodeAt(request.length - 1) === 47/*/*/; + + // For each path + for (var i = 0; i < paths.length; i++) { + // Don't search further if path doesn't exist + const curPath = paths[i]; + if (curPath && stat(curPath) < 1) continue; + var basePath = path.resolve(curPath, request); + var filename; + + var rc = stat(basePath); + if (!trailingSlash) { + if (rc === 0) { // File. + if (preserveSymlinks && !isMain) { + filename = path.resolve(basePath); + } else { + filename = toRealPath(basePath); + } + } else if (rc === 1) { // Directory. 
+ if (exts === undefined) + exts = Object.keys(Module._extensions); + filename = tryPackage(basePath, exts, isMain); + } + + if (!filename) { + // try it with each of the extensions + if (exts === undefined) + exts = Object.keys(Module._extensions); + filename = tryExtensions(basePath, exts, isMain); + } + } + + if (!filename && rc === 1) { // Directory. + if (exts === undefined) + exts = Object.keys(Module._extensions); + filename = tryPackage(basePath, exts, isMain) || + // try it with each of the extensions at "index" + tryExtensions(path.resolve(basePath, 'index'), exts, isMain); + } + + if (filename) { + // Warn once if '.' resolved outside the module dir + if (request === '.' && i > 0) { + if (!warned) { + warned = true; + process.emitWarning( + 'warning: require(\'.\') resolved outside the package ' + + 'directory. This functionality is deprecated and will be removed ' + + 'soon.', + 'DeprecationWarning', 'DEP0019'); + } + } + + Module._pathCache[cacheKey] = filename; + return filename; + } + } + return false; +}; + +// 'node_modules' character codes reversed +var nmChars = [ 115, 101, 108, 117, 100, 111, 109, 95, 101, 100, 111, 110 ]; +var nmLen = nmChars.length; +if (process.platform === 'win32') { + // 'from' is the __dirname of the module. + Module._nodeModulePaths = function(from) { + // guarantee that 'from' is absolute. + from = path.resolve(from); + + // note: this approach *only* works when the path is guaranteed + // to be absolute. Doing a fully-edge-case-correct path.split + // that works on both Windows and Posix is non-trivial. + + // return root node_modules when path is 'D:\\'. + // path.resolve will make sure from.length >=3 in Windows. 
+ if (from.charCodeAt(from.length - 1) === 92/*\*/ && + from.charCodeAt(from.length - 2) === 58/*:*/) + return [from + 'node_modules']; + + const paths = []; + var p = 0; + var last = from.length; + for (var i = from.length - 1; i >= 0; --i) { + const code = from.charCodeAt(i); + // The path segment separator check ('\' and '/') was used to get + // node_modules path for every path segment. + // Use colon as an extra condition since we can get node_modules + // path for drive root like 'C:\node_modules' and don't need to + // parse drive name. + if (code === 92/*\*/ || code === 47/*/*/ || code === 58/*:*/) { + if (p !== nmLen) + paths.push(from.slice(0, last) + '\\node_modules'); + last = i; + p = 0; + } else if (p !== -1) { + if (nmChars[p] === code) { + ++p; + } else { + p = -1; + } + } + } + + return paths; + }; +} else { // posix + // 'from' is the __dirname of the module. + Module._nodeModulePaths = function(from) { + // guarantee that 'from' is absolute. + from = path.resolve(from); + // Return early not only to avoid unnecessary work, but to *avoid* returning + // an array of two items for a root: [ '//node_modules', '/node_modules' ] + if (from === '/') + return ['/node_modules']; + + // note: this approach *only* works when the path is guaranteed + // to be absolute. Doing a fully-edge-case-correct path.split + // that works on both Windows and Posix is non-trivial. + const paths = []; + var p = 0; + var last = from.length; + for (var i = from.length - 1; i >= 0; --i) { + const code = from.charCodeAt(i); + if (code === 47/*/*/) { + if (p !== nmLen) + paths.push(from.slice(0, last) + '/node_modules'); + last = i; + p = 0; + } else if (p !== -1) { + if (nmChars[p] === code) { + ++p; + } else { + p = -1; + } + } + } + + // Append /node_modules to handle root paths. + paths.push('/node_modules'); + + return paths; + }; +} + + +// 'index.' 
character codes +var indexChars = [ 105, 110, 100, 101, 120, 46 ]; +var indexLen = indexChars.length; +Module._resolveLookupPaths = function(request, parent, newReturn) { + if (NativeModule.nonInternalExists(request)) { + debug('looking for %j in []', request); + return (newReturn ? null : [request, []]); + } + + // Check for relative path + if (request.length < 2 || + request.charCodeAt(0) !== 46/*.*/ || + (request.charCodeAt(1) !== 46/*.*/ && + request.charCodeAt(1) !== 47/*/*/)) { + var paths = modulePaths; + if (parent) { + if (!parent.paths) + paths = parent.paths = []; + else + paths = parent.paths.concat(paths); + } + + // Maintain backwards compat with certain broken uses of require('.') + // by putting the module's directory in front of the lookup paths. + if (request === '.') { + if (parent && parent.filename) { + paths.unshift(path.dirname(parent.filename)); + } else { + paths.unshift(path.resolve(request)); + } + } + + debug('looking for %j in %j', request, paths); + return (newReturn ? (paths.length > 0 ? paths : null) : [request, paths]); + } + + // with --eval, parent.id is not set and parent.filename is null + if (!parent || !parent.id || !parent.filename) { + // make require('./path/to/foo') work - normally the path is taken + // from realpath(__filename) but with eval there is no filename + var mainPaths = ['.'].concat(Module._nodeModulePaths('.'), modulePaths); + + debug('looking for %j in %j', request, mainPaths); + return (newReturn ? mainPaths : [request, mainPaths]); + } + + // Is the parent an index module? + // We can assume the parent has a valid extension, + // as it already has been accepted as a module. 
+ const base = path.basename(parent.filename); + var parentIdPath; + if (base.length > indexLen) { + var i = 0; + for (; i < indexLen; ++i) { + if (indexChars[i] !== base.charCodeAt(i)) + break; + } + if (i === indexLen) { + // We matched 'index.', let's validate the rest + for (; i < base.length; ++i) { + const code = base.charCodeAt(i); + if (code !== 95/*_*/ && + (code < 48/*0*/ || code > 57/*9*/) && + (code < 65/*A*/ || code > 90/*Z*/) && + (code < 97/*a*/ || code > 122/*z*/)) + break; + } + if (i === base.length) { + // Is an index module + parentIdPath = parent.id; + } else { + // Not an index module + parentIdPath = path.dirname(parent.id); + } + } else { + // Not an index module + parentIdPath = path.dirname(parent.id); + } + } else { + // Not an index module + parentIdPath = path.dirname(parent.id); + } + var id = path.resolve(parentIdPath, request); + + // make sure require('./path') and require('path') get distinct ids, even + // when called from the toplevel js file + if (parentIdPath === '.' && id.indexOf('/') === -1) { + id = './' + id; + } + + debug('RELATIVE: requested: %s set ID to: %s from %s', request, id, + parent.id); + + var parentDir = [path.dirname(parent.filename)]; + debug('looking for %j in %j', id, parentDir); + return (newReturn ? parentDir : [id, parentDir]); +}; + +// Check the cache for the requested file. +// 1. If a module already exists in the cache: return its exports object. +// 2. If the module is native: call `NativeModule.require()` with the +// filename and return the result. +// 3. Otherwise, create a new module for the file and save it to the cache. +// Then have it load the file contents before returning its exports +// object. 
+Module._load = function(request, parent, isMain) { + if (parent) { + debug('Module._load REQUEST %s parent: %s', request, parent.id); + } + + if (experimentalModules && isMain) { + asyncESM.loaderPromise.then((loader) => { + return loader.import(getURLFromFilePath(request).pathname); + }) + .catch((e) => { + decorateErrorStack(e); + console.error(e); + process.exit(1); + }); + return; + } + + var filename = Module._resolveFilename(request, parent, isMain); + + var cachedModule = Module._cache[filename]; + if (cachedModule) { + updateChildren(parent, cachedModule, true); + return cachedModule.exports; + } + + if (NativeModule.nonInternalExists(filename)) { + debug('load native module %s', request); + return NativeModule.require(filename); + } + + // Don't call updateChildren(), Module constructor already does. + var module = new Module(filename, parent); + + if (isMain) { + process.mainModule = module; + module.id = '.'; + } + + Module._cache[filename] = module; + + tryModuleLoad(module, filename); + + return module.exports; +}; + +function tryModuleLoad(module, filename) { + var threw = true; + try { + module.load(filename); + threw = false; + } finally { + if (threw) { + delete Module._cache[filename]; + } + } +} + +Module._resolveFilename = function(request, parent, isMain, options) { + if (NativeModule.nonInternalExists(request)) { + return request; + } + + var paths; + + if (typeof options === 'object' && options !== null && + Array.isArray(options.paths)) { + const fakeParent = new Module('', null); + + paths = []; + + for (var i = 0; i < options.paths.length; i++) { + const path = options.paths[i]; + fakeParent.paths = Module._nodeModulePaths(path); + const lookupPaths = Module._resolveLookupPaths(request, fakeParent, true); + + if (!paths.includes(path)) + paths.push(path); + + for (var j = 0; j < lookupPaths.length; j++) { + if (!paths.includes(lookupPaths[j])) + paths.push(lookupPaths[j]); + } + } + } else { + paths = Module._resolveLookupPaths(request, 
parent, true); + } + + // look up the filename first, since that's the cache key. + var filename = Module._findPath(request, paths, isMain); + if (!filename) { + var err = new Error(`Cannot find module '${request}'`); + err.code = 'MODULE_NOT_FOUND'; + throw err; + } + return filename; +}; + + +// Given a file name, pass it to the proper extension handler. +Module.prototype.load = function(filename) { + debug('load %j for module %j', filename, this.id); + + assert(!this.loaded); + this.filename = filename; + this.paths = Module._nodeModulePaths(path.dirname(filename)); + + var extension = path.extname(filename) || '.js'; + if (!Module._extensions[extension]) extension = '.js'; + Module._extensions[extension](this, filename); + this.loaded = true; + + if (experimentalModules) { + const ESMLoader = asyncESM.ESMLoader; + const url = getURLFromFilePath(filename); + const urlString = `${url}`; + const exports = this.exports; + if (ESMLoader.moduleMap.has(urlString) !== true) { + ESMLoader.moduleMap.set( + urlString, + new ModuleJob(ESMLoader, url, async () => { + const ctx = createDynamicModule( + ['default'], url); + ctx.reflect.exports.default.set(exports); + return ctx; + }) + ); + } else { + const job = ESMLoader.moduleMap.get(urlString); + if (job.reflect) + job.reflect.exports.default.set(exports); + } + } +}; + + +// Loads a module at the given file path. Returns that module's +// `exports` property. +Module.prototype.require = function(path) { + assert(path, 'missing path'); + assert(typeof path === 'string', 'path must be a string'); + return Module._load(path, this, /* isMain */ false); +}; + + +// Resolved path to process.argv[1] will be lazily placed here +// (needed for setting breakpoint when called with --inspect-brk) +var resolvedArgv; + + +// Run the file contents in the correct scope or sandbox. Expose +// the correct helper variables (require, module, exports) to +// the file. +// Returns exception, if any. 
+Module.prototype._compile = function(content, filename) { + + content = stripShebang(content); + + // create wrapper function + var wrapper = Module.wrap(content); + + var compiledWrapper = vm.runInThisContext(wrapper, { + filename: filename, + lineOffset: 0, + displayErrors: true + }); + + var inspectorWrapper = null; + if (process._breakFirstLine && process._eval == null) { + if (!resolvedArgv) { + // we enter the repl if we're not given a filename argument. + if (process.argv[1]) { + resolvedArgv = Module._resolveFilename(process.argv[1], null, false); + } else { + resolvedArgv = 'repl'; + } + } + + // Set breakpoint on module start + if (filename === resolvedArgv) { + delete process._breakFirstLine; + inspectorWrapper = process.binding('inspector').callAndPauseOnStart; + if (!inspectorWrapper) { + const Debug = vm.runInDebugContext('Debug'); + Debug.setBreakPoint(compiledWrapper, 0, 0); + } + } + } + var dirname = path.dirname(filename); + var require = makeRequireFunction(this); + var depth = requireDepth; + if (depth === 0) stat.cache = new Map(); + var result; + if (inspectorWrapper) { + result = inspectorWrapper(compiledWrapper, this.exports, this.exports, + require, this, filename, dirname); + } else { + result = compiledWrapper.call(this.exports, this.exports, require, this, + filename, dirname); + } + if (depth === 0) stat.cache = null; + return result; +}; + + +// Native extension for .js +Module._extensions['.js'] = function(module, filename) { + var content = fs.readFileSync(filename, 'utf8'); + module._compile(stripBOM(content), filename); +}; + + +// Native extension for .json +Module._extensions['.json'] = function(module, filename) { + var content = fs.readFileSync(filename, 'utf8'); + try { + module.exports = JSON.parse(stripBOM(content)); + } catch (err) { + err.message = filename + ': ' + err.message; + throw err; + } +}; + + +//Native extension for .node +Module._extensions['.node'] = function(module, filename) { + return 
process.dlopen(module, path.toNamespacedPath(filename)); +}; + +if (experimentalModules) { + Module._extensions['.mjs'] = function(module, filename) { + throw new errors.Error('ERR_REQUIRE_ESM', filename); + }; +} + +// bootstrap main module. +Module.runMain = function() { + // Load the main module--the command line argument. + Module._load(process.argv[1], null, true); + // Handle any nextTicks added in the first tick of the program + process._tickCallback(); +}; + +Module._initPaths = function() { + const isWindows = process.platform === 'win32'; + + var homeDir; + if (isWindows) { + homeDir = process.env.USERPROFILE; + } else { + homeDir = process.env.HOME; + } + + // $PREFIX/lib/node, where $PREFIX is the root of the Node.js installation. + var prefixDir; + // process.execPath is $PREFIX/bin/node except on Windows where it is + // $PREFIX\node.exe. + if (isWindows) { + prefixDir = path.resolve(process.execPath, '..'); + } else { + prefixDir = path.resolve(process.execPath, '..', '..'); + } + var paths = [path.resolve(prefixDir, 'lib', 'node')]; + + if (homeDir) { + paths.unshift(path.resolve(homeDir, '.node_libraries')); + paths.unshift(path.resolve(homeDir, '.node_modules')); + } + + var nodePath = process.env.NODE_PATH; + if (nodePath) { + paths = nodePath.split(path.delimiter).filter(function(path) { + return !!path; + }).concat(paths); + } + + modulePaths = paths; + + // clone as a shallow copy, for introspection. + Module.globalPaths = modulePaths.slice(0); +}; + +Module._preloadModules = function(requests) { + if (!Array.isArray(requests)) + return; + + // Preloaded modules have a dummy parent module which is deemed to exist + // in the current working directory. This seeds the search path for + // preloaded modules. 
+ var parent = new Module('internal/preload', null); + try { + parent.paths = Module._nodeModulePaths(process.cwd()); + } catch (e) { + if (e.code !== 'ENOENT') { + throw e; + } + } + for (var n = 0; n < requests.length; n++) + parent.require(requests[n]); +}; + +Module._initPaths(); + +// backwards compatibility +Module.Module = Module; diff --git a/lib/internal/loader/CreateDynamicModule.js b/lib/internal/modules/esm/CreateDynamicModule.js similarity index 96% rename from lib/internal/loader/CreateDynamicModule.js rename to lib/internal/modules/esm/CreateDynamicModule.js index f2596de04bfcb3..7e9777af51ee2b 100644 --- a/lib/internal/loader/CreateDynamicModule.js +++ b/lib/internal/modules/esm/CreateDynamicModule.js @@ -1,5 +1,6 @@ 'use strict'; +const { internalBinding } = require('internal/bootstrap/loaders'); const { ModuleWrap } = internalBinding('module_wrap'); const debug = require('util').debuglog('esm'); const ArrayJoin = Function.call.bind(Array.prototype.join); diff --git a/lib/internal/loader/DefaultResolve.js b/lib/internal/modules/esm/DefaultResolve.js similarity index 91% rename from lib/internal/loader/DefaultResolve.js rename to lib/internal/modules/esm/DefaultResolve.js index bd99f31e54c599..8d9240dedbd89a 100644 --- a/lib/internal/loader/DefaultResolve.js +++ b/lib/internal/modules/esm/DefaultResolve.js @@ -1,9 +1,9 @@ 'use strict'; const { URL } = require('url'); -const CJSmodule = require('module'); +const CJSmodule = require('internal/modules/cjs/loader'); const internalFS = require('internal/fs'); -const NativeModule = require('native_module'); +const { NativeModule, internalBinding } = require('internal/bootstrap/loaders'); const { extname } = require('path'); const { realpathSync } = require('fs'); const preserveSymlinks = !!process.binding('config').preserveSymlinks; @@ -66,7 +66,9 @@ function resolve(specifier, parentURL) { throw e; } - if (!preserveSymlinks) { + const isMain = parentURL === undefined; + + if (!preserveSymlinks || isMain) 
{ const real = realpathSync(getPathFromURL(url), { [internalFS.realpathCacheKey]: realpathCache }); @@ -80,7 +82,6 @@ function resolve(specifier, parentURL) { let format = extensionFormatMap[ext]; if (!format) { - const isMain = parentURL === undefined; if (isMain) format = 'cjs'; else diff --git a/lib/internal/loader/Loader.js b/lib/internal/modules/esm/Loader.js similarity index 92% rename from lib/internal/loader/Loader.js rename to lib/internal/modules/esm/Loader.js index f0edbbf921f40f..170fa23ec798ff 100644 --- a/lib/internal/loader/Loader.js +++ b/lib/internal/modules/esm/Loader.js @@ -1,11 +1,11 @@ 'use strict'; const errors = require('internal/errors'); -const ModuleMap = require('internal/loader/ModuleMap'); -const ModuleJob = require('internal/loader/ModuleJob'); -const defaultResolve = require('internal/loader/DefaultResolve'); -const createDynamicModule = require('internal/loader/CreateDynamicModule'); -const translators = require('internal/loader/Translators'); +const ModuleMap = require('internal/modules/esm/ModuleMap'); +const ModuleJob = require('internal/modules/esm/ModuleJob'); +const defaultResolve = require('internal/modules/esm/DefaultResolve'); +const createDynamicModule = require('internal/modules/esm/CreateDynamicModule'); +const translators = require('internal/modules/esm/Translators'); const FunctionBind = Function.call.bind(Function.prototype.bind); diff --git a/lib/internal/loader/ModuleJob.js b/lib/internal/modules/esm/ModuleJob.js similarity index 98% rename from lib/internal/loader/ModuleJob.js rename to lib/internal/modules/esm/ModuleJob.js index b3553fc7235d95..d948252829ddbf 100644 --- a/lib/internal/loader/ModuleJob.js +++ b/lib/internal/modules/esm/ModuleJob.js @@ -1,5 +1,6 @@ 'use strict'; +const { internalBinding } = require('internal/bootstrap/loaders'); const { ModuleWrap } = internalBinding('module_wrap'); const { SafeSet, SafePromise } = require('internal/safe_globals'); const { decorateErrorStack } = 
require('internal/util'); diff --git a/lib/internal/loader/ModuleMap.js b/lib/internal/modules/esm/ModuleMap.js similarity index 93% rename from lib/internal/loader/ModuleMap.js rename to lib/internal/modules/esm/ModuleMap.js index aa238afbaedc05..7f5ee8bf9e7762 100644 --- a/lib/internal/loader/ModuleMap.js +++ b/lib/internal/modules/esm/ModuleMap.js @@ -1,6 +1,6 @@ 'use strict'; -const ModuleJob = require('internal/loader/ModuleJob'); +const ModuleJob = require('internal/modules/esm/ModuleJob'); const { SafeMap } = require('internal/safe_globals'); const debug = require('util').debuglog('esm'); const errors = require('internal/errors'); diff --git a/lib/internal/loader/ModuleWrap.js b/lib/internal/modules/esm/ModuleWrap.js similarity index 62% rename from lib/internal/loader/ModuleWrap.js rename to lib/internal/modules/esm/ModuleWrap.js index b2b11daead7dde..e589960193c94c 100644 --- a/lib/internal/loader/ModuleWrap.js +++ b/lib/internal/modules/esm/ModuleWrap.js @@ -2,4 +2,5 @@ // exposes ModuleWrap for testing +const { internalBinding } = require('internal/bootstrap/loaders'); module.exports = internalBinding('module_wrap').ModuleWrap; diff --git a/lib/internal/loader/Translators.js b/lib/internal/modules/esm/Translators.js similarity index 88% rename from lib/internal/loader/Translators.js rename to lib/internal/modules/esm/Translators.js index 18b1b12fd15854..2928115be515e0 100644 --- a/lib/internal/loader/Translators.js +++ b/lib/internal/modules/esm/Translators.js @@ -1,11 +1,14 @@ 'use strict'; +const { NativeModule, internalBinding } = require('internal/bootstrap/loaders'); const { ModuleWrap } = internalBinding('module_wrap'); -const NativeModule = require('native_module'); -const internalCJSModule = require('internal/module'); -const CJSModule = require('module'); +const { + stripShebang, + stripBOM +} = require('internal/modules/cjs/helpers'); +const CJSModule = require('internal/modules/cjs/loader'); const internalURLModule = require('internal/url'); 
-const createDynamicModule = require('internal/loader/CreateDynamicModule'); +const createDynamicModule = require('internal/modules/esm/CreateDynamicModule'); const fs = require('fs'); const { _makeLong } = require('path'); const { SafeMap } = require('internal/safe_globals'); @@ -24,7 +27,7 @@ translators.set('esm', async (url) => { const source = `${await readFileAsync(new URL(url))}`; debug(`Translating StandardModule ${url}`); return { - module: new ModuleWrap(internalCJSModule.stripShebang(source), url), + module: new ModuleWrap(stripShebang(source), url), reflect: undefined }; }); @@ -82,7 +85,7 @@ translators.set('json', async (url) => { const pathname = internalURLModule.getPathFromURL(new URL(url)); const content = readFileSync(pathname, 'utf8'); try { - const exports = JsonParse(internalCJSModule.stripBOM(content)); + const exports = JsonParse(stripBOM(content)); reflect.exports.default.set(exports); } catch (err) { err.message = pathname + ': ' + err.message; diff --git a/lib/internal/process/modules.js b/lib/internal/process/esm_loader.js similarity index 91% rename from lib/internal/process/modules.js rename to lib/internal/process/esm_loader.js index bc977c718725f2..ca2ce57feb54d8 100644 --- a/lib/internal/process/modules.js +++ b/lib/internal/process/esm_loader.js @@ -1,11 +1,12 @@ 'use strict'; +const { internalBinding } = require('internal/bootstrap/loaders'); const { setImportModuleDynamicallyCallback } = internalBinding('module_wrap'); const { getURLFromFilePath } = require('internal/url'); -const Loader = require('internal/loader/Loader'); +const Loader = require('internal/modules/esm/Loader'); const path = require('path'); const { URL } = require('url'); diff --git a/lib/internal/process/next_tick.js b/lib/internal/process/next_tick.js index 84a7402117c5c2..0bced30445a170 100644 --- a/lib/internal/process/next_tick.js +++ b/lib/internal/process/next_tick.js @@ -32,10 +32,60 @@ function setupNextTick() { const kHasScheduled = 0; const 
kHasPromiseRejections = 1; - // Queue size for each tick array. Must be a factor of two. + // Queue size for each tick array. Must be a power of two. const kQueueSize = 2048; const kQueueMask = kQueueSize - 1; + // The next tick queue is implemented as a singly-linked list of fixed-size + // circular buffers. It looks something like this: + // + // head tail + // | | + // v v + // +-----------+ <-----\ +-----------+ <------\ +-----------+ + // | [null] | \----- | next | \------- | next | + // +-----------+ +-----------+ +-----------+ + // | tick | <-- bottom | tick | <-- bottom | [empty] | + // | tick | | tick | | [empty] | + // | tick | | tick | | [empty] | + // | tick | | tick | | [empty] | + // | tick | | tick | bottom --> | tick | + // | tick | | tick | | tick | + // | ... | | ... | | ... | + // | tick | | tick | | tick | + // | tick | | tick | | tick | + // | [empty] | <-- top | tick | | tick | + // | [empty] | | tick | | tick | + // | [empty] | | tick | | tick | + // +-----------+ +-----------+ <-- top top --> +-----------+ + // + // Or, if there is only one fixed-size queue, it looks something + // like either of these: + // + // head tail head tail + // | | | | + // v v v v + // +-----------+ +-----------+ + // | [null] | | [null] | + // +-----------+ +-----------+ + // | [empty] | | tick | + // | [empty] | | tick | + // | tick | <-- bottom top --> | [empty] | + // | tick | | [empty] | + // | [empty] | <-- top bottom --> | tick | + // | [empty] | | tick | + // +-----------+ +-----------+ + // + // Adding a value means moving `top` forward by one, removing means + // moving `bottom` forward by one. + // + // We let `bottom` and `top` wrap around, so when `top` is conceptually + // pointing to the end of the list, that means that the actual value is `0`. + // + // In particular, when `top === bottom`, this can mean *either* that the + // current queue is empty or that it is full. 
We can differentiate by + // checking whether an entry in the queue is empty (a.k.a. `=== undefined`). + class FixedQueue { constructor() { this.bottom = 0; @@ -50,11 +100,12 @@ function setupNextTick() { } shift() { - const next = this.list[this.bottom]; - if (next === undefined) return null; + const nextItem = this.list[this.bottom]; + if (nextItem === undefined) + return null; this.list[this.bottom] = undefined; this.bottom = (this.bottom + 1) & kQueueMask; - return next; + return nextItem; } } @@ -63,21 +114,34 @@ function setupNextTick() { function push(data) { if (head.bottom === head.top) { - if (head.list[head.top] !== undefined) + // Either empty or full: + if (head.list[head.top] !== undefined) { + // It's full: Creates a new queue, sets the old queue's `.next` to it, + // and sets it as the new main queue. head = head.next = new FixedQueue(); - else + } else { + // If the head is empty, that means that it was the only fixed-sized + // queue in existence. + DCHECK_EQ(head.next, null); + // This is the first tick object in existence, so we need to inform + // the C++ side that we do want to run `_tickCallback()`. tickInfo[kHasScheduled] = 1; + } } head.push(data); } function shift() { const next = tail.shift(); - if (tail.top === tail.bottom) { - if (tail.next) + if (tail.top === tail.bottom) { // -> .shift() emptied the current queue. + if (tail.next !== null) { + // If there is another queue, it forms the new tail. tail = tail.next; - else + } else { + // We've just run out of items. Let the native side know that it + // doesn't need to bother calling into JS to run the queue. 
tickInfo[kHasScheduled] = 0; + } } return next; } diff --git a/lib/internal/url.js b/lib/internal/url.js index cae4037b6e7792..d79d9b13bd058a 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -805,7 +805,7 @@ const noEscape = [ const paramHexTable = hexTable.slice(); paramHexTable[0x20] = '+'; -function escapeParam(str) { +function encodeStr(str, noEscapeTable, hexTable) { const len = str.length; if (len === 0) return ''; @@ -818,12 +818,12 @@ function escapeParam(str) { // ASCII if (c < 0x80) { - if (noEscape[c] === 1) + if (noEscapeTable[c] === 1) continue; if (lastPos < i) out += str.slice(lastPos, i); lastPos = i + 1; - out += paramHexTable[c]; + out += hexTable[c]; continue; } @@ -833,15 +833,15 @@ function escapeParam(str) { // Multi-byte characters ... if (c < 0x800) { lastPos = i + 1; - out += paramHexTable[0xC0 | (c >> 6)] + - paramHexTable[0x80 | (c & 0x3F)]; + out += hexTable[0xC0 | (c >> 6)] + + hexTable[0x80 | (c & 0x3F)]; continue; } if (c < 0xD800 || c >= 0xE000) { lastPos = i + 1; - out += paramHexTable[0xE0 | (c >> 12)] + - paramHexTable[0x80 | ((c >> 6) & 0x3F)] + - paramHexTable[0x80 | (c & 0x3F)]; + out += hexTable[0xE0 | (c >> 12)] + + hexTable[0x80 | ((c >> 6) & 0x3F)] + + hexTable[0x80 | (c & 0x3F)]; continue; } // Surrogate pair @@ -857,10 +857,10 @@ function escapeParam(str) { } lastPos = i + 1; c = 0x10000 + (((c & 0x3FF) << 10) | c2); - out += paramHexTable[0xF0 | (c >> 18)] + - paramHexTable[0x80 | ((c >> 12) & 0x3F)] + - paramHexTable[0x80 | ((c >> 6) & 0x3F)] + - paramHexTable[0x80 | (c & 0x3F)]; + out += hexTable[0xF0 | (c >> 18)] + + hexTable[0x80 | ((c >> 12) & 0x3F)] + + hexTable[0x80 | ((c >> 6) & 0x3F)] + + hexTable[0x80 | (c & 0x3F)]; } if (lastPos === 0) return str; @@ -876,9 +876,16 @@ function serializeParams(array) { if (len === 0) return ''; - var output = `${escapeParam(array[0])}=${escapeParam(array[1])}`; - for (var i = 2; i < len; i += 2) - output += `&${escapeParam(array[i])}=${escapeParam(array[i + 1])}`; 
+ const firstEncodedParam = encodeStr(array[0], noEscape, paramHexTable); + const firstEncodedValue = encodeStr(array[1], noEscape, paramHexTable); + let output = `${firstEncodedParam}=${firstEncodedValue}`; + + for (var i = 2; i < len; i += 2) { + const encodedParam = encodeStr(array[i], noEscape, paramHexTable); + const encodedValue = encodeStr(array[i + 1], noEscape, paramHexTable); + output += `&${encodedParam}=${encodedValue}`; + } + return output; } @@ -1422,5 +1429,6 @@ module.exports = { domainToUnicode, urlToOptions, formatSymbol: kFormat, - searchParamsSymbol: searchParams + searchParamsSymbol: searchParams, + encodeStr }; diff --git a/lib/internal/vm/Module.js b/lib/internal/vm/Module.js index a8fb7303aec131..f0fa55f39b8103 100644 --- a/lib/internal/vm/Module.js +++ b/lib/internal/vm/Module.js @@ -1,5 +1,6 @@ 'use strict'; +const { internalBinding } = require('internal/bootstrap/loaders'); const { emitExperimentalWarning } = require('internal/util'); const { URL } = require('internal/url'); const { kParsingContext, isContext } = process.binding('contextify'); diff --git a/lib/module.js b/lib/module.js index 877cca590f077b..962f18b054cc90 100644 --- a/lib/module.js +++ b/lib/module.js @@ -1,756 +1,3 @@ -// Copyright Joyent, Inc. and other Node contributors. -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the -// "Software"), to deal in the Software without restriction, including -// without limitation the rights to use, copy, modify, merge, publish, -// distribute, sublicense, and/or sell copies of the Software, and to permit -// persons to whom the Software is furnished to do so, subject to the -// following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN -// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -// USE OR OTHER DEALINGS IN THE SOFTWARE. - 'use strict'; -const NativeModule = require('native_module'); -const util = require('util'); -const { decorateErrorStack } = require('internal/util'); -const { getURLFromFilePath } = require('internal/url'); -const vm = require('vm'); -const assert = require('assert').ok; -const fs = require('fs'); -const internalFS = require('internal/fs'); -const path = require('path'); -const { - internalModuleReadFile, - internalModuleStat -} = process.binding('fs'); -const internalModule = require('internal/module'); -const preserveSymlinks = !!process.binding('config').preserveSymlinks; -const experimentalModules = !!process.binding('config').experimentalModules; - -const errors = require('internal/errors'); - -module.exports = Module; - -// these are below module.exports for the circular reference -const internalESModule = require('internal/process/modules'); -const ModuleJob = require('internal/loader/ModuleJob'); -const createDynamicModule = require('internal/loader/CreateDynamicModule'); - -function stat(filename) { - filename = path.toNamespacedPath(filename); - const cache = stat.cache; - if (cache !== null) { - const result = cache.get(filename); - if (result !== undefined) return result; - } - const result = internalModuleStat(filename); - if (cache !== null) cache.set(filename, result); - return result; -} -stat.cache = null; - -function updateChildren(parent, child, scan) { - var children = parent && parent.children; - if (children && !(scan && children.includes(child))) - 
children.push(child); -} - -function Module(id, parent) { - this.id = id; - this.exports = {}; - this.parent = parent; - updateChildren(parent, this, false); - this.filename = null; - this.loaded = false; - this.children = []; -} - -const builtinModules = Object.keys(NativeModule._source) - .filter(NativeModule.nonInternalExists); - -Object.freeze(builtinModules); -Module.builtinModules = builtinModules; - -Module._cache = Object.create(null); -Module._pathCache = Object.create(null); -Module._extensions = Object.create(null); -var modulePaths = []; -Module.globalPaths = []; - -Module.wrap = function(script) { - return Module.wrapper[0] + script + Module.wrapper[1]; -}; - -Module.wrapper = [ - '(function (exports, require, module, __filename, __dirname) { ', - '\n});' -]; - -const debug = util.debuglog('module'); - -Module._debug = util.deprecate(debug, 'Module._debug is deprecated.', - 'DEP0077'); - -// given a module name, and a list of paths to test, returns the first -// matching file in the following precedence. -// -// require("a.") -// -> a. -// -// require("a") -// -> a -// -> a. -// -> a/index. 
- -// check if the directory is a package.json dir -const packageMainCache = Object.create(null); - -function readPackage(requestPath) { - const entry = packageMainCache[requestPath]; - if (entry) - return entry; - - const jsonPath = path.resolve(requestPath, 'package.json'); - const json = internalModuleReadFile(path.toNamespacedPath(jsonPath)); - - if (json === undefined) { - return false; - } - - try { - var pkg = packageMainCache[requestPath] = JSON.parse(json).main; - } catch (e) { - e.path = jsonPath; - e.message = 'Error parsing ' + jsonPath + ': ' + e.message; - throw e; - } - return pkg; -} - -function tryPackage(requestPath, exts, isMain) { - var pkg = readPackage(requestPath); - - if (!pkg) return false; - - var filename = path.resolve(requestPath, pkg); - return tryFile(filename, isMain) || - tryExtensions(filename, exts, isMain) || - tryExtensions(path.resolve(filename, 'index'), exts, isMain); -} - -// In order to minimize unnecessary lstat() calls, -// this cache is a list of known-real paths. -// Set to an empty Map to reset. -const realpathCache = new Map(); - -// check if the file exists and is not a directory -// if using --preserve-symlinks and isMain is false, -// keep symlinks intact, otherwise resolve to the -// absolute realpath. 
-function tryFile(requestPath, isMain) { - const rc = stat(requestPath); - if (preserveSymlinks && !isMain) { - return rc === 0 && path.resolve(requestPath); - } - return rc === 0 && toRealPath(requestPath); -} - -function toRealPath(requestPath) { - return fs.realpathSync(requestPath, { - [internalFS.realpathCacheKey]: realpathCache - }); -} - -// given a path, check if the file exists with any of the set extensions -function tryExtensions(p, exts, isMain) { - for (var i = 0; i < exts.length; i++) { - const filename = tryFile(p + exts[i], isMain); - - if (filename) { - return filename; - } - } - return false; -} - -var warned = false; -Module._findPath = function(request, paths, isMain) { - if (path.isAbsolute(request)) { - paths = ['']; - } else if (!paths || paths.length === 0) { - return false; - } - - var cacheKey = request + '\x00' + - (paths.length === 1 ? paths[0] : paths.join('\x00')); - var entry = Module._pathCache[cacheKey]; - if (entry) - return entry; - - var exts; - var trailingSlash = request.length > 0 && - request.charCodeAt(request.length - 1) === 47/*/*/; - - // For each path - for (var i = 0; i < paths.length; i++) { - // Don't search further if path doesn't exist - const curPath = paths[i]; - if (curPath && stat(curPath) < 1) continue; - var basePath = path.resolve(curPath, request); - var filename; - - var rc = stat(basePath); - if (!trailingSlash) { - if (rc === 0) { // File. - if (preserveSymlinks && !isMain) { - filename = path.resolve(basePath); - } else { - filename = toRealPath(basePath); - } - } else if (rc === 1) { // Directory. - if (exts === undefined) - exts = Object.keys(Module._extensions); - filename = tryPackage(basePath, exts, isMain); - } - - if (!filename) { - // try it with each of the extensions - if (exts === undefined) - exts = Object.keys(Module._extensions); - filename = tryExtensions(basePath, exts, isMain); - } - } - - if (!filename && rc === 1) { // Directory. 
- if (exts === undefined) - exts = Object.keys(Module._extensions); - filename = tryPackage(basePath, exts, isMain) || - // try it with each of the extensions at "index" - tryExtensions(path.resolve(basePath, 'index'), exts, isMain); - } - - if (filename) { - // Warn once if '.' resolved outside the module dir - if (request === '.' && i > 0) { - if (!warned) { - warned = true; - process.emitWarning( - 'warning: require(\'.\') resolved outside the package ' + - 'directory. This functionality is deprecated and will be removed ' + - 'soon.', - 'DeprecationWarning', 'DEP0019'); - } - } - - Module._pathCache[cacheKey] = filename; - return filename; - } - } - return false; -}; - -// 'node_modules' character codes reversed -var nmChars = [ 115, 101, 108, 117, 100, 111, 109, 95, 101, 100, 111, 110 ]; -var nmLen = nmChars.length; -if (process.platform === 'win32') { - // 'from' is the __dirname of the module. - Module._nodeModulePaths = function(from) { - // guarantee that 'from' is absolute. - from = path.resolve(from); - - // note: this approach *only* works when the path is guaranteed - // to be absolute. Doing a fully-edge-case-correct path.split - // that works on both Windows and Posix is non-trivial. - - // return root node_modules when path is 'D:\\'. - // path.resolve will make sure from.length >=3 in Windows. - if (from.charCodeAt(from.length - 1) === 92/*\*/ && - from.charCodeAt(from.length - 2) === 58/*:*/) - return [from + 'node_modules']; - - const paths = []; - var p = 0; - var last = from.length; - for (var i = from.length - 1; i >= 0; --i) { - const code = from.charCodeAt(i); - // The path segment separator check ('\' and '/') was used to get - // node_modules path for every path segment. - // Use colon as an extra condition since we can get node_modules - // path for drive root like 'C:\node_modules' and don't need to - // parse drive name. 
- if (code === 92/*\*/ || code === 47/*/*/ || code === 58/*:*/) { - if (p !== nmLen) - paths.push(from.slice(0, last) + '\\node_modules'); - last = i; - p = 0; - } else if (p !== -1) { - if (nmChars[p] === code) { - ++p; - } else { - p = -1; - } - } - } - - return paths; - }; -} else { // posix - // 'from' is the __dirname of the module. - Module._nodeModulePaths = function(from) { - // guarantee that 'from' is absolute. - from = path.resolve(from); - // Return early not only to avoid unnecessary work, but to *avoid* returning - // an array of two items for a root: [ '//node_modules', '/node_modules' ] - if (from === '/') - return ['/node_modules']; - - // note: this approach *only* works when the path is guaranteed - // to be absolute. Doing a fully-edge-case-correct path.split - // that works on both Windows and Posix is non-trivial. - const paths = []; - var p = 0; - var last = from.length; - for (var i = from.length - 1; i >= 0; --i) { - const code = from.charCodeAt(i); - if (code === 47/*/*/) { - if (p !== nmLen) - paths.push(from.slice(0, last) + '/node_modules'); - last = i; - p = 0; - } else if (p !== -1) { - if (nmChars[p] === code) { - ++p; - } else { - p = -1; - } - } - } - - // Append /node_modules to handle root paths. - paths.push('/node_modules'); - - return paths; - }; -} - - -// 'index.' character codes -var indexChars = [ 105, 110, 100, 101, 120, 46 ]; -var indexLen = indexChars.length; -Module._resolveLookupPaths = function(request, parent, newReturn) { - if (NativeModule.nonInternalExists(request)) { - debug('looking for %j in []', request); - return (newReturn ? 
null : [request, []]); - } - - // Check for relative path - if (request.length < 2 || - request.charCodeAt(0) !== 46/*.*/ || - (request.charCodeAt(1) !== 46/*.*/ && - request.charCodeAt(1) !== 47/*/*/)) { - var paths = modulePaths; - if (parent) { - if (!parent.paths) - paths = parent.paths = []; - else - paths = parent.paths.concat(paths); - } - - // Maintain backwards compat with certain broken uses of require('.') - // by putting the module's directory in front of the lookup paths. - if (request === '.') { - if (parent && parent.filename) { - paths.unshift(path.dirname(parent.filename)); - } else { - paths.unshift(path.resolve(request)); - } - } - - debug('looking for %j in %j', request, paths); - return (newReturn ? (paths.length > 0 ? paths : null) : [request, paths]); - } - - // with --eval, parent.id is not set and parent.filename is null - if (!parent || !parent.id || !parent.filename) { - // make require('./path/to/foo') work - normally the path is taken - // from realpath(__filename) but with eval there is no filename - var mainPaths = ['.'].concat(Module._nodeModulePaths('.'), modulePaths); - - debug('looking for %j in %j', request, mainPaths); - return (newReturn ? mainPaths : [request, mainPaths]); - } - - // Is the parent an index module? - // We can assume the parent has a valid extension, - // as it already has been accepted as a module. 
- const base = path.basename(parent.filename); - var parentIdPath; - if (base.length > indexLen) { - var i = 0; - for (; i < indexLen; ++i) { - if (indexChars[i] !== base.charCodeAt(i)) - break; - } - if (i === indexLen) { - // We matched 'index.', let's validate the rest - for (; i < base.length; ++i) { - const code = base.charCodeAt(i); - if (code !== 95/*_*/ && - (code < 48/*0*/ || code > 57/*9*/) && - (code < 65/*A*/ || code > 90/*Z*/) && - (code < 97/*a*/ || code > 122/*z*/)) - break; - } - if (i === base.length) { - // Is an index module - parentIdPath = parent.id; - } else { - // Not an index module - parentIdPath = path.dirname(parent.id); - } - } else { - // Not an index module - parentIdPath = path.dirname(parent.id); - } - } else { - // Not an index module - parentIdPath = path.dirname(parent.id); - } - var id = path.resolve(parentIdPath, request); - - // make sure require('./path') and require('path') get distinct ids, even - // when called from the toplevel js file - if (parentIdPath === '.' && id.indexOf('/') === -1) { - id = './' + id; - } - - debug('RELATIVE: requested: %s set ID to: %s from %s', request, id, - parent.id); - - var parentDir = [path.dirname(parent.filename)]; - debug('looking for %j in %j', id, parentDir); - return (newReturn ? parentDir : [id, parentDir]); -}; - -// Check the cache for the requested file. -// 1. If a module already exists in the cache: return its exports object. -// 2. If the module is native: call `NativeModule.require()` with the -// filename and return the result. -// 3. Otherwise, create a new module for the file and save it to the cache. -// Then have it load the file contents before returning its exports -// object. 
-Module._load = function(request, parent, isMain) { - if (parent) { - debug('Module._load REQUEST %s parent: %s', request, parent.id); - } - - if (experimentalModules && isMain) { - internalESModule.loaderPromise.then((loader) => { - return loader.import(getURLFromFilePath(request).pathname); - }) - .catch((e) => { - decorateErrorStack(e); - console.error(e); - process.exit(1); - }); - return; - } - - var filename = Module._resolveFilename(request, parent, isMain); - - var cachedModule = Module._cache[filename]; - if (cachedModule) { - updateChildren(parent, cachedModule, true); - return cachedModule.exports; - } - - if (NativeModule.nonInternalExists(filename)) { - debug('load native module %s', request); - return NativeModule.require(filename); - } - - // Don't call updateChildren(), Module constructor already does. - var module = new Module(filename, parent); - - if (isMain) { - process.mainModule = module; - module.id = '.'; - } - - Module._cache[filename] = module; - - tryModuleLoad(module, filename); - - return module.exports; -}; - -function tryModuleLoad(module, filename) { - var threw = true; - try { - module.load(filename); - threw = false; - } finally { - if (threw) { - delete Module._cache[filename]; - } - } -} - -Module._resolveFilename = function(request, parent, isMain, options) { - if (NativeModule.nonInternalExists(request)) { - return request; - } - - var paths; - - if (typeof options === 'object' && options !== null && - Array.isArray(options.paths)) { - const fakeParent = new Module('', null); - - paths = []; - - for (var i = 0; i < options.paths.length; i++) { - const path = options.paths[i]; - fakeParent.paths = Module._nodeModulePaths(path); - const lookupPaths = Module._resolveLookupPaths(request, fakeParent, true); - - if (!paths.includes(path)) - paths.push(path); - - for (var j = 0; j < lookupPaths.length; j++) { - if (!paths.includes(lookupPaths[j])) - paths.push(lookupPaths[j]); - } - } - } else { - paths = 
Module._resolveLookupPaths(request, parent, true); - } - - // look up the filename first, since that's the cache key. - var filename = Module._findPath(request, paths, isMain); - if (!filename) { - var err = new Error(`Cannot find module '${request}'`); - err.code = 'MODULE_NOT_FOUND'; - throw err; - } - return filename; -}; - - -// Given a file name, pass it to the proper extension handler. -Module.prototype.load = function(filename) { - debug('load %j for module %j', filename, this.id); - - assert(!this.loaded); - this.filename = filename; - this.paths = Module._nodeModulePaths(path.dirname(filename)); - - var extension = path.extname(filename) || '.js'; - if (!Module._extensions[extension]) extension = '.js'; - Module._extensions[extension](this, filename); - this.loaded = true; - - if (experimentalModules) { - const ESMLoader = internalESModule.ESMLoader; - const url = getURLFromFilePath(filename); - const urlString = `${url}`; - const exports = this.exports; - if (ESMLoader.moduleMap.has(urlString) !== true) { - ESMLoader.moduleMap.set( - urlString, - new ModuleJob(ESMLoader, url, async () => { - const ctx = createDynamicModule( - ['default'], url); - ctx.reflect.exports.default.set(exports); - return ctx; - }) - ); - } else { - const job = ESMLoader.moduleMap.get(urlString); - if (job.reflect) - job.reflect.exports.default.set(exports); - } - } -}; - - -// Loads a module at the given file path. Returns that module's -// `exports` property. -Module.prototype.require = function(path) { - assert(path, 'missing path'); - assert(typeof path === 'string', 'path must be a string'); - return Module._load(path, this, /* isMain */ false); -}; - - -// Resolved path to process.argv[1] will be lazily placed here -// (needed for setting breakpoint when called with --inspect-brk) -var resolvedArgv; - - -// Run the file contents in the correct scope or sandbox. Expose -// the correct helper variables (require, module, exports) to -// the file. -// Returns exception, if any. 
-Module.prototype._compile = function(content, filename) { - - content = internalModule.stripShebang(content); - - // create wrapper function - var wrapper = Module.wrap(content); - - var compiledWrapper = vm.runInThisContext(wrapper, { - filename: filename, - lineOffset: 0, - displayErrors: true - }); - - var inspectorWrapper = null; - if (process._breakFirstLine && process._eval == null) { - if (!resolvedArgv) { - // we enter the repl if we're not given a filename argument. - if (process.argv[1]) { - resolvedArgv = Module._resolveFilename(process.argv[1], null, false); - } else { - resolvedArgv = 'repl'; - } - } - - // Set breakpoint on module start - if (filename === resolvedArgv) { - delete process._breakFirstLine; - inspectorWrapper = process.binding('inspector').callAndPauseOnStart; - if (!inspectorWrapper) { - const Debug = vm.runInDebugContext('Debug'); - Debug.setBreakPoint(compiledWrapper, 0, 0); - } - } - } - var dirname = path.dirname(filename); - var require = internalModule.makeRequireFunction(this); - var depth = internalModule.requireDepth; - if (depth === 0) stat.cache = new Map(); - var result; - if (inspectorWrapper) { - result = inspectorWrapper(compiledWrapper, this.exports, this.exports, - require, this, filename, dirname); - } else { - result = compiledWrapper.call(this.exports, this.exports, require, this, - filename, dirname); - } - if (depth === 0) stat.cache = null; - return result; -}; - - -// Native extension for .js -Module._extensions['.js'] = function(module, filename) { - var content = fs.readFileSync(filename, 'utf8'); - module._compile(internalModule.stripBOM(content), filename); -}; - - -// Native extension for .json -Module._extensions['.json'] = function(module, filename) { - var content = fs.readFileSync(filename, 'utf8'); - try { - module.exports = JSON.parse(internalModule.stripBOM(content)); - } catch (err) { - err.message = filename + ': ' + err.message; - throw err; - } -}; - - -//Native extension for .node 
-Module._extensions['.node'] = function(module, filename) { - return process.dlopen(module, path.toNamespacedPath(filename)); -}; - -if (experimentalModules) { - Module._extensions['.mjs'] = function(module, filename) { - throw new errors.Error('ERR_REQUIRE_ESM', filename); - }; -} - -// bootstrap main module. -Module.runMain = function() { - // Load the main module--the command line argument. - Module._load(process.argv[1], null, true); - // Handle any nextTicks added in the first tick of the program - process._tickCallback(); -}; - -Module._initPaths = function() { - const isWindows = process.platform === 'win32'; - - var homeDir; - if (isWindows) { - homeDir = process.env.USERPROFILE; - } else { - homeDir = process.env.HOME; - } - - // $PREFIX/lib/node, where $PREFIX is the root of the Node.js installation. - var prefixDir; - // process.execPath is $PREFIX/bin/node except on Windows where it is - // $PREFIX\node.exe. - if (isWindows) { - prefixDir = path.resolve(process.execPath, '..'); - } else { - prefixDir = path.resolve(process.execPath, '..', '..'); - } - var paths = [path.resolve(prefixDir, 'lib', 'node')]; - - if (homeDir) { - paths.unshift(path.resolve(homeDir, '.node_libraries')); - paths.unshift(path.resolve(homeDir, '.node_modules')); - } - - var nodePath = process.env.NODE_PATH; - if (nodePath) { - paths = nodePath.split(path.delimiter).filter(function(path) { - return !!path; - }).concat(paths); - } - - modulePaths = paths; - - // clone as a shallow copy, for introspection. - Module.globalPaths = modulePaths.slice(0); -}; - -Module._preloadModules = function(requests) { - if (!Array.isArray(requests)) - return; - - // Preloaded modules have a dummy parent module which is deemed to exist - // in the current working directory. This seeds the search path for - // preloaded modules. 
- var parent = new Module('internal/preload', null); - try { - parent.paths = Module._nodeModulePaths(process.cwd()); - } catch (e) { - if (e.code !== 'ENOENT') { - throw e; - } - } - for (var n = 0; n < requests.length; n++) - parent.require(requests[n]); -}; - -Module._initPaths(); - -// backwards compatibility -Module.Module = Module; +module.exports = require('internal/modules/cjs/loader'); diff --git a/lib/net.js b/lib/net.js index 17a128a4f0c8db..9b22628ebc7e7c 100644 --- a/lib/net.js +++ b/lib/net.js @@ -1158,6 +1158,7 @@ function afterConnect(status, handle, req, readable, writable) { self._unrefTimer(); self.emit('connect'); + self.emit('ready'); // start the first read, or get an immediate EOF. // this doesn't actually consume any bytes, because len=0. diff --git a/lib/repl.js b/lib/repl.js index 5779e849f06017..83d6b59a721061 100644 --- a/lib/repl.js +++ b/lib/repl.js @@ -42,7 +42,11 @@ 'use strict'; -const internalModule = require('internal/module'); +const { + builtinLibs, + makeRequireFunction, + addBuiltinLibsToObject +} = require('internal/modules/cjs/helpers'); const internalUtil = require('internal/util'); const { isTypedArray } = require('internal/util/types'); const util = require('util'); @@ -54,7 +58,7 @@ const path = require('path'); const fs = require('fs'); const { Interface } = require('readline'); const { Console } = require('console'); -const Module = require('module'); +const CJSModule = require('internal/modules/cjs/loader'); const domain = require('domain'); const debug = util.debuglog('repl'); const errors = require('internal/errors'); @@ -90,7 +94,7 @@ try { } // Hack for repl require to work properly with node_modules folders -module.paths = Module._nodeModulePaths(module.filename); +module.paths = CJSModule._nodeModulePaths(module.filename); // If obj.hasOwnProperty has been overridden, then calling // obj.hasOwnProperty(prop) will break. 
@@ -103,7 +107,7 @@ function hasOwnProperty(obj, prop) { // This is the default "writer" value if none is passed in the REPL options. exports.writer = util.inspect; -exports._builtinLibs = internalModule.builtinLibs; +exports._builtinLibs = builtinLibs; function REPLServer(prompt, stream, @@ -673,14 +677,15 @@ REPLServer.prototype.createContext = function() { } } - var module = new Module(''); - module.paths = Module._resolveLookupPaths('', parentModule, true) || []; + var module = new CJSModule(''); + module.paths = + CJSModule._resolveLookupPaths('', parentModule, true) || []; - var require = internalModule.makeRequireFunction(module); + var require = makeRequireFunction(module); context.module = module; context.require = require; - internalModule.addBuiltinLibsToObject(context); + addBuiltinLibsToObject(context); return context; }; @@ -889,7 +894,7 @@ function complete(line, callback) { } else if (/^\.\.?\//.test(completeOn)) { paths = [process.cwd()]; } else { - paths = module.paths.concat(Module.globalPaths); + paths = module.paths.concat(CJSModule.globalPaths); } for (i = 0; i < paths.length; i++) { diff --git a/lib/timers.js b/lib/timers.js index c82d6cfdbb11e9..4d3f655a1a1271 100644 --- a/lib/timers.js +++ b/lib/timers.js @@ -73,7 +73,7 @@ const TIMEOUT_MAX = 2 ** 31 - 1; // Therefore, it is very important that the timers implementation is performant // and efficient. // -// Note: It is suggested you first read though the lib/internal/linkedlist.js +// Note: It is suggested you first read through the lib/internal/linkedlist.js // linked list implementation, since timers depend on it extensively. It can be // somewhat counter-intuitive at first, as it is not actually a class. Instead, // it is a set of helpers that operate on an existing object. 
diff --git a/lib/url.js b/lib/url.js index df9d917a479aba..9ada211cae7b9f 100644 --- a/lib/url.js +++ b/lib/url.js @@ -36,7 +36,8 @@ const { URLSearchParams, domainToASCII, domainToUnicode, - formatSymbol + formatSymbol, + encodeStr, } = require('internal/url'); // Original url.parse() API @@ -541,10 +542,27 @@ function urlFormat(urlObject, options) { return urlObject.format(); } +// These characters do not need escaping: +// ! - . _ ~ +// ' ( ) * : +// digits +// alpha (uppercase) +// alpha (lowercase) +const noEscapeAuth = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F + 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 // 0x70 - 0x7F +]; + Url.prototype.format = function format() { var auth = this.auth || ''; if (auth) { - auth = encodeAuth(auth); + auth = encodeStr(auth, noEscapeAuth, hexTable); auth += '@'; } @@ -929,78 +947,6 @@ Url.prototype.parseHost = function parseHost() { if (host) this.hostname = host; }; -// These characters do not need escaping: -// ! - . 
_ ~ -// ' ( ) * : -// digits -// alpha (uppercase) -// alpha (lowercase) -const noEscapeAuth = [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F - 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 // 0x70 - 0x7F -]; - -function encodeAuth(str) { - // faster encodeURIComponent alternative for encoding auth uri components - var out = ''; - var lastPos = 0; - for (var i = 0; i < str.length; ++i) { - var c = str.charCodeAt(i); - - // ASCII - if (c < 0x80) { - if (noEscapeAuth[c] === 1) - continue; - if (lastPos < i) - out += str.slice(lastPos, i); - lastPos = i + 1; - out += hexTable[c]; - continue; - } - - if (lastPos < i) - out += str.slice(lastPos, i); - - // Multi-byte characters ... 
- if (c < 0x800) { - lastPos = i + 1; - out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)]; - continue; - } - if (c < 0xD800 || c >= 0xE000) { - lastPos = i + 1; - out += hexTable[0xE0 | (c >> 12)] + - hexTable[0x80 | ((c >> 6) & 0x3F)] + - hexTable[0x80 | (c & 0x3F)]; - continue; - } - // Surrogate pair - ++i; - var c2; - if (i < str.length) - c2 = str.charCodeAt(i) & 0x3FF; - else - c2 = 0; - lastPos = i + 1; - c = 0x10000 + (((c & 0x3FF) << 10) | c2); - out += hexTable[0xF0 | (c >> 18)] + - hexTable[0x80 | ((c >> 12) & 0x3F)] + - hexTable[0x80 | ((c >> 6) & 0x3F)] + - hexTable[0x80 | (c & 0x3F)]; - } - if (lastPos === 0) - return str; - if (lastPos < str.length) - return out + str.slice(lastPos); - return out; -} - module.exports = { // Original API Url, diff --git a/node.gyp b/node.gyp index 1ab7b207e21339..01e7cc3a5e1bfd 100644 --- a/node.gyp +++ b/node.gyp @@ -25,7 +25,8 @@ 'node_lib_target_name%': 'node_lib', 'node_intermediate_lib_type%': 'static_library', 'library_files': [ - 'lib/internal/bootstrap_node.js', + 'lib/internal/bootstrap/loaders.js', + 'lib/internal/bootstrap/node.js', 'lib/async_hooks.js', 'lib/assert.js', 'lib/buffer.js', @@ -104,18 +105,19 @@ 'lib/internal/http.js', 'lib/internal/inspector_async_hook.js', 'lib/internal/linkedlist.js', - 'lib/internal/loader/Loader.js', - 'lib/internal/loader/CreateDynamicModule.js', - 'lib/internal/loader/DefaultResolve.js', - 'lib/internal/loader/ModuleJob.js', - 'lib/internal/loader/ModuleMap.js', - 'lib/internal/loader/ModuleWrap.js', - 'lib/internal/loader/Translators.js', + 'lib/internal/modules/cjs/helpers.js', + 'lib/internal/modules/cjs/loader.js', + 'lib/internal/modules/esm/Loader.js', + 'lib/internal/modules/esm/CreateDynamicModule.js', + 'lib/internal/modules/esm/DefaultResolve.js', + 'lib/internal/modules/esm/ModuleJob.js', + 'lib/internal/modules/esm/ModuleMap.js', + 'lib/internal/modules/esm/ModuleWrap.js', + 'lib/internal/modules/esm/Translators.js', 
'lib/internal/safe_globals.js', 'lib/internal/net.js', - 'lib/internal/module.js', 'lib/internal/os.js', - 'lib/internal/process/modules.js', + 'lib/internal/process/esm_loader.js', 'lib/internal/process/next_tick.js', 'lib/internal/process/promises.js', 'lib/internal/process/stdio.js', @@ -702,6 +704,7 @@ 'inputs': [ '<@(library_files)', './config.gypi', + 'tools/check_macros.py' ], 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc', @@ -715,6 +718,12 @@ }], [ 'node_use_perfctr=="false"', { 'inputs': [ 'src/noperfctr_macros.py' ] + }], + [ 'node_debug_lib=="false"', { + 'inputs': [ 'tools/nodcheck_macros.py' ] + }], + [ 'node_debug_lib=="true"', { + 'inputs': [ 'tools/dcheck_macros.py' ] }] ], 'action': [ diff --git a/src/aliased_buffer.h b/src/aliased_buffer.h index 8b103f4949030c..10659f6d529f02 100644 --- a/src/aliased_buffer.h +++ b/src/aliased_buffer.h @@ -126,13 +126,11 @@ class AliasedBuffer { index_(that.index_) { } - template - inline Reference& operator=(const T& val) { + inline Reference& operator=(const NativeT& val) { aliased_buffer_->SetValue(index_, val); return *this; } - // This is not caught by the template operator= above. 
inline Reference& operator=(const Reference& val) { return *this = static_cast(val); } @@ -141,9 +139,8 @@ class AliasedBuffer { return aliased_buffer_->GetValue(index_); } - template - inline Reference& operator+=(const T& val) { - const T current = aliased_buffer_->GetValue(index_); + inline Reference& operator+=(const NativeT& val) { + const NativeT current = aliased_buffer_->GetValue(index_); aliased_buffer_->SetValue(index_, current + val); return *this; } @@ -152,9 +149,10 @@ class AliasedBuffer { return this->operator+=(static_cast(val)); } - template - inline Reference& operator-=(const T& val) { - return this->operator+=(-val); + inline Reference& operator-=(const NativeT& val) { + const NativeT current = aliased_buffer_->GetValue(index_); + aliased_buffer_->SetValue(index_, current - val); + return *this; } private: diff --git a/src/env.h b/src/env.h index 6459eb90a965e1..8e0cf61a4a57ab 100644 --- a/src/env.h +++ b/src/env.h @@ -25,7 +25,6 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS #include "aliased_buffer.h" -#include "ares.h" #if HAVE_INSPECTOR #include "inspector_agent.h" #endif @@ -171,11 +170,13 @@ struct PackageConfig { V(fingerprint_string, "fingerprint") \ V(fingerprint256_string, "fingerprint256") \ V(flags_string, "flags") \ + V(fragment_string, "fragment") \ V(get_data_clone_error_string, "_getDataCloneError") \ V(get_shared_array_buffer_id_string, "_getSharedArrayBufferId") \ V(gid_string, "gid") \ V(handle_string, "handle") \ V(homedir_string, "homedir") \ + V(host_string, "host") \ V(hostmaster_string, "hostmaster") \ V(ignore_string, "ignore") \ V(infoaccess_string, "infoAccess") \ @@ -233,6 +234,7 @@ struct PackageConfig { V(order_string, "order") \ V(owner_string, "owner") \ V(parse_error_string, "Parse Error") \ + V(password_string, "password") \ V(path_string, "path") \ V(pending_handle_string, "pendingHandle") \ V(pbkdf2_error_string, "PBKDF2 Error") \ @@ -243,6 +245,7 @@ struct PackageConfig { V(priority_string, 
"priority") \ V(produce_cached_data_string, "produceCachedData") \ V(pubkey_string, "pubkey") \ + V(query_string, "query") \ V(raw_string, "raw") \ V(read_host_object_string, "_readHostObject") \ V(readable_string, "readable") \ @@ -252,6 +255,7 @@ struct PackageConfig { V(rename_string, "rename") \ V(replacement_string, "replacement") \ V(retry_string, "retry") \ + V(scheme_string, "scheme") \ V(serial_string, "serial") \ V(scopeid_string, "scopeid") \ V(sent_shutdown_string, "sentShutdown") \ diff --git a/src/inspector_js_api.cc b/src/inspector_js_api.cc index 1cced9420aea6c..35d17413762364 100644 --- a/src/inspector_js_api.cc +++ b/src/inspector_js_api.cc @@ -232,7 +232,7 @@ static void AsyncTaskScheduledWrapper(const FunctionCallbackInfo& args) { CHECK(args[0]->IsString()); Local task_name = args[0].As(); - String::Value task_name_value(task_name); + String::Value task_name_value(args.GetIsolate(), task_name); StringView task_name_view(*task_name_value, task_name_value.length()); CHECK(args[1]->IsNumber()); @@ -298,8 +298,8 @@ void Url(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(OneByteString(env->isolate(), url.c_str())); } -void InitInspectorBindings(Local target, Local unused, - Local context, void* priv) { +void Initialize(Local target, Local unused, + Local context, void* priv) { Environment* env = Environment::GetCurrent(context); { auto obj = Object::New(env->isolate()); @@ -343,4 +343,4 @@ void InitInspectorBindings(Local target, Local unused, } // namespace node NODE_BUILTIN_MODULE_CONTEXT_AWARE(inspector, - node::inspector::InitInspectorBindings); + node::inspector::Initialize); diff --git a/src/inspector_socket.cc b/src/inspector_socket.cc index b3a810d99c31ed..fa46c45decdd8c 100644 --- a/src/inspector_socket.cc +++ b/src/inspector_socket.cc @@ -495,12 +495,12 @@ class HttpHandler : public ProtocolHandler { CancelHandshake(); return; } else if (!event.upgrade) { - delegate()->OnHttpGet(event.path); + 
delegate()->OnHttpGet(event.host, event.path); } else if (event.ws_key.empty()) { CancelHandshake(); return; } else { - delegate()->OnSocketUpgrade(event.path, event.ws_key); + delegate()->OnSocketUpgrade(event.host, event.path, event.ws_key); } } } diff --git a/src/inspector_socket.h b/src/inspector_socket.h index 1a3411435ee2c5..81b894f95cb88f 100644 --- a/src/inspector_socket.h +++ b/src/inspector_socket.h @@ -17,8 +17,10 @@ class InspectorSocket { public: class Delegate { public: - virtual void OnHttpGet(const std::string& path) = 0; - virtual void OnSocketUpgrade(const std::string& path, + virtual void OnHttpGet(const std::string& host, + const std::string& path) = 0; + virtual void OnSocketUpgrade(const std::string& host, + const std::string& path, const std::string& accept_key) = 0; virtual void OnWsFrame(const std::vector& frame) = 0; virtual ~Delegate() {} diff --git a/src/inspector_socket_server.cc b/src/inspector_socket_server.cc index 03e5c60e65641e..edea483cfe0e64 100644 --- a/src/inspector_socket_server.cc +++ b/src/inspector_socket_server.cc @@ -16,12 +16,23 @@ namespace inspector { // depend on inspector_socket_server.h std::string FormatWsAddress(const std::string& host, int port, const std::string& target_id, - bool include_protocol) { + bool include_protocol); +namespace { + +static const uint8_t PROTOCOL_JSON[] = { + #include "v8_inspector_protocol_json.h" // NOLINT(build/include_order) +}; + +void Escape(std::string* string) { + for (char& c : *string) { + c = (c == '\"' || c == '\\') ? 
'_' : c; + } +} + +std::string FormatHostPort(const std::string& host, int port) { // Host is valid (socket was bound) so colon means it's a v6 IP address bool v6 = host.find(':') != std::string::npos; std::ostringstream url; - if (include_protocol) - url << "ws://"; if (v6) { url << '['; } @@ -29,20 +40,18 @@ std::string FormatWsAddress(const std::string& host, int port, if (v6) { url << ']'; } - url << ':' << port << '/' << target_id; + url << ':' << port; return url.str(); } -namespace { - -static const uint8_t PROTOCOL_JSON[] = { - #include "v8_inspector_protocol_json.h" // NOLINT(build/include_order) -}; - -void Escape(std::string* string) { - for (char& c : *string) { - c = (c == '\"' || c == '\\') ? '_' : c; - } +std::string FormatAddress(const std::string& host, + const std::string& target_id, + bool include_protocol) { + std::ostringstream url; + if (include_protocol) + url << "ws://"; + url << host << '/' << target_id; + return url.str(); } std::string MapToString(const std::map& object) { @@ -141,6 +150,11 @@ void SendProtocolJson(InspectorSocket* socket) { } } // namespace +std::string FormatWsAddress(const std::string& host, int port, + const std::string& target_id, + bool include_protocol) { + return FormatAddress(FormatHostPort(host, port), target_id, include_protocol); +} class Closer { public: @@ -213,8 +227,8 @@ class SocketSession { ~Delegate() { server_->SessionTerminated(session_id_); } - void OnHttpGet(const std::string& path) override; - void OnSocketUpgrade(const std::string& path, + void OnHttpGet(const std::string& host, const std::string& path) override; + void OnSocketUpgrade(const std::string& host, const std::string& path, const std::string& ws_key) override; void OnWsFrame(const std::vector& data) override; @@ -320,6 +334,7 @@ void InspectorSocketServer::SessionTerminated(int session_id) { } bool InspectorSocketServer::HandleGetRequest(int session_id, + const std::string& host, const std::string& path) { SocketSession* session = 
Session(session_id); InspectorSocket* socket = session->ws_socket(); @@ -328,7 +343,7 @@ bool InspectorSocketServer::HandleGetRequest(int session_id, return false; if (MatchPathSegment(command, "list") || command[0] == '\0') { - SendListResponse(socket, session); + SendListResponse(socket, host, session); return true; } else if (MatchPathSegment(command, "protocol")) { SendProtocolJson(socket); @@ -336,17 +351,12 @@ bool InspectorSocketServer::HandleGetRequest(int session_id, } else if (MatchPathSegment(command, "version")) { SendVersionResponse(socket); return true; - } else if (const char* target_id = MatchPathSegment(command, "activate")) { - if (TargetExists(target_id)) { - SendHttpResponse(socket, "Target activated"); - return true; - } - return false; } return false; } void InspectorSocketServer::SendListResponse(InspectorSocket* socket, + const std::string& host, SocketSession* session) { std::vector> response; for (const std::string& id : delegate_->GetTargetIds()) { @@ -371,15 +381,18 @@ void InspectorSocketServer::SendListResponse(InspectorSocket* socket, } } if (!connected) { - std::string host = socket->GetHost(); - int port = session->server_port(); + std::string detected_host = host; + if (detected_host.empty()) { + detected_host = FormatHostPort(socket->GetHost(), + session->server_port()); + } std::ostringstream frontend_url; frontend_url << "chrome-devtools://devtools/bundled"; frontend_url << "/inspector.html?experiments=true&v8only=true&ws="; - frontend_url << FormatWsAddress(host, port, id, false); + frontend_url << FormatAddress(detected_host, id, false); target_map["devtoolsFrontendUrl"] += frontend_url.str(); target_map["webSocketDebuggerUrl"] = - FormatWsAddress(host, port, id, true); + FormatAddress(detected_host, id, true); } } SendHttpResponse(socket, MapsToString(response)); @@ -531,12 +544,14 @@ void SocketSession::Send(const std::string& message) { ws_socket_->Write(message.data(), message.length()); } -void 
SocketSession::Delegate::OnHttpGet(const std::string& path) { - if (!server_->HandleGetRequest(session_id_, path)) +void SocketSession::Delegate::OnHttpGet(const std::string& host, + const std::string& path) { + if (!server_->HandleGetRequest(session_id_, host, path)) Session()->ws_socket()->CancelHandshake(); } -void SocketSession::Delegate::OnSocketUpgrade(const std::string& path, +void SocketSession::Delegate::OnSocketUpgrade(const std::string& host, + const std::string& path, const std::string& ws_key) { std::string id = path.empty() ? path : path.substr(1); server_->SessionStarted(session_id_, id, ws_key); diff --git a/src/inspector_socket_server.h b/src/inspector_socket_server.h index b193e33a46d6d3..f6003c4c4b4d70 100644 --- a/src/inspector_socket_server.h +++ b/src/inspector_socket_server.h @@ -67,7 +67,8 @@ class InspectorSocketServer { // Session connection lifecycle void Accept(int server_port, uv_stream_t* server_socket); - bool HandleGetRequest(int session_id, const std::string& path); + bool HandleGetRequest(int session_id, const std::string& host, + const std::string& path); void SessionStarted(int session_id, const std::string& target_id, const std::string& ws_id); void SessionTerminated(int session_id); @@ -77,7 +78,8 @@ class InspectorSocketServer { SocketSession* Session(int session_id); private: - void SendListResponse(InspectorSocket* socket, SocketSession* session); + void SendListResponse(InspectorSocket* socket, const std::string& host, + SocketSession* session); bool TargetExists(const std::string& id); enum class ServerState {kNew, kRunning, kStopping, kStopped}; diff --git a/src/node.cc b/src/node.cc index 35a1d5cb598d5b..737f589eaad01c 100644 --- a/src/node.cc +++ b/src/node.cc @@ -250,7 +250,7 @@ bool config_experimental_vm_modules = false; // Set in node.cc by ParseArgs when --loader is used. 
// Used in node_config.cc to set a constant on process.binding('config') -// that is used by lib/internal/bootstrap_node.js +// that is used by lib/internal/bootstrap/node.js std::string config_userland_loader; // NOLINT(runtime/string) // Set by ParseArgs when --pending-deprecation or NODE_PENDING_DEPRECATION @@ -263,7 +263,7 @@ std::string config_warning_file; // NOLINT(runtime/string) // Set in node.cc by ParseArgs when --expose-internals or --expose_internals is // used. // Used in node_config.cc to set a constant on process.binding('config') -// that is used by lib/internal/bootstrap_node.js +// that is used by lib/internal/bootstrap/node.js bool config_expose_internals = false; bool v8_initialized = false; @@ -1595,9 +1595,11 @@ void AppendExceptionLine(Environment* env, ScriptOrigin origin = message->GetScriptOrigin(); node::Utf8Value filename(env->isolate(), message->GetScriptResourceName()); const char* filename_string = *filename; - int linenum = message->GetLineNumber(); + int linenum = message->GetLineNumber(env->context()).FromJust(); // Print line of source code. - node::Utf8Value sourceline(env->isolate(), message->GetSourceLine()); + MaybeLocal source_line_maybe = message->GetSourceLine(env->context()); + node::Utf8Value sourceline(env->isolate(), + source_line_maybe.ToLocalChecked()); const char* sourceline_string = *sourceline; if (strstr(sourceline_string, "node-do-not-add-exception-line") != nullptr) return; @@ -1745,7 +1747,7 @@ static void ReportException(Environment* env, name.IsEmpty() || name->IsUndefined()) { // Not an error object. Just print as-is. - String::Utf8Value message(er); + String::Utf8Value message(env->isolate(), er); PrintErrorString("%s\n", *message ? 
*message : ""); @@ -1797,13 +1799,13 @@ static Local ExecuteString(Environment* env, exit(3); } - Local result = script.ToLocalChecked()->Run(); + MaybeLocal result = script.ToLocalChecked()->Run(env->context()); if (result.IsEmpty()) { ReportException(env, try_catch); exit(4); } - return scope.Escape(result); + return scope.Escape(result.ToLocalChecked()); } @@ -2852,7 +2854,7 @@ static void ThrowIfNoSuchModule(Environment* env, const char* module_v) { env->ThrowError(errmsg); } -static void Binding(const FunctionCallbackInfo& args) { +static void GetBinding(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK(args[0]->IsString()); @@ -2879,7 +2881,7 @@ static void Binding(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(exports); } -static void InternalBinding(const FunctionCallbackInfo& args) { +static void GetInternalBinding(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK(args[0]->IsString()); @@ -2894,7 +2896,7 @@ static void InternalBinding(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(exports); } -static void LinkedBinding(const FunctionCallbackInfo& args) { +static void GetLinkedBinding(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args.GetIsolate()); CHECK(args[0]->IsString()); @@ -3594,10 +3596,6 @@ void SetupProcessObject(Environment* env, env->SetMethod(process, "uptime", Uptime); env->SetMethod(process, "memoryUsage", MemoryUsage); - env->SetMethod(process, "binding", Binding); - env->SetMethod(process, "_linkedBinding", LinkedBinding); - env->SetMethod(process, "_internalBinding", InternalBinding); - env->SetMethod(process, "_setupProcessObject", SetupProcessObject); env->SetMethod(process, "_setupNextTick", SetupNextTick); env->SetMethod(process, "_setupPromises", SetupPromises); @@ -3635,8 +3633,10 @@ static void RawDebug(const FunctionCallbackInfo& args) { fflush(stderr); } -void 
LoadEnvironment(Environment* env) { - HandleScope handle_scope(env->isolate()); + +static Local GetBootstrapper(Environment* env, Local source, + Local script_name) { + EscapableHandleScope scope(env->isolate()); TryCatch try_catch(env->isolate()); @@ -3645,20 +3645,61 @@ void LoadEnvironment(Environment* env) { // are not safe to ignore. try_catch.SetVerbose(false); - // Execute the lib/internal/bootstrap_node.js file which was included as a - // static C string in node_natives.h by node_js2c. - // 'internal_bootstrap_node_native' is the string containing that source code. - Local script_name = FIXED_ONE_BYTE_STRING(env->isolate(), - "bootstrap_node.js"); - Local f_value = ExecuteString(env, MainSource(env), script_name); + // Execute the bootstrapper javascript file + Local bootstrapper_v = ExecuteString(env, source, script_name); if (try_catch.HasCaught()) { ReportException(env, try_catch); exit(10); } - // The bootstrap_node.js file returns a function 'f' - CHECK(f_value->IsFunction()); - Local f = Local::Cast(f_value); + CHECK(bootstrapper_v->IsFunction()); + Local bootstrapper = Local::Cast(bootstrapper_v); + + return scope.Escape(bootstrapper); +} + +static bool ExecuteBootstrapper(Environment* env, Local bootstrapper, + int argc, Local argv[], + Local* out) { + bool ret = bootstrapper->Call( + env->context(), Null(env->isolate()), argc, argv).ToLocal(out); + + // If there was an error during bootstrap then it was either handled by the + // FatalException handler or it's unrecoverable (e.g. max call stack + // exceeded). Either way, clear the stack so that the AsyncCallbackScope + // destructor doesn't fail on the id check. + // There are only two ways to have a stack size > 1: 1) the user manually + // called MakeCallback or 2) user awaited during bootstrap, which triggered + // _tickCallback(). 
+ if (!ret) { + env->async_hooks()->clear_async_id_stack(); + } + + return ret; +} + + +void LoadEnvironment(Environment* env) { + HandleScope handle_scope(env->isolate()); + + TryCatch try_catch(env->isolate()); + // Disable verbose mode to stop FatalException() handler from trying + // to handle the exception. Errors this early in the start-up phase + // are not safe to ignore. + try_catch.SetVerbose(false); + + // The bootstrapper scripts are lib/internal/bootstrap/loaders.js and + // lib/internal/bootstrap/node.js, each included as a static C string + // defined in node_javascript.h, generated in node_javascript.cc by + // node_js2c. + Local loaders_name = + FIXED_ONE_BYTE_STRING(env->isolate(), "internal/bootstrap/loaders.js"); + Local loaders_bootstrapper = + GetBootstrapper(env, LoadersBootstrapperSource(env), loaders_name); + Local node_name = + FIXED_ONE_BYTE_STRING(env->isolate(), "internal/bootstrap/node.js"); + Local node_bootstrapper = + GetBootstrapper(env, NodeBootstrapperSource(env), node_name); // Add a reference to the global object Local global = env->context()->Global(); @@ -3689,25 +3730,47 @@ void LoadEnvironment(Environment* env) { // (Allows you to set stuff on `global` from anywhere in JavaScript.) global->Set(FIXED_ONE_BYTE_STRING(env->isolate(), "global"), global); - // Now we call 'f' with the 'process' variable that we've built up with - // all our bindings. Inside bootstrap_node.js and internal/process we'll - // take care of assigning things to their places. + // Create binding loaders + v8::Local get_binding_fn = + env->NewFunctionTemplate(GetBinding)->GetFunction(env->context()) + .ToLocalChecked(); - // We start the process this way in order to be more modular. Developers - // who do not like how bootstrap_node.js sets up the module system but do - // like Node's I/O bindings may want to replace 'f' with their own function. 
- Local arg = env->process_object(); + v8::Local get_linked_binding_fn = + env->NewFunctionTemplate(GetLinkedBinding)->GetFunction(env->context()) + .ToLocalChecked(); - auto ret = f->Call(env->context(), Null(env->isolate()), 1, &arg); - // If there was an error during bootstrap then it was either handled by the - // FatalException handler or it's unrecoverable (e.g. max call stack - // exceeded). Either way, clear the stack so that the AsyncCallbackScope - // destructor doesn't fail on the id check. - // There are only two ways to have a stack size > 1: 1) the user manually - // called MakeCallback or 2) user awaited during bootstrap, which triggered - // _tickCallback(). - if (ret.IsEmpty()) - env->async_hooks()->clear_async_id_stack(); + v8::Local get_internal_binding_fn = + env->NewFunctionTemplate(GetInternalBinding)->GetFunction(env->context()) + .ToLocalChecked(); + + Local loaders_bootstrapper_args[] = { + env->process_object(), + get_binding_fn, + get_linked_binding_fn, + get_internal_binding_fn + }; + + // Bootstrap internal loaders + Local bootstrapped_loaders; + if (!ExecuteBootstrapper(env, loaders_bootstrapper, + arraysize(loaders_bootstrapper_args), + loaders_bootstrapper_args, + &bootstrapped_loaders)) { + return; + } + + // Bootstrap Node.js + Local bootstrapped_node; + Local node_bootstrapper_args[] = { + env->process_object(), + bootstrapped_loaders + }; + if (!ExecuteBootstrapper(env, node_bootstrapper, + arraysize(node_bootstrapper_args), + node_bootstrapper_args, + &bootstrapped_node)) { + return; + } } static void PrintHelp() { @@ -4908,7 +4971,7 @@ void RegisterBuiltinModules() { } // namespace node #if !HAVE_INSPECTOR -void InitEmptyBindings() {} +void Initialize() {} -NODE_BUILTIN_MODULE_CONTEXT_AWARE(inspector, InitEmptyBindings) +NODE_BUILTIN_MODULE_CONTEXT_AWARE(inspector, Initialize) #endif // !HAVE_INSPECTOR diff --git a/src/node_api.cc b/src/node_api.cc index ea6bccef3f77de..8ab89796630297 100644 --- a/src/node_api.cc +++ 
b/src/node_api.cc @@ -167,6 +167,23 @@ struct napi_env__ { (out) = v8::type::New((buffer), (byte_offset), (length)); \ } while (0) +#define NAPI_CALL_INTO_MODULE(env, call, handle_exception) \ + do { \ + int open_handle_scopes = (env)->open_handle_scopes; \ + int open_callback_scopes = (env)->open_callback_scopes; \ + napi_clear_last_error((env)); \ + call; \ + CHECK_EQ((env)->open_handle_scopes, open_handle_scopes); \ + CHECK_EQ((env)->open_callback_scopes, open_callback_scopes); \ + if (!(env)->last_exception.IsEmpty()) { \ + handle_exception( \ + v8::Local::New((env)->isolate, (env)->last_exception)); \ + (env)->last_exception.Reset(); \ + } \ + } while (0) + +#define NAPI_CALL_INTO_MODULE_THROW(env, call) \ + NAPI_CALL_INTO_MODULE((env), call, (env)->isolate->ThrowException) namespace { namespace v8impl { @@ -346,10 +363,11 @@ class Finalizer { static void FinalizeBufferCallback(char* data, void* hint) { Finalizer* finalizer = static_cast(hint); if (finalizer->_finalize_callback != nullptr) { - finalizer->_finalize_callback( - finalizer->_env, - data, - finalizer->_finalize_hint); + NAPI_CALL_INTO_MODULE_THROW(finalizer->_env, + finalizer->_finalize_callback( + finalizer->_env, + data, + finalizer->_finalize_hint)); } Delete(finalizer); @@ -449,12 +467,14 @@ class Reference : private Finalizer { // Check before calling the finalize callback, because the callback might // delete it. 
bool delete_self = reference->_delete_self; + napi_env env = reference->_env; if (reference->_finalize_callback != nullptr) { - reference->_finalize_callback( - reference->_env, - reference->_finalize_data, - reference->_finalize_hint); + NAPI_CALL_INTO_MODULE_THROW(env, + reference->_finalize_callback( + reference->_env, + reference->_finalize_data, + reference->_finalize_hint)); } if (delete_self) { @@ -539,32 +559,17 @@ class CallbackWrapperBase : public CallbackWrapper { napi_callback cb = reinterpret_cast( v8::Local::Cast( _cbdata->GetInternalField(kInternalFieldIndex))->Value()); - v8::Isolate* isolate = _cbinfo.GetIsolate(); napi_env env = static_cast( v8::Local::Cast( _cbdata->GetInternalField(kEnvIndex))->Value()); - // Make sure any errors encountered last time we were in N-API are gone. - napi_clear_last_error(env); - - int open_handle_scopes = env->open_handle_scopes; - int open_callback_scopes = env->open_callback_scopes; - - napi_value result = cb(env, cbinfo_wrapper); + napi_value result; + NAPI_CALL_INTO_MODULE_THROW(env, result = cb(env, cbinfo_wrapper)); if (result != nullptr) { this->SetReturnValue(result); } - - CHECK_EQ(env->open_handle_scopes, open_handle_scopes); - CHECK_EQ(env->open_callback_scopes, open_callback_scopes); - - if (!env->last_exception.IsEmpty()) { - isolate->ThrowException( - v8::Local::New(isolate, env->last_exception)); - env->last_exception.Reset(); - } } const Info& _cbinfo; @@ -871,8 +876,10 @@ void napi_module_register_cb(v8::Local exports, // one is found. 
napi_env env = v8impl::GetEnv(context); - napi_value _exports = - mod->nm_register_func(env, v8impl::JsValueFromV8LocalValue(exports)); + napi_value _exports; + NAPI_CALL_INTO_MODULE_THROW(env, + _exports = mod->nm_register_func(env, + v8impl::JsValueFromV8LocalValue(exports))); // If register function returned a non-null exports object different from // the exports object we passed it, set that as the "exports" property of @@ -3325,7 +3332,7 @@ class Work : public node::AsyncResource { void* data = nullptr) : AsyncResource(env->isolate, async_resource, - *v8::String::Utf8Value(async_resource_name)), + *v8::String::Utf8Value(env->isolate, async_resource_name)), _env(env), _data(data), _execute(execute), @@ -3367,19 +3374,17 @@ class Work : public node::AsyncResource { v8::HandleScope scope(env->isolate); CallbackScope callback_scope(work); - work->_complete(env, ConvertUVErrorCode(status), work->_data); + NAPI_CALL_INTO_MODULE(env, + work->_complete(env, ConvertUVErrorCode(status), work->_data), + [env] (v8::Local local_err) { + // If there was an unhandled exception in the complete callback, + // report it as a fatal exception. (There is no JavaScript on the + // callstack that can possibly handle it.) + v8impl::trigger_fatal_exception(env, local_err); + }); // Note: Don't access `work` after this point because it was // likely deleted by the complete callback. - - // If there was an unhandled exception in the complete callback, - // report it as a fatal exception. (There is no JavaScript on the - // callstack that can possibly handle it.) 
- if (!env->last_exception.IsEmpty()) { - v8::Local local_err = v8::Local::New( - env->isolate, env->last_exception); - v8impl::trigger_fatal_exception(env, local_err); - } } } diff --git a/src/node_buffer.cc b/src/node_buffer.cc index f9a807602f612c..773b3341821f82 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -946,7 +946,7 @@ void IndexOfString(const FunctionCallbackInfo& args) { size_t result = haystack_length; if (enc == UCS2) { - String::Value needle_value(needle); + String::Value needle_value(args.GetIsolate(), needle); if (*needle_value == nullptr) return args.GetReturnValue().Set(-1); @@ -979,7 +979,7 @@ void IndexOfString(const FunctionCallbackInfo& args) { } result *= 2; } else if (enc == UTF8) { - String::Utf8Value needle_value(needle); + String::Utf8Value needle_value(args.GetIsolate(), needle); if (*needle_value == nullptr) return args.GetReturnValue().Set(-1); diff --git a/src/node_buffer.h b/src/node_buffer.h index acf9b23c3b3256..b4aa12cbcfadc6 100644 --- a/src/node_buffer.h +++ b/src/node_buffer.h @@ -31,8 +31,7 @@ extern bool zero_fill_all_buffers; namespace Buffer { -static const unsigned int kMaxLength = - sizeof(int32_t) == sizeof(intptr_t) ? 
0x3fffffff : 0x7fffffff; +static const unsigned int kMaxLength = v8::TypedArray::kMaxLength; typedef void (*FreeCallback)(char* data, void* hint); diff --git a/src/node_config.cc b/src/node_config.cc index cac551ad2c410a..0542bff1d65186 100644 --- a/src/node_config.cc +++ b/src/node_config.cc @@ -36,7 +36,7 @@ using v8::Value; value, ReadOnly).FromJust(); \ } while (0) -static void InitConfig(Local target, +static void Initialize(Local target, Local unused, Local context) { Environment* env = Environment::GetCurrent(context); @@ -138,4 +138,4 @@ static void InitConfig(Local target, } // namespace node -NODE_BUILTIN_MODULE_CONTEXT_AWARE(config, node::InitConfig) +NODE_BUILTIN_MODULE_CONTEXT_AWARE(config, node::Initialize) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index f49a2362769bc2..33134825b11fc9 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1098,21 +1098,21 @@ class ContextifyScript : public BaseObject { PersistentToLocal(env->isolate(), wrapped_script->script_); Local