-
Notifications
You must be signed in to change notification settings - Fork 920
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use JSON11 for handling long numerals #6915
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
feat: | ||
- Use JSON11 for handling long numerals ([#6915](https://github.com/opensearch-project/OpenSearch-Dashboards/pull/6915)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,6 +103,7 @@ | |
"**/jest-config": "npm:@amoo-miki/[email protected]", | ||
"**/jest-jasmine2": "npm:@amoo-miki/[email protected]", | ||
"**/joi/hoek": "npm:@amoo-miki/[email protected]", | ||
"**/json11": "^1.1.2", | ||
"**/json-schema": "^0.4.0", | ||
"**/kind-of": ">=6.0.3", | ||
"**/loader-utils": "^2.0.4", | ||
|
@@ -154,7 +155,7 @@ | |
"@hapi/podium": "^4.1.3", | ||
"@hapi/vision": "^6.1.0", | ||
"@hapi/wreck": "^17.1.0", | ||
"@opensearch-project/opensearch": "^2.6.0", | ||
"@opensearch-project/opensearch": "^2.9.0", | ||
"@opensearch/datemath": "5.0.3", | ||
"@osd/ace": "1.0.0", | ||
"@osd/analytics": "1.0.0", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,260 +3,7 @@ | |
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
/* In JavaScript, a `Number` is a 64-bit floating-point value which can store 16 digits. However, the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huge +1 of the cleanup |
||
* serializer and deserializer will need to cater to numeric values generated by other languages which | ||
* can have up to 19 digits. Native JSON parser and stringifier, incapable of handling the extra | ||
* digits, corrupt the values, making them unusable. | ||
* | ||
* To work around this limitation, the deserializer converts long sequences of digits into strings and | ||
* marks them before applying the parser. During the parsing, string values that begin with the mark | ||
* are converted to `BigInt` values. | ||
* Similarly, during stringification, the serializer converts `BigInt` values to marked strings and | ||
* when done, it replaces them with plain numerals. | ||
* | ||
* `Number.MAX_SAFE_INTEGER`, 9,007,199,254,740,991, is the largest number that the native methods can | ||
* parse and stringify, and any numeral greater than that would need to be translated using the | ||
* workaround; all 17-digits or longer and only tail-end of the 16-digits need translation. It would | ||
* be unfair to all the 16-digit numbers if the translation applied to `\d{16,}` only to cover the | ||
* less than 10%. Hence, a RegExp is created to only match numerals too long to be a number. | ||
* | ||
* To make the explanation simpler, let's assume that MAX_SAFE_INTEGER is 8921 which has 4 digits. | ||
* Starting from the right, we take each digit onwards, `[<start>-9]`: | ||
* 1) 7922 - 7929: 792[2-9]\d{0} | ||
* 2) 7930 - 7999: 79[3-9]\d{1} | ||
* 9) 9 + 1 = 10 which results in a rollover; no need to do anything. | ||
* 8) 9000 - 9999: [9-9]\d{3} | ||
* Finally we add anything 5 digits or longer: `\d{5,} | ||
* | ||
* Note: A better solution would use AST but considering its performance penalty, RegExp is the next | ||
* best thing. | ||
*/ | ||
const maxIntAsString = String(Number.MAX_SAFE_INTEGER); | ||
const maxIntLength = maxIntAsString.length; | ||
// Sub-patterns for each digit | ||
const longNumeralMatcherTokens = [`\\d{${maxIntAsString.length + 1},}`]; | ||
for (let i = 0; i < maxIntLength; i++) { | ||
if (maxIntAsString[i] !== '9') { | ||
longNumeralMatcherTokens.push( | ||
maxIntAsString.substring(0, i) + | ||
`[${parseInt(maxIntAsString[i], 10) + 1}-9]` + | ||
`\\d{${maxIntLength - i - 1}}` | ||
); | ||
} | ||
} | ||
|
||
/* The matcher that looks for `": <numerals>, ...}` and `[..., <numeral>, ...]` | ||
* | ||
* The pattern starts by looking for `":` not immediately preceded by a `\`. That should be | ||
* followed by any of the numeric sub-patterns. A comma, end of an array, end of an object, or | ||
* the end of the input are the only acceptable elements after it. | ||
* | ||
* Note: This RegExp can result in false-positive hits on the likes of `{"key": "[ <numeral> ]"}` and | ||
* those are cleaned out during parsing. | ||
*/ | ||
const longNumeralMatcher = new RegExp( | ||
`((?:\\[|,|(?<!\\\\)"\\s*:)\\s*)(-?(?:${longNumeralMatcherTokens.join('|')}))(\\s*)(?=,|}|]|$)`, | ||
'g' | ||
); | ||
|
||
/* The characters with a highly unlikely chance of occurrence in strings, alone or in combination. | ||
* These will be combined in various orders and lengths, to find a specific "marker" that is not | ||
* present in the JSON string. | ||
*/ | ||
const markerChars = ['෴', '߷', '֍']; | ||
|
||
/* Generates an array of all combinations of `markerChars` with the requested length. */ | ||
const getMarkerChoices = (length: number): string[] => { | ||
// coverage:ignore-line | ||
if (!length || length < 0) return []; | ||
const choices = []; | ||
const arr = markerChars; | ||
const arrLength = arr.length; | ||
const temp = Array(length); | ||
|
||
(function fill(pos, start) { | ||
if (pos === length) return choices.push(temp.join('')); | ||
|
||
for (let i = start; i < arrLength; i++) { | ||
temp[pos] = arr[i]; | ||
fill(pos + 1, i); | ||
} | ||
})(0, 0); | ||
|
||
return choices; | ||
}; | ||
|
||
/* Experiments with different combinations of various lengths, until one is found to not be in | ||
* the input string. | ||
*/ | ||
const getMarker = (text: string): { marker: string; length: number } => { | ||
let marker; | ||
let length = 0; | ||
do { | ||
length++; | ||
getMarkerChoices(length).some((markerChoice) => { | ||
if (text.indexOf(markerChoice) === -1) { | ||
marker = markerChoice; | ||
return true; | ||
} | ||
}); | ||
} while (!marker); | ||
|
||
return { | ||
marker, | ||
length, | ||
}; | ||
}; | ||
|
||
const parseStringWithLongNumerals = ( | ||
text: string, | ||
reviver?: ((this: any, key: string, value: any) => any) | null | ||
): any => { | ||
const { marker, length } = getMarker(text); | ||
|
||
let hadException; | ||
let obj; | ||
let markedJSON = text.replace(longNumeralMatcher, `$1"${marker}$2"$3`); | ||
const markedValueMatcher = new RegExp(`^${marker}-?\\d+$`); | ||
|
||
/* Convert marked values to BigInt values. | ||
* The `startsWith` is purely for performance, to avoid running `test` if not needed. | ||
*/ | ||
const convertMarkedValues = (val: any) => | ||
typeof val === 'string' && val.startsWith(marker) && markedValueMatcher.test(val) | ||
? BigInt(val.substring(length)) | ||
: val; | ||
|
||
/* For better performance, instead of testing for existence of `reviver` on each value, two almost | ||
* identical functions are used. | ||
*/ | ||
const parseMarkedText = reviver | ||
? (markedText: string) => | ||
JSON.parse(markedText, function (key, val) { | ||
return reviver.call(this, key, convertMarkedValues(val)); | ||
}) | ||
: (markedText: string) => JSON.parse(markedText, (key, val) => convertMarkedValues(val)); | ||
|
||
/* RegExp cannot replace AST and the process of marking adds quotes. So, any false-positive hit | ||
* will make the JSON string unparseable. | ||
* | ||
* To find those instances, we try to parse and watch for the location of any errors. If an error | ||
* is caused by the marking, we remove that single marking and try again. | ||
*/ | ||
try { | ||
do { | ||
try { | ||
hadException = false; | ||
obj = parseMarkedText(markedJSON); | ||
} catch (e) { | ||
hadException = true; | ||
/* There are two types of exception objects that can be raised: | ||
* 1) a textual message with the position that we need to parse | ||
* i. Unexpected [token|string ...] at position ... | ||
* ii. Expected ',' or ... after ... in JSON at position ... | ||
* iii. expected ',' or ... after ... in object at line ... column ... | ||
* 2) a proper object with lineNumber and columnNumber which we can use | ||
* Note: this might refer to the part of the code that threw the exception but | ||
* we will try it anyway; the regex is specific enough to not produce | ||
* false-positives. | ||
*/ | ||
let { lineNumber, columnNumber } = e; | ||
|
||
if (typeof e?.message === 'string') { | ||
/* Check for 1-i and 1-ii | ||
* Finding "..."෴1111"..." inside a string value, the extra quotes throw a syntax error | ||
* and the position points to " that is assumed to be the begining of a value. | ||
*/ | ||
let match = e.message.match(/^(?:Un)?expected .*at position (\d+)(\D|$)/i); | ||
if (match) { | ||
lineNumber = 1; | ||
// Add 1 to reach the marker | ||
columnNumber = parseInt(match[1], 10) + 1; | ||
} else { | ||
/* Check for 1-iii | ||
* Finding "...,"෴1111"..." inside a string value, the extra quotes throw a syntax error | ||
* and the column number points to the marker after the " that is assumed to be terminating the | ||
* value. | ||
* PS: There are different versions of this error across browsers and platforms. | ||
*/ | ||
// ToDo: Add functional tests for this path | ||
match = e.message.match(/expected .*line (\d+) column (\d+)(\D|$)/i); | ||
if (match) { | ||
lineNumber = parseInt(match[1], 10); | ||
columnNumber = parseInt(match[2], 10); | ||
} | ||
} | ||
} | ||
|
||
if (lineNumber < 1 || columnNumber < 2) { | ||
/* The problem is not with this replacement. | ||
* Note: This will never happen because the outer parse would have already thrown. | ||
*/ | ||
// coverage:ignore-line | ||
throw e; | ||
} | ||
|
||
/* We need to skip e.lineNumber - 1 number of `\n` occurrences. | ||
* Then, we need to go to e.columnNumber - 2 to look for `"<mark>\d+"`; we need to `-1` to | ||
* account for the quote but an additional `-1` is needed because columnNumber starts from 1. | ||
*/ | ||
const re = new RegExp( | ||
`^((?:.*\\n){${lineNumber - 1}}[^\\n]{${columnNumber - 2}})"${marker}(-?\\d+)"` | ||
); | ||
if (!re.test(markedJSON)) { | ||
/* The exception is not caused by adding the marker. | ||
* Note: This will never happen because the outer parse would have already thrown. | ||
*/ | ||
// coverage:ignore-line | ||
throw e; | ||
} | ||
|
||
// We have found a bad replacement; let's remove it. | ||
markedJSON = markedJSON.replace(re, '$1$2'); | ||
} | ||
} while (hadException); | ||
} catch (ex) { | ||
// If parsing of marked `text` fails, fallback to parsing the original `text` | ||
obj = JSON.parse(text, reviver || undefined); | ||
} | ||
|
||
return obj; | ||
}; | ||
|
||
const stringifyObjectWithBigInts = ( | ||
obj: any, | ||
candidate: string, | ||
replacer?: ((this: any, key: string, value: any) => any) | null, | ||
space?: string | number | ||
): string => { | ||
const { marker } = getMarker(candidate); | ||
|
||
/* The matcher that looks for "<marker><numerals>" | ||
* Because we have made sure that `marker` was never present in the original object, we can | ||
* carelessly assume every "<marker><numerals>" is due to our marking. | ||
*/ | ||
const markedBigIntMatcher = new RegExp(`"${marker}(-?\\d+)"`, 'g'); | ||
|
||
/* Convert BigInt values to a string and mark them. | ||
* Can't be bothered with Number values outside the safe range because they are already corrupted. | ||
* | ||
* For better performance, instead of testing for existence of `replacer` on each value, two almost | ||
* identical functions are used. | ||
*/ | ||
const addMarkerToBigInts = replacer | ||
? function (this: any, key: string, val: any) { | ||
// replacer is called before marking because marking changes the type | ||
const newVal = replacer.call(this, key, val); | ||
return typeof newVal === 'bigint' ? `${marker}${newVal.toString()}` : newVal; | ||
} | ||
: (key: string, val: any) => (typeof val === 'bigint' ? `${marker}${val.toString()}` : val); | ||
|
||
return ( | ||
JSON.stringify(obj, addMarkerToBigInts, space) | ||
// Replace marked substrings with just the numerals | ||
.replace(markedBigIntMatcher, '$1') | ||
); | ||
}; | ||
import JSON11 from 'json11'; | ||
|
||
export const stringify = ( | ||
obj: any, | ||
|
@@ -298,7 +45,14 @@ export const stringify = ( | |
text = JSON.stringify(obj, checkForBigInts, space); | ||
|
||
if (!numeralsAreNumbers) { | ||
text = stringifyObjectWithBigInts(obj, text, replacer, space); | ||
const temp = JSON11.stringify(obj, { | ||
replacer, | ||
space, | ||
withBigInt: false, | ||
quote: '"', | ||
quoteNames: true, | ||
}); | ||
if (temp) text = temp; | ||
} | ||
|
||
return text; | ||
|
@@ -344,7 +98,8 @@ export const parse = ( | |
obj = JSON.parse(text, checkForLargeNumerals); | ||
|
||
if (!numeralsAreNumbers) { | ||
obj = parseStringWithLongNumerals(text, reviver); | ||
const temp = JSON11.parse(text, reviver, { withLongNumerals: true }); | ||
if (temp) obj = temp; | ||
} | ||
|
||
return obj; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2765,14 +2765,15 @@ | |
version "1.0.6" | ||
resolved "https://github.com/opensearch-project/opensearch-dashboards-test-library/archive/refs/tags/1.0.6.tar.gz#f2f489832a75191e243c6d2b42d49047265d9ce3" | ||
|
||
"@opensearch-project/opensearch@^2.6.0": | ||
version "2.6.0" | ||
resolved "https://registry.yarnpkg.com/@opensearch-project/opensearch/-/opensearch-2.6.0.tgz#cbacb34f92aed04e98cabcdc0dc65eb495023880" | ||
integrity sha512-zgDSa/qUpoEwA+Nxjtv0qtln63M+hS4SVO94R9XjwzJAoqsUiNMjjzF6D6Djq/xJMgCzIYjvBZ5vUlB8/kXwjQ== | ||
"@opensearch-project/opensearch@^2.9.0": | ||
version "2.9.0" | ||
resolved "https://registry.yarnpkg.com/@opensearch-project/opensearch/-/opensearch-2.9.0.tgz#319b4d174540b6d000c31477a56618e5054c6fcb" | ||
integrity sha512-BXPWSBME1rszZ8OvtBVQ9F6kLiZSENDSFPawbPa1fv0GouuQfWxkKSI9TcnfGLp869fgLTEIfeC5Qexd4RbAYw== | ||
dependencies: | ||
aws4 "^1.11.0" | ||
debug "^4.3.1" | ||
hpagent "^1.2.0" | ||
json11 "^1.0.4" | ||
ms "^2.1.3" | ||
secure-json-parse "^2.4.0" | ||
|
||
|
@@ -12186,6 +12187,11 @@ [email protected], json-stringify-safe@^5.0.1, json-stringify-safe@~5.0. | |
resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb" | ||
integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus= | ||
|
||
json11@^1.0.4, json11@^1.1.2: | ||
version "1.1.2" | ||
resolved "https://registry.yarnpkg.com/json11/-/json11-1.1.2.tgz#35ffd3ee5073b0cc09ef826b0a0dc005ebef2b5b" | ||
integrity sha512-5r1RHT1/Gr/jsI/XZZj/P6F11BKM8xvTaftRuiLkQI9Z2PFDukM82Ysxw8yDszb3NJP/NKnRlSGmhUdG99rlBw== | ||
|
||
json5@^1.0.1: | ||
version "1.0.2" | ||
resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.2.tgz#63d98d60f21b313b77c4d6da18bfa69d80e1d593" | ||
|
@@ -13248,13 +13254,6 @@ minipass@^3.0.0, minipass@^3.1.1: | |
dependencies: | ||
yallist "^4.0.0" | ||
|
||
minipass@^4.0.0: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was probably left off of another PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. need to rebase? or do you mean like we merged this and it shouldn't be there? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a leftover from #6492 |
||
version "4.0.0" | ||
resolved "https://registry.yarnpkg.com/minipass/-/minipass-4.0.0.tgz#7cebb0f9fa7d56f0c5b17853cbe28838a8dbbd3b" | ||
integrity sha512-g2Uuh2jEKoht+zvO6vJqXmYpflPqzRBT+Th2h01DKh5z7wbY/AZ2gCQ78cP70YoHPyFdY30YBV5WxgLOEwOykw== | ||
dependencies: | ||
yallist "^4.0.0" | ||
|
||
minipass@^5.0.0: | ||
version "5.0.0" | ||
resolved "https://registry.yarnpkg.com/minipass/-/minipass-5.0.0.tgz#3e9788ffb90b694a5d0ec94479a45b5d8738133d" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Is it necessary we upgrade the client version?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah. We need the version that uses JSON11